Source code for climag.modvege_read_files
1"""Functions for reading input parameters and time series data
2
3Some code has been adapted from:
4https://code.europa.eu/agri4cast/modvege
5"""
6
7import os
8from datetime import datetime, timezone
9
10import pandas as pd
11import pooch
12
13
[docs]
def download_data(url, data_dir, file_name, known_hash=None):
    """Download data and store it in the specified directory using Pooch.

    Parameters
    ----------
    url : str
        URL from which the data will be downloaded
    data_dir : str
        Directory to store the downloaded data; created if it does not exist
    file_name : str
        Name of the downloaded data file with its extension (not full path)
    known_hash : str, optional
        SHA256 hash of downloaded file; forwarded unchanged to
        ``pooch.retrieve``

    Notes
    -----
    This only downloads data if necessary, i.e. if the data file does not
    already exist in the directory.

    After a download, the download time, the URL, and the hash of the file
    are written to a metadata text file stored next to the data file
    (``<file_name>.txt``). If the data file already exists, the contents of
    that metadata file are printed instead; if the metadata file is missing,
    a message is printed rather than raising ``FileNotFoundError``.
    """
    os.makedirs(data_dir, exist_ok=True)
    data_file = os.path.join(data_dir, file_name)
    metadata_file = f"{data_file}.txt"

    if not os.path.isfile(data_file):
        pooch.retrieve(
            url=url, known_hash=known_hash, fname=file_name, path=data_dir
        )
        # take the timestamp once so that the printed message and the
        # metadata file record exactly the same download time
        downloaded_on = datetime.now(tz=timezone.utc)
        print(f"Data downloaded on: {downloaded_on}")
        with open(metadata_file, "w", encoding="utf-8") as outfile:
            outfile.write(
                f"Data downloaded on: {downloaded_on}\n"
                f"Download URL: {url}\n"
                f"SHA256 hash: {pooch.file_hash(data_file)}\n"
            )
    else:
        print(f"Data '{file_name}' already exists in '{data_dir}'.")
        # the data file may have been placed here without its metadata file
        # (e.g. copied manually), so do not assume that it can be opened
        try:
            with open(metadata_file, encoding="utf-8") as infile:
                print(infile.read())
        except FileNotFoundError:
            print(f"No download metadata found at '{metadata_file}'.")
50
51
[docs]
def read_params(filename):
    """Read the input parameters (constants) file.

    The file is read as a header-less CSV whose first column is used as the
    parameter names.

    Parameters
    ----------
    filename : str
        Path to the parameter input file

    Returns
    -------
    dict[str, float]
        A dictionary of the input parameters
    """
    table = pd.read_csv(filename, header=None, index_col=0)
    # squeeze along the columns only: an unrestricted squeeze() would reduce
    # a file containing a single parameter (1 x 1 table) to a scalar, which
    # has no to_dict() method
    params = table.squeeze(axis="columns").to_dict()
    return params
69
70
[docs]
def read_timeseries(filename):
    """Read the time series input data.

    The CSV file must have a ``time`` column, which is parsed as dates. Rows
    are ordered by ``time`` and a ``doy`` (day of the year) column is derived
    from it. The row labels from before the sort are kept in an ``index``
    column.

    Parameters
    ----------
    filename : str
        Path to the input time series data file

    Returns
    -------
    tuple[pandas.DataFrame, int]
        A dataframe of the input time series data;
        Length of the data (total number of days)
    """
    data = pd.read_csv(filename, parse_dates=["time"]).sort_values(
        by=["time"]
    )
    # the accessor result shares the row labels of the sorted frame, so each
    # value is assigned to the row that it was computed from
    data["doy"] = data["time"].dt.dayofyear
    # drop=False (the default) moves the pre-sort row labels into "index"
    data = data.reset_index()
    return data, len(data)