Source code for climag.modvege_read_files

 1"""Functions for reading input parameters and time series data
 2
 3Some code has been adapted from:
 4https://code.europa.eu/agri4cast/modvege
 5"""
 6
 7import os
 8from datetime import datetime, timezone
 9
10import pandas as pd
11import pooch
12
13
def download_data(url, data_dir, file_name, known_hash=None):
    """Download data and store it in the specified directory using Pooch.

    Parameters
    ----------
    url : str
        URL from which the data will be downloaded
    data_dir : str
        Directory to store the downloaded data; created if it does not exist
    file_name : str
        Name of the downloaded data file with its extension (not full path)
    known_hash : str, optional
        SHA256 hash of downloaded file; passed on to ``pooch.retrieve``

    Notes
    -----
    This only downloads data if necessary, i.e. if the data file does not
    already exist in the directory.

    After a download, the download time (UTC), the URL, and the SHA256 hash
    of the file are written to a text file named ``<file_name>.txt`` in the
    same directory. If the data file already exists, the contents of that
    text file are printed when it is present; a missing text file is
    silently skipped instead of raising ``FileNotFoundError``.
    """
    os.makedirs(data_dir, exist_ok=True)
    data_file = os.path.join(data_dir, file_name)
    metadata_file = f"{data_file}.txt"
    if not os.path.isfile(data_file):
        pooch.retrieve(
            url=url, known_hash=known_hash, fname=file_name, path=data_dir
        )
        # take the timestamp once so that the printed message and the
        # metadata file record exactly the same time
        downloaded_on = f"Data downloaded on: {datetime.now(tz=timezone.utc)}"
        print(downloaded_on)
        with open(metadata_file, "w", encoding="utf-8") as outfile:
            outfile.write(
                f"{downloaded_on}\n"
                f"Download URL: {url}\n"
                f"SHA256 hash: {pooch.file_hash(data_file)}\n"
            )
    else:
        print(f"Data '{file_name}' already exists in '{data_dir}'.")
        # the data file may have been placed here without its metadata
        # file (e.g. copied manually), so only print it if it exists
        if os.path.isfile(metadata_file):
            with open(metadata_file, encoding="utf-8") as infile:
                print(infile.read())
50 51
def read_params(filename):
    """Read the input parameters (constants) file.

    The file is read as a CSV without a header row; the first column is
    used as the parameter names and the remaining column as their values.

    Parameters
    ----------
    filename : str
        Path to the parameter input file

    Returns
    -------
    dict[str, float]
        A dictionary of the input parameters
    """
    table = pd.read_csv(filename, header=None, index_col=0)
    # squeeze only the column axis: a bare squeeze() would collapse a file
    # holding a single parameter (1 x 1 frame) into a scalar, which has no
    # to_dict() method
    params = table.squeeze("columns").to_dict()
    return params
69 70
def read_timeseries(filename):
    """Read the time series input data

    The data are sorted chronologically by the "time" column and a "doy"
    (day of the year) column is derived from it.

    Parameters
    ----------
    filename : str
        Path to the input time series data file

    Returns
    -------
    tuple[pandas.DataFrame, int]
        A dataframe of the input time series data;
        Length of the data (total number of days)
    """
    data = pd.read_csv(filename, parse_dates=["time"]).sort_values(
        by=["time"]
    )
    data["doy"] = data["time"].dt.dayofyear
    # renumber the rows in chronological order; the row labels from before
    # the sort are retained in an "index" column
    data = data.reset_index()
    return data, len(data)