Source code for climag.modvege_run

  1"""Functions for running ModVege
  2
  3Some code has been adapted from:
  4https://code.europa.eu/agri4cast/modvege
  5"""
  6
  7import itertools
  8import os
  9from datetime import datetime, timezone
 10
 11import geopandas as gpd
 12import matplotlib.pyplot as plt
 13import numpy as np
 14import pandas as pd
 15import xarray as xr
 16
 17from climag.modvege import modvege
 18from climag.modvege_read_files import read_params, read_timeseries
 19
 20np.seterr("raise")
 21
 22output_vars = {
 23    "bm_gv": ["Green vegetative biomass", "kg DM ha⁻¹"],
 24    "bm_gr": ["Green reproductive biomass", "kg DM ha⁻¹"],
 25    "bm_dv": ["Dead vegetative biomass", "kg DM ha⁻¹"],
 26    "bm_dr": ["Dead reproductive biomass", "kg DM ha⁻¹"],
 27    "age_gv": ["Green vegetative biomass age", "kg DM ha⁻¹"],
 28    "age_gr": ["Green reproductive biomass age", "kg DM ha⁻¹"],
 29    "age_dv": ["Dead vegetative biomass age", "kg DM ha⁻¹"],
 30    "age_dr": ["Dead reproductive biomass age", "kg DM ha⁻¹"],
 31    "bm": ["Standing biomass", "kg DM ha⁻¹"],
 32    "pgro": ["Potential growth", "kg DM ha⁻¹ day⁻¹"],
 33    "gro": ["Total growth", "kg DM ha⁻¹ day⁻¹"],
 34    "i_bm": ["Ingested biomass", "kg DM ha⁻¹"],
 35    "h_bm": ["Harvested biomass", "kg DM ha⁻¹"],
 36    "c_bm": ["Daily ingested biomass", "kg DM ha⁻¹ day⁻¹"],
 37    "env": ["Environmental limitation of growth", "dimensionless"],
 38    "lai": ["Leaf area index", "dimensionless"],
 39    "aet": ["Actual evapotranspiration", "mm day⁻¹"],
 40    "wr": ["Water reserves", "mm day⁻¹"],
 41    "sen_gv": ["Senescence of green vegetative biomass", "kg DM ha⁻¹"],
 42    "sen_gr": ["Senescence of green reproductive biomass", "kg DM ha⁻¹"],
 43    "abs_dv": ["Abscission of dead vegetative biomass", "kg DM ha⁻¹"],
 44    "abs_dr": ["Abscission of dead reproductive biomass", "kg DM ha⁻¹"],
 45    "omd_gv": [
 46        "Green vegetative biomass organic matter digestibility",
 47        "kg DM ha⁻¹",
 48    ],
 49    "omd_gr": [
 50        "Green reproductive biomass organic matter digestibility",
 51        "kg DM ha⁻¹",
 52    ],
 53}
 54
 55
[docs] 56def run_modvege_csv(input_timeseries_file, input_params_file, out_dir): 57 """Run ModVege for a CSV time series input 58 59 Parameters 60 ---------- 61 input_timeseries_file : str 62 Path to input time series file 63 input_params_file : str 64 Path to input parameters file 65 out_dir : str 66 Path to output directory 67 68 Notes 69 ----- 70 Also creates time series plots 71 """ 72 # read parameter file into a dataframe 73 params = read_params(filename=input_params_file) 74 75 tseries, endday = read_timeseries(filename=input_timeseries_file) 76 77 # assume the initial water reserves is equivalent to the water holding 78 # capacity 79 params["wr"] = params["whc"] 80 81 # initialise the run 82 data_df = modvege(params=params, tseries=tseries, endday=endday) 83 84 # convert output to dataframe 85 data_df = pd.DataFrame(data_df) 86 87 # # drop biomass age columns 88 # data_df.drop( 89 # columns=["age_gv", "age_gr", "age_dv", "age_dr"], inplace=True 90 # ) 91 92 # save as CSV 93 data_df.to_csv(os.path.join(out_dir, "output.csv"), index=False) 94 95 # plot all columns 96 data_df.set_index("time", inplace=True) 97 98 # configure plot title 99 plot_title = [] 100 for val in output_vars.values(): 101 val = " [".join(val) + "]" 102 plot_title.append(val) 103 104 # plot data 105 data_df.plot( 106 subplots=True, 107 layout=(9, 3), 108 figsize=(18, 18), 109 xlabel="", 110 title=plot_title, 111 legend=False, 112 ) 113 plt.tight_layout() 114 plt.show()
115 116
[docs] 117def site_specific_params_file(input_params_vector, tseries, params): 118 """Load the site-specific characteristics layers that vary spatially 119 120 Parameters 121 ---------- 122 input_params_vector : str 123 Path to input parameter vector spatial file 124 tseries : xarray.Dataset 125 Xarray dataset of time series data 126 params : dict[str] 127 Dictionary of input parameters 128 """ 129 # site-specific characteristics that vary spatially 130 if input_params_vector is not None: 131 if "EURO-CORDEX" in tseries.attrs["dataset"]: 132 params["gpkg"] = gpd.read_file( 133 input_params_vector, layer="eurocordex" 134 ) 135 elif "HiResIreland" in tseries.attrs["dataset"]: 136 params["gpkg"] = gpd.read_file( 137 input_params_vector, layer="hiresireland" 138 ) 139 else: 140 params["gpkg"] = gpd.read_file(input_params_vector, layer="mera")
141 142
[docs] 143def run_modvege_nc( 144 input_timeseries_file, input_params_file, out_dir, input_params_vector=None 145): 146 """Run ModVege for a netCDF time series (climate data) input 147 148 Parameters 149 ---------- 150 input_timeseries_file : str 151 Path to input time series file 152 input_params_file : str 153 Path to input parameters file 154 out_dir : str 155 Path to output directory 156 input_params_vector : str 157 Path to input parameter vector spatial file 158 """ 159 print( 160 f"Running simulations for input file '{input_timeseries_file}'...", 161 datetime.now(tz=timezone.utc), 162 ) 163 164 # dictionary to store parameters 165 params = {} 166 167 # dictionary to store intermediate time series values 168 model_vals = {} 169 170 # read parameter file into a dataframe 171 params["csv"] = read_params(filename=input_params_file) 172 173 # use x and y coordinates depending on the dataset 174 if "MERA" in input_timeseries_file: 175 xcoord, ycoord = "x", "y" 176 else: 177 xcoord, ycoord = "rlon", "rlat" 178 179 tseries = xr.open_dataset( 180 input_timeseries_file, 181 decode_coords="all", 182 # chunks="auto" # the following operations do not work with Dask 183 # # yet, so chunking must be disabled... 184 ) 185 186 # site-specific characteristics that vary spatially 187 site_specific_params_file( 188 input_params_vector=input_params_vector, tseries=tseries, params=params 189 ) 190 191 # get the CRS 192 model_vals["data_crs"] = tseries.rio.crs 193 194 # loop through each year 195 model_vals["year_list"] = list(sorted(set(tseries["time"].dt.year.values))) 196 197 for year in model_vals["year_list"]: 198 tseries_y = tseries.sel(time=slice(f"{year}-01-01", f"{year}-12-31")) 199 200 # assign the outputs as new variables 201 for key, val in output_vars.items(): 202 tseries_y[key] = xr.full_like(tseries_y["PP"], fill_value=np.nan) 203 tseries_y[key].attrs = {"long_name": val[0], "units": val[1]} 204 205 # create a dictionary to store the time series output 206 # dataframes 207 data_df = {} 208 209 # loop through each grid cell 210 for rlon, rlat in itertools.product( 211 range(len(tseries_y.coords[xcoord])), 212 range(len(tseries_y.coords[ycoord])), 213 ): 214 if "MERA" in input_timeseries_file: 215 tseries_l = tseries_y.isel(x=rlon, y=rlat) 216 else: 217 tseries_l = tseries_y.isel(rlon=rlon, rlat=rlat) 218 219 # ignore null cells 220 if not tseries_l["PP"].isnull().all(): 221 # create a dataframe using the time array and variables 222 data_df[f"{rlon}_{rlat}_{year}"] = pd.DataFrame( 223 { 224 "time": tseries_l["time"], 225 "PP": tseries_l["PP"], 226 "PAR": tseries_l["PAR"], 227 "PET": tseries_l["PET"], 228 "T": tseries_l["T"], 229 } 230 ) 231 232 # site-specific characteristics 233 if input_params_vector is not None: 234 for key in ["sr", "ni", "whc"]: 235 params["csv"][key] = float( 236 params["gpkg"][ 237 ( 238 params["gpkg"][xcoord] 239 == float(tseries_l[xcoord].values) 240 ) 241 & ( 242 params["gpkg"][ycoord] 243 == float(tseries_l[ycoord].values) 244 ) 245 ][key] 246 ) 247 248 # temperatures for calculating ten-day moving averages in the 249 # following year when day < 10 250 model_vals[f"{rlon}_{rlat}_{year}"] = {} 251 model_vals[f"{rlon}_{rlat}_{year}"]["t_init"] = data_df[ 252 f"{rlon}_{rlat}_{year}" 253 ]["T"].iloc[-10:-1] 254 255 # starting values 256 if year > model_vals["year_list"][0]: 257 ( 258 params["csv"]["bm_gv"], 259 params["csv"]["bm_gr"], 260 params["csv"]["bm_dv"], 261 params["csv"]["bm_dr"], 262 params["csv"]["age_gv"], 263 params["csv"]["age_gr"], 264 params["csv"]["age_dv"], 265 params["csv"]["age_dr"], 266 params["csv"]["wr"], 267 model_vals["t_init"], 268 ) = ( 269 model_vals[f"{rlon}_{rlat}_{year - 1}"]["bm_gv"], 270 model_vals[f"{rlon}_{rlat}_{year - 1}"]["bm_gr"], 271 model_vals[f"{rlon}_{rlat}_{year - 1}"]["bm_dv"], 272 model_vals[f"{rlon}_{rlat}_{year - 1}"]["bm_dr"], 273 model_vals[f"{rlon}_{rlat}_{year - 1}"]["age_gv"], 274 model_vals[f"{rlon}_{rlat}_{year - 1}"]["age_gr"], 275 model_vals[f"{rlon}_{rlat}_{year - 1}"]["age_dv"], 276 model_vals[f"{rlon}_{rlat}_{year - 1}"]["age_dr"], 277 model_vals[f"{rlon}_{rlat}_{year - 1}"]["wr"], 278 model_vals[f"{rlon}_{rlat}_{year - 1}"]["t_init"], 279 ) 280 else: 281 params["csv"]["wr"] = params["csv"]["whc"] 282 model_vals["t_init"] = None 283 284 # initialise the run 285 data_df[f"{rlon}_{rlat}_{year}"] = modvege( 286 params=params["csv"], 287 tseries=data_df[f"{rlon}_{rlat}_{year}"], 288 endday=len(tseries_l["time"]), 289 t_init=model_vals["t_init"], 290 ) 291 292 # convert output to dataframe 293 data_df[f"{rlon}_{rlat}_{year}"] = pd.DataFrame( 294 data_df[f"{rlon}_{rlat}_{year}"] 295 ) 296 297 # save starting values for the next simulation year 298 # round to 7 decimal places to prevent FloatingPointError 299 ( 300 model_vals[f"{rlon}_{rlat}_{year}"]["bm_gv"], 301 model_vals[f"{rlon}_{rlat}_{year}"]["bm_gr"], 302 model_vals[f"{rlon}_{rlat}_{year}"]["bm_dv"], 303 model_vals[f"{rlon}_{rlat}_{year}"]["bm_dr"], 304 model_vals[f"{rlon}_{rlat}_{year}"]["age_gv"], 305 model_vals[f"{rlon}_{rlat}_{year}"]["age_gr"], 306 model_vals[f"{rlon}_{rlat}_{year}"]["age_dv"], 307 model_vals[f"{rlon}_{rlat}_{year}"]["age_dr"], 308 model_vals[f"{rlon}_{rlat}_{year}"]["wr"], 309 ) = ( 310 round( 311 data_df[f"{rlon}_{rlat}_{year}"]["bm_gv"].iat[-1], 7 312 ), 313 round( 314 data_df[f"{rlon}_{rlat}_{year}"]["bm_gr"].iat[-1], 7 315 ), 316 round( 317 data_df[f"{rlon}_{rlat}_{year}"]["bm_dv"].iat[-1], 7 318 ), 319 round( 320 data_df[f"{rlon}_{rlat}_{year}"]["bm_dr"].iat[-1], 7 321 ), 322 round( 323 data_df[f"{rlon}_{rlat}_{year}"]["age_gv"].iat[-1], 7 324 ), 325 round( 326 data_df[f"{rlon}_{rlat}_{year}"]["age_gr"].iat[-1], 7 327 ), 328 round( 329 data_df[f"{rlon}_{rlat}_{year}"]["age_dv"].iat[-1], 7 330 ), 331 round( 332 data_df[f"{rlon}_{rlat}_{year}"]["age_dr"].iat[-1], 7 333 ), 334 round(data_df[f"{rlon}_{rlat}_{year}"]["wr"].iat[-1], 7), 335 ) 336 337 # # drop biomass age columns 338 # data_df[f"{rlon}_{rlat}_{year}"].drop( 339 # columns=["age_gv", "age_gr", "age_dv", "age_dr"], 340 # inplace=True 341 # ) 342 343 # assign the output variables to the main Xarray dataset 344 for key in output_vars: 345 tseries_y[key].loc[ 346 { 347 xcoord: tseries_l.coords[xcoord], 348 ycoord: tseries_l.coords[ycoord], 349 "time": tseries_l.coords["time"], 350 } 351 ] = np.array(data_df[f"{rlon}_{rlat}_{year}"][key]) 352 353 # delete input variables 354 tseries_y = tseries_y.drop_vars(list(tseries.data_vars)) 355 356 # assign attributes for the data 357 tseries_y.attrs = { 358 "creation_date": str(datetime.now(tz=timezone.utc)), 359 "contact": "nstreethran@ucc.ie", 360 "frequency": "day", 361 "references": "https://github.com/ClimAg", 362 "input_dataset": tseries.attrs["dataset"], 363 } 364 365 # reassign CRS 366 tseries_y.rio.write_crs(model_vals["data_crs"], inplace=True) 367 368 # save as a NetCDF file 369 if "MERA" in input_timeseries_file: 370 model_vals["out_dir"] = os.path.join(out_dir, "MERA") 371 else: 372 model_vals["out_dir"] = os.path.join( 373 out_dir, 374 tseries.attrs["dataset"].split("_")[1], 375 tseries.attrs["dataset"].split("_")[4], 376 tseries.attrs["dataset"].split("_")[3], 377 ) 378 os.makedirs(model_vals["out_dir"], exist_ok=True) 379 tseries_y.to_netcdf( 380 os.path.join( 381 model_vals["out_dir"], 382 f"modvege_{tseries.attrs['dataset']}_{year}.nc", 383 ) 384 ) 385 386 print(f"{year} complete...", datetime.now(tz=timezone.utc))
387 388
[docs] 389def run_modvege( 390 input_params_file, input_timeseries_file, out_dir, input_params_vector=None 391): 392 """Run ModVege 393 394 Parameters 395 ---------- 396 input_params_file : str 397 File path for the input parameters 398 input_timeseries_file : str 399 File path for the input time series 400 out_dir : str 401 Directory to store output file(s) 402 403 Notes 404 ----- 405 Preprocess the inputs to run ModVege as a function and save the results 406 as a CSV file 407 """ 408 if input_timeseries_file.endswith(".csv"): 409 run_modvege_csv(input_timeseries_file, input_params_file, out_dir) 410 else: # open the climate model dataset 411 run_modvege_nc( 412 input_timeseries_file, 413 input_params_file, 414 out_dir, 415 input_params_vector, 416 )