1"""Functions for running ModVege
2
3Some code has been adapted from:
4https://code.europa.eu/agri4cast/modvege
5"""
6
7import itertools
8import os
9from datetime import datetime, timezone
10
11import geopandas as gpd
12import matplotlib.pyplot as plt
13import numpy as np
14import pandas as pd
15import xarray as xr
16
17from climag.modvege import modvege
18from climag.modvege_read_files import read_params, read_timeseries
19
20np.seterr("raise")
21
22output_vars = {
23 "bm_gv": ["Green vegetative biomass", "kg DM ha⁻¹"],
24 "bm_gr": ["Green reproductive biomass", "kg DM ha⁻¹"],
25 "bm_dv": ["Dead vegetative biomass", "kg DM ha⁻¹"],
26 "bm_dr": ["Dead reproductive biomass", "kg DM ha⁻¹"],
27 "age_gv": ["Green vegetative biomass age", "kg DM ha⁻¹"],
28 "age_gr": ["Green reproductive biomass age", "kg DM ha⁻¹"],
29 "age_dv": ["Dead vegetative biomass age", "kg DM ha⁻¹"],
30 "age_dr": ["Dead reproductive biomass age", "kg DM ha⁻¹"],
31 "bm": ["Standing biomass", "kg DM ha⁻¹"],
32 "pgro": ["Potential growth", "kg DM ha⁻¹ day⁻¹"],
33 "gro": ["Total growth", "kg DM ha⁻¹ day⁻¹"],
34 "i_bm": ["Ingested biomass", "kg DM ha⁻¹"],
35 "h_bm": ["Harvested biomass", "kg DM ha⁻¹"],
36 "c_bm": ["Daily ingested biomass", "kg DM ha⁻¹ day⁻¹"],
37 "env": ["Environmental limitation of growth", "dimensionless"],
38 "lai": ["Leaf area index", "dimensionless"],
39 "aet": ["Actual evapotranspiration", "mm day⁻¹"],
40 "wr": ["Water reserves", "mm day⁻¹"],
41 "sen_gv": ["Senescence of green vegetative biomass", "kg DM ha⁻¹"],
42 "sen_gr": ["Senescence of green reproductive biomass", "kg DM ha⁻¹"],
43 "abs_dv": ["Abscission of dead vegetative biomass", "kg DM ha⁻¹"],
44 "abs_dr": ["Abscission of dead reproductive biomass", "kg DM ha⁻¹"],
45 "omd_gv": [
46 "Green vegetative biomass organic matter digestibility",
47 "kg DM ha⁻¹",
48 ],
49 "omd_gr": [
50 "Green reproductive biomass organic matter digestibility",
51 "kg DM ha⁻¹",
52 ],
53}
54
55
[docs]
56def run_modvege_csv(input_timeseries_file, input_params_file, out_dir):
57 """Run ModVege for a CSV time series input
58
59 Parameters
60 ----------
61 input_timeseries_file : str
62 Path to input time series file
63 input_params_file : str
64 Path to input parameters file
65 out_dir : str
66 Path to output directory
67
68 Notes
69 -----
70 Also creates time series plots
71 """
72 # read parameter file into a dataframe
73 params = read_params(filename=input_params_file)
74
75 tseries, endday = read_timeseries(filename=input_timeseries_file)
76
77 # assume the initial water reserves is equivalent to the water holding
78 # capacity
79 params["wr"] = params["whc"]
80
81 # initialise the run
82 data_df = modvege(params=params, tseries=tseries, endday=endday)
83
84 # convert output to dataframe
85 data_df = pd.DataFrame(data_df)
86
87 # # drop biomass age columns
88 # data_df.drop(
89 # columns=["age_gv", "age_gr", "age_dv", "age_dr"], inplace=True
90 # )
91
92 # save as CSV
93 data_df.to_csv(os.path.join(out_dir, "output.csv"), index=False)
94
95 # plot all columns
96 data_df.set_index("time", inplace=True)
97
98 # configure plot title
99 plot_title = []
100 for val in output_vars.values():
101 val = " [".join(val) + "]"
102 plot_title.append(val)
103
104 # plot data
105 data_df.plot(
106 subplots=True,
107 layout=(9, 3),
108 figsize=(18, 18),
109 xlabel="",
110 title=plot_title,
111 legend=False,
112 )
113 plt.tight_layout()
114 plt.show()
115
116
[docs]
117def site_specific_params_file(input_params_vector, tseries, params):
118 """Load the site-specific characteristics layers that vary spatially
119
120 Parameters
121 ----------
122 input_params_vector : str
123 Path to input parameter vector spatial file
124 tseries : xarray.Dataset
125 Xarray dataset of time series data
126 params : dict[str]
127 Dictionary of input parameters
128 """
129 # site-specific characteristics that vary spatially
130 if input_params_vector is not None:
131 if "EURO-CORDEX" in tseries.attrs["dataset"]:
132 params["gpkg"] = gpd.read_file(
133 input_params_vector, layer="eurocordex"
134 )
135 elif "HiResIreland" in tseries.attrs["dataset"]:
136 params["gpkg"] = gpd.read_file(
137 input_params_vector, layer="hiresireland"
138 )
139 else:
140 params["gpkg"] = gpd.read_file(input_params_vector, layer="mera")
141
142
[docs]
143def run_modvege_nc(
144 input_timeseries_file, input_params_file, out_dir, input_params_vector=None
145):
146 """Run ModVege for a netCDF time series (climate data) input
147
148 Parameters
149 ----------
150 input_timeseries_file : str
151 Path to input time series file
152 input_params_file : str
153 Path to input parameters file
154 out_dir : str
155 Path to output directory
156 input_params_vector : str
157 Path to input parameter vector spatial file
158 """
159 print(
160 f"Running simulations for input file '{input_timeseries_file}'...",
161 datetime.now(tz=timezone.utc),
162 )
163
164 # dictionary to store parameters
165 params = {}
166
167 # dictionary to store intermediate time series values
168 model_vals = {}
169
170 # read parameter file into a dataframe
171 params["csv"] = read_params(filename=input_params_file)
172
173 # use x and y coordinates depending on the dataset
174 if "MERA" in input_timeseries_file:
175 xcoord, ycoord = "x", "y"
176 else:
177 xcoord, ycoord = "rlon", "rlat"
178
179 tseries = xr.open_dataset(
180 input_timeseries_file,
181 decode_coords="all",
182 # chunks="auto" # the following operations do not work with Dask
183 # # yet, so chunking must be disabled...
184 )
185
186 # site-specific characteristics that vary spatially
187 site_specific_params_file(
188 input_params_vector=input_params_vector, tseries=tseries, params=params
189 )
190
191 # get the CRS
192 model_vals["data_crs"] = tseries.rio.crs
193
194 # loop through each year
195 model_vals["year_list"] = list(sorted(set(tseries["time"].dt.year.values)))
196
197 for year in model_vals["year_list"]:
198 tseries_y = tseries.sel(time=slice(f"{year}-01-01", f"{year}-12-31"))
199
200 # assign the outputs as new variables
201 for key, val in output_vars.items():
202 tseries_y[key] = xr.full_like(tseries_y["PP"], fill_value=np.nan)
203 tseries_y[key].attrs = {"long_name": val[0], "units": val[1]}
204
205 # create a dictionary to store the time series output
206 # dataframes
207 data_df = {}
208
209 # loop through each grid cell
210 for rlon, rlat in itertools.product(
211 range(len(tseries_y.coords[xcoord])),
212 range(len(tseries_y.coords[ycoord])),
213 ):
214 if "MERA" in input_timeseries_file:
215 tseries_l = tseries_y.isel(x=rlon, y=rlat)
216 else:
217 tseries_l = tseries_y.isel(rlon=rlon, rlat=rlat)
218
219 # ignore null cells
220 if not tseries_l["PP"].isnull().all():
221 # create a dataframe using the time array and variables
222 data_df[f"{rlon}_{rlat}_{year}"] = pd.DataFrame(
223 {
224 "time": tseries_l["time"],
225 "PP": tseries_l["PP"],
226 "PAR": tseries_l["PAR"],
227 "PET": tseries_l["PET"],
228 "T": tseries_l["T"],
229 }
230 )
231
232 # site-specific characteristics
233 if input_params_vector is not None:
234 for key in ["sr", "ni", "whc"]:
235 params["csv"][key] = float(
236 params["gpkg"][
237 (
238 params["gpkg"][xcoord]
239 == float(tseries_l[xcoord].values)
240 )
241 & (
242 params["gpkg"][ycoord]
243 == float(tseries_l[ycoord].values)
244 )
245 ][key]
246 )
247
248 # temperatures for calculating ten-day moving averages in the
249 # following year when day < 10
250 model_vals[f"{rlon}_{rlat}_{year}"] = {}
251 model_vals[f"{rlon}_{rlat}_{year}"]["t_init"] = data_df[
252 f"{rlon}_{rlat}_{year}"
253 ]["T"].iloc[-10:-1]
254
255 # starting values
256 if year > model_vals["year_list"][0]:
257 (
258 params["csv"]["bm_gv"],
259 params["csv"]["bm_gr"],
260 params["csv"]["bm_dv"],
261 params["csv"]["bm_dr"],
262 params["csv"]["age_gv"],
263 params["csv"]["age_gr"],
264 params["csv"]["age_dv"],
265 params["csv"]["age_dr"],
266 params["csv"]["wr"],
267 model_vals["t_init"],
268 ) = (
269 model_vals[f"{rlon}_{rlat}_{year - 1}"]["bm_gv"],
270 model_vals[f"{rlon}_{rlat}_{year - 1}"]["bm_gr"],
271 model_vals[f"{rlon}_{rlat}_{year - 1}"]["bm_dv"],
272 model_vals[f"{rlon}_{rlat}_{year - 1}"]["bm_dr"],
273 model_vals[f"{rlon}_{rlat}_{year - 1}"]["age_gv"],
274 model_vals[f"{rlon}_{rlat}_{year - 1}"]["age_gr"],
275 model_vals[f"{rlon}_{rlat}_{year - 1}"]["age_dv"],
276 model_vals[f"{rlon}_{rlat}_{year - 1}"]["age_dr"],
277 model_vals[f"{rlon}_{rlat}_{year - 1}"]["wr"],
278 model_vals[f"{rlon}_{rlat}_{year - 1}"]["t_init"],
279 )
280 else:
281 params["csv"]["wr"] = params["csv"]["whc"]
282 model_vals["t_init"] = None
283
284 # initialise the run
285 data_df[f"{rlon}_{rlat}_{year}"] = modvege(
286 params=params["csv"],
287 tseries=data_df[f"{rlon}_{rlat}_{year}"],
288 endday=len(tseries_l["time"]),
289 t_init=model_vals["t_init"],
290 )
291
292 # convert output to dataframe
293 data_df[f"{rlon}_{rlat}_{year}"] = pd.DataFrame(
294 data_df[f"{rlon}_{rlat}_{year}"]
295 )
296
297 # save starting values for the next simulation year
298 # round to 7 decimal places to prevent FloatingPointError
299 (
300 model_vals[f"{rlon}_{rlat}_{year}"]["bm_gv"],
301 model_vals[f"{rlon}_{rlat}_{year}"]["bm_gr"],
302 model_vals[f"{rlon}_{rlat}_{year}"]["bm_dv"],
303 model_vals[f"{rlon}_{rlat}_{year}"]["bm_dr"],
304 model_vals[f"{rlon}_{rlat}_{year}"]["age_gv"],
305 model_vals[f"{rlon}_{rlat}_{year}"]["age_gr"],
306 model_vals[f"{rlon}_{rlat}_{year}"]["age_dv"],
307 model_vals[f"{rlon}_{rlat}_{year}"]["age_dr"],
308 model_vals[f"{rlon}_{rlat}_{year}"]["wr"],
309 ) = (
310 round(
311 data_df[f"{rlon}_{rlat}_{year}"]["bm_gv"].iat[-1], 7
312 ),
313 round(
314 data_df[f"{rlon}_{rlat}_{year}"]["bm_gr"].iat[-1], 7
315 ),
316 round(
317 data_df[f"{rlon}_{rlat}_{year}"]["bm_dv"].iat[-1], 7
318 ),
319 round(
320 data_df[f"{rlon}_{rlat}_{year}"]["bm_dr"].iat[-1], 7
321 ),
322 round(
323 data_df[f"{rlon}_{rlat}_{year}"]["age_gv"].iat[-1], 7
324 ),
325 round(
326 data_df[f"{rlon}_{rlat}_{year}"]["age_gr"].iat[-1], 7
327 ),
328 round(
329 data_df[f"{rlon}_{rlat}_{year}"]["age_dv"].iat[-1], 7
330 ),
331 round(
332 data_df[f"{rlon}_{rlat}_{year}"]["age_dr"].iat[-1], 7
333 ),
334 round(data_df[f"{rlon}_{rlat}_{year}"]["wr"].iat[-1], 7),
335 )
336
337 # # drop biomass age columns
338 # data_df[f"{rlon}_{rlat}_{year}"].drop(
339 # columns=["age_gv", "age_gr", "age_dv", "age_dr"],
340 # inplace=True
341 # )
342
343 # assign the output variables to the main Xarray dataset
344 for key in output_vars:
345 tseries_y[key].loc[
346 {
347 xcoord: tseries_l.coords[xcoord],
348 ycoord: tseries_l.coords[ycoord],
349 "time": tseries_l.coords["time"],
350 }
351 ] = np.array(data_df[f"{rlon}_{rlat}_{year}"][key])
352
353 # delete input variables
354 tseries_y = tseries_y.drop_vars(list(tseries.data_vars))
355
356 # assign attributes for the data
357 tseries_y.attrs = {
358 "creation_date": str(datetime.now(tz=timezone.utc)),
359 "contact": "nstreethran@ucc.ie",
360 "frequency": "day",
361 "references": "https://github.com/ClimAg",
362 "input_dataset": tseries.attrs["dataset"],
363 }
364
365 # reassign CRS
366 tseries_y.rio.write_crs(model_vals["data_crs"], inplace=True)
367
368 # save as a NetCDF file
369 if "MERA" in input_timeseries_file:
370 model_vals["out_dir"] = os.path.join(out_dir, "MERA")
371 else:
372 model_vals["out_dir"] = os.path.join(
373 out_dir,
374 tseries.attrs["dataset"].split("_")[1],
375 tseries.attrs["dataset"].split("_")[4],
376 tseries.attrs["dataset"].split("_")[3],
377 )
378 os.makedirs(model_vals["out_dir"], exist_ok=True)
379 tseries_y.to_netcdf(
380 os.path.join(
381 model_vals["out_dir"],
382 f"modvege_{tseries.attrs['dataset']}_{year}.nc",
383 )
384 )
385
386 print(f"{year} complete...", datetime.now(tz=timezone.utc))
387
388
[docs]
389def run_modvege(
390 input_params_file, input_timeseries_file, out_dir, input_params_vector=None
391):
392 """Run ModVege
393
394 Parameters
395 ----------
396 input_params_file : str
397 File path for the input parameters
398 input_timeseries_file : str
399 File path for the input time series
400 out_dir : str
401 Directory to store output file(s)
402
403 Notes
404 -----
405 Preprocess the inputs to run ModVege as a function and save the results
406 as a CSV file
407 """
408 if input_timeseries_file.endswith(".csv"):
409 run_modvege_csv(input_timeseries_file, input_params_file, out_dir)
410 else: # open the climate model dataset
411 run_modvege_nc(
412 input_timeseries_file,
413 input_params_file,
414 out_dir,
415 input_params_vector,
416 )