Source code for climag.modvege_run

  1"""Functions for running ModVege
  2
  3Some code has been adapted from:
  4https://code.europa.eu/agri4cast/modvege
  5"""
  6
  7import itertools
  8import os
  9from datetime import datetime, timezone
 10
 11import geopandas as gpd
 12import matplotlib.pyplot as plt
 13import numpy as np
 14import pandas as pd
 15import xarray as xr
 16
 17from climag.modvege import modvege
 18from climag.modvege_read_files import read_params, read_timeseries
 19
 20np.seterr("raise")
 21
 22output_vars = {
 23    "bm_gv": ["Green vegetative biomass", "kg DM ha⁻¹"],
 24    "bm_gr": ["Green reproductive biomass", "kg DM ha⁻¹"],
 25    "bm_dv": ["Dead vegetative biomass", "kg DM ha⁻¹"],
 26    "bm_dr": ["Dead reproductive biomass", "kg DM ha⁻¹"],
 27    "age_gv": ["Green vegetative biomass age", "kg DM ha⁻¹"],
 28    "age_gr": ["Green reproductive biomass age", "kg DM ha⁻¹"],
 29    "age_dv": ["Dead vegetative biomass age", "kg DM ha⁻¹"],
 30    "age_dr": ["Dead reproductive biomass age", "kg DM ha⁻¹"],
 31    "bm": ["Standing biomass", "kg DM ha⁻¹"],
 32    "pgro": ["Potential growth", "kg DM ha⁻¹ day⁻¹"],
 33    "gro": ["Total growth", "kg DM ha⁻¹ day⁻¹"],
 34    "i_bm": ["Ingested biomass", "kg DM ha⁻¹"],
 35    "h_bm": ["Harvested biomass", "kg DM ha⁻¹"],
 36    "c_bm": ["Daily ingested biomass", "kg DM ha⁻¹ day⁻¹"],
 37    "env": ["Environmental limitation of growth", "dimensionless"],
 38    "lai": ["Leaf area index", "dimensionless"],
 39    "aet": ["Actual evapotranspiration", "mm day⁻¹"],
 40    "wr": ["Water reserves", "mm day⁻¹"],
 41    "sen_gv": ["Senescence of green vegetative biomass", "kg DM ha⁻¹"],
 42    "sen_gr": ["Senescence of green reproductive biomass", "kg DM ha⁻¹"],
 43    "abs_dv": ["Abscission of dead vegetative biomass", "kg DM ha⁻¹"],
 44    "abs_dr": ["Abscission of dead reproductive biomass", "kg DM ha⁻¹"],
 45    "omd_gv": [
 46        "Green vegetative biomass organic matter digestibility",
 47        "kg DM ha⁻¹",
 48    ],
 49    "omd_gr": [
 50        "Green reproductive biomass organic matter digestibility",
 51        "kg DM ha⁻¹",
 52    ],
 53}
 54
 55

[docs]
 56def run_modvege_csv(input_timeseries_file, input_params_file, out_dir):
 57    """Run ModVege for a CSV time series input
 58
 59    Parameters
 60    ----------
 61    input_timeseries_file : str
 62        Path to input time series file
 63    input_params_file : str
 64        Path to input parameters file
 65    out_dir : str
 66        Path to output directory
 67
 68    Notes
 69    -----
 70    Also creates time series plots
 71    """
 72    # read parameter file into a dataframe
 73    params = read_params(filename=input_params_file)
 74
 75    tseries, endday = read_timeseries(filename=input_timeseries_file)
 76
 77    # assume the initial water reserves is equivalent to the water holding
 78    # capacity
 79    params["wr"] = params["whc"]
 80
 81    # initialise the run
 82    data_df = modvege(params=params, tseries=tseries, endday=endday)
 83
 84    # convert output to dataframe
 85    data_df = pd.DataFrame(data_df)
 86
 87    # # drop biomass age columns
 88    # data_df.drop(
 89    #     columns=["age_gv", "age_gr", "age_dv", "age_dr"], inplace=True
 90    # )
 91
 92    # save as CSV
 93    data_df.to_csv(os.path.join(out_dir, "output.csv"), index=False)
 94
 95    # plot all columns
 96    data_df.set_index("time", inplace=True)
 97
 98    # configure plot title
 99    plot_title = []
100    for val in output_vars.values():
101        val = " [".join(val) + "]"
102        plot_title.append(val)
103
104    # plot data
105    data_df.plot(
106        subplots=True,
107        layout=(9, 3),
108        figsize=(18, 18),
109        xlabel="",
110        title=plot_title,
111        legend=False,
112    )
113    plt.tight_layout()
114    plt.show()

115
116

[docs]
117def site_specific_params_file(input_params_vector, tseries, params):
118    """Load the site-specific characteristics layers that vary spatially
119
120    Parameters
121    ----------
122    input_params_vector : str
123        Path to input parameter vector spatial file
124    tseries : xarray.Dataset
125        Xarray dataset of time series data
126    params : dict[str]
127        Dictionary of input parameters
128    """
129    # site-specific characteristics that vary spatially
130    if input_params_vector is not None:
131        if "EURO-CORDEX" in tseries.attrs["dataset"]:
132            params["gpkg"] = gpd.read_file(
133                input_params_vector, layer="eurocordex"
134            )
135        elif "HiResIreland" in tseries.attrs["dataset"]:
136            params["gpkg"] = gpd.read_file(
137                input_params_vector, layer="hiresireland"
138            )
139        else:
140            params["gpkg"] = gpd.read_file(input_params_vector, layer="mera")

141
142

[docs]
143def run_modvege_nc(
144    input_timeseries_file, input_params_file, out_dir, input_params_vector=None
145):
146    """Run ModVege for a netCDF time series (climate data) input
147
148    Parameters
149    ----------
150    input_timeseries_file : str
151        Path to input time series file
152    input_params_file : str
153        Path to input parameters file
154    out_dir : str
155        Path to output directory
156    input_params_vector : str
157        Path to input parameter vector spatial file
158    """
159    print(
160        f"Running simulations for input file '{input_timeseries_file}'...",
161        datetime.now(tz=timezone.utc),
162    )
163
164    # dictionary to store parameters
165    params = {}
166
167    # dictionary to store intermediate time series values
168    model_vals = {}
169
170    # read parameter file into a dataframe
171    params["csv"] = read_params(filename=input_params_file)
172
173    # use x and y coordinates depending on the dataset
174    if "MERA" in input_timeseries_file:
175        xcoord, ycoord = "x", "y"
176    else:
177        xcoord, ycoord = "rlon", "rlat"
178
179    tseries = xr.open_dataset(
180        input_timeseries_file,
181        decode_coords="all",
182        # chunks="auto"  # the following operations do not work with Dask
183        #                # yet, so chunking must be disabled...
184    )
185
186    # site-specific characteristics that vary spatially
187    site_specific_params_file(
188        input_params_vector=input_params_vector, tseries=tseries, params=params
189    )
190
191    # get the CRS
192    model_vals["data_crs"] = tseries.rio.crs
193
194    # loop through each year
195    model_vals["year_list"] = list(sorted(set(tseries["time"].dt.year.values)))
196
197    for year in model_vals["year_list"]:
198        tseries_y = tseries.sel(time=slice(f"{year}-01-01", f"{year}-12-31"))
199
200        # assign the outputs as new variables
201        for key, val in output_vars.items():
202            tseries_y[key] = xr.full_like(tseries_y["PP"], fill_value=np.nan)
203            tseries_y[key].attrs = {"long_name": val[0], "units": val[1]}
204
205        # create a dictionary to store the time series output
206        # dataframes
207        data_df = {}
208
209        # loop through each grid cell
210        for rlon, rlat in itertools.product(
211            range(len(tseries_y.coords[xcoord])),
212            range(len(tseries_y.coords[ycoord])),
213        ):
214            if "MERA" in input_timeseries_file:
215                tseries_l = tseries_y.isel(x=rlon, y=rlat)
216            else:
217                tseries_l = tseries_y.isel(rlon=rlon, rlat=rlat)
218
219            # ignore null cells
220            if not tseries_l["PP"].isnull().all():
221                # create a dataframe using the time array and variables
222                data_df[f"{rlon}_{rlat}_{year}"] = pd.DataFrame(
223                    {
224                        "time": tseries_l["time"],
225                        "PP": tseries_l["PP"],
226                        "PAR": tseries_l["PAR"],
227                        "PET": tseries_l["PET"],
228                        "T": tseries_l["T"],
229                    }
230                )
231
232                # site-specific characteristics
233                if input_params_vector is not None:
234                    for key in ["sr", "ni", "whc"]:
235                        params["csv"][key] = float(
236                            params["gpkg"][
237                                (
238                                    params["gpkg"][xcoord]
239                                    == float(tseries_l[xcoord].values)
240                                )
241                                & (
242                                    params["gpkg"][ycoord]
243                                    == float(tseries_l[ycoord].values)
244                                )
245                            ][key]
246                        )
247
248                # temperatures for calculating ten-day moving averages in the
249                # following year when day < 10
250                model_vals[f"{rlon}_{rlat}_{year}"] = {}
251                model_vals[f"{rlon}_{rlat}_{year}"]["t_init"] = data_df[
252                    f"{rlon}_{rlat}_{year}"
253                ]["T"].iloc[-10:-1]
254
255                # starting values
256                if year > model_vals["year_list"][0]:
257                    (
258                        params["csv"]["bm_gv"],
259                        params["csv"]["bm_gr"],
260                        params["csv"]["bm_dv"],
261                        params["csv"]["bm_dr"],
262                        params["csv"]["age_gv"],
263                        params["csv"]["age_gr"],
264                        params["csv"]["age_dv"],
265                        params["csv"]["age_dr"],
266                        params["csv"]["wr"],
267                        model_vals["t_init"],
268                    ) = (
269                        model_vals[f"{rlon}_{rlat}_{year - 1}"]["bm_gv"],
270                        model_vals[f"{rlon}_{rlat}_{year - 1}"]["bm_gr"],
271                        model_vals[f"{rlon}_{rlat}_{year - 1}"]["bm_dv"],
272                        model_vals[f"{rlon}_{rlat}_{year - 1}"]["bm_dr"],
273                        model_vals[f"{rlon}_{rlat}_{year - 1}"]["age_gv"],
274                        model_vals[f"{rlon}_{rlat}_{year - 1}"]["age_gr"],
275                        model_vals[f"{rlon}_{rlat}_{year - 1}"]["age_dv"],
276                        model_vals[f"{rlon}_{rlat}_{year - 1}"]["age_dr"],
277                        model_vals[f"{rlon}_{rlat}_{year - 1}"]["wr"],
278                        model_vals[f"{rlon}_{rlat}_{year - 1}"]["t_init"],
279                    )
280                else:
281                    params["csv"]["wr"] = params["csv"]["whc"]
282                    model_vals["t_init"] = None
283
284                # initialise the run
285                data_df[f"{rlon}_{rlat}_{year}"] = modvege(
286                    params=params["csv"],
287                    tseries=data_df[f"{rlon}_{rlat}_{year}"],
288                    endday=len(tseries_l["time"]),
289                    t_init=model_vals["t_init"],
290                )
291
292                # convert output to dataframe
293                data_df[f"{rlon}_{rlat}_{year}"] = pd.DataFrame(
294                    data_df[f"{rlon}_{rlat}_{year}"]
295                )
296
297                # save starting values for the next simulation year
298                # round to 7 decimal places to prevent FloatingPointError
299                (
300                    model_vals[f"{rlon}_{rlat}_{year}"]["bm_gv"],
301                    model_vals[f"{rlon}_{rlat}_{year}"]["bm_gr"],
302                    model_vals[f"{rlon}_{rlat}_{year}"]["bm_dv"],
303                    model_vals[f"{rlon}_{rlat}_{year}"]["bm_dr"],
304                    model_vals[f"{rlon}_{rlat}_{year}"]["age_gv"],
305                    model_vals[f"{rlon}_{rlat}_{year}"]["age_gr"],
306                    model_vals[f"{rlon}_{rlat}_{year}"]["age_dv"],
307                    model_vals[f"{rlon}_{rlat}_{year}"]["age_dr"],
308                    model_vals[f"{rlon}_{rlat}_{year}"]["wr"],
309                ) = (
310                    round(
311                        data_df[f"{rlon}_{rlat}_{year}"]["bm_gv"].iat[-1], 7
312                    ),
313                    round(
314                        data_df[f"{rlon}_{rlat}_{year}"]["bm_gr"].iat[-1], 7
315                    ),
316                    round(
317                        data_df[f"{rlon}_{rlat}_{year}"]["bm_dv"].iat[-1], 7
318                    ),
319                    round(
320                        data_df[f"{rlon}_{rlat}_{year}"]["bm_dr"].iat[-1], 7
321                    ),
322                    round(
323                        data_df[f"{rlon}_{rlat}_{year}"]["age_gv"].iat[-1], 7
324                    ),
325                    round(
326                        data_df[f"{rlon}_{rlat}_{year}"]["age_gr"].iat[-1], 7
327                    ),
328                    round(
329                        data_df[f"{rlon}_{rlat}_{year}"]["age_dv"].iat[-1], 7
330                    ),
331                    round(
332                        data_df[f"{rlon}_{rlat}_{year}"]["age_dr"].iat[-1], 7
333                    ),
334                    round(data_df[f"{rlon}_{rlat}_{year}"]["wr"].iat[-1], 7),
335                )
336
337                # # drop biomass age columns
338                # data_df[f"{rlon}_{rlat}_{year}"].drop(
339                #     columns=["age_gv", "age_gr", "age_dv", "age_dr"],
340                #     inplace=True
341                # )
342
343                # assign the output variables to the main Xarray dataset
344                for key in output_vars:
345                    tseries_y[key].loc[
346                        {
347                            xcoord: tseries_l.coords[xcoord],
348                            ycoord: tseries_l.coords[ycoord],
349                            "time": tseries_l.coords["time"],
350                        }
351                    ] = np.array(data_df[f"{rlon}_{rlat}_{year}"][key])
352
353        # delete input variables
354        tseries_y = tseries_y.drop_vars(list(tseries.data_vars))
355
356        # assign attributes for the data
357        tseries_y.attrs = {
358            "creation_date": str(datetime.now(tz=timezone.utc)),
359            "contact": "nstreethran@ucc.ie",
360            "frequency": "day",
361            "references": "https://github.com/ClimAg",
362            "input_dataset": tseries.attrs["dataset"],
363        }
364
365        # reassign CRS
366        tseries_y.rio.write_crs(model_vals["data_crs"], inplace=True)
367
368        # save as a NetCDF file
369        if "MERA" in input_timeseries_file:
370            model_vals["out_dir"] = os.path.join(out_dir, "MERA")
371        else:
372            model_vals["out_dir"] = os.path.join(
373                out_dir,
374                tseries.attrs["dataset"].split("_")[1],
375                tseries.attrs["dataset"].split("_")[4],
376                tseries.attrs["dataset"].split("_")[3],
377            )
378        os.makedirs(model_vals["out_dir"], exist_ok=True)
379        tseries_y.to_netcdf(
380            os.path.join(
381                model_vals["out_dir"],
382                f"modvege_{tseries.attrs['dataset']}_{year}.nc",
383            )
384        )
385
386        print(f"{year} complete...", datetime.now(tz=timezone.utc))

387
388

[docs]
389def run_modvege(
390    input_params_file, input_timeseries_file, out_dir, input_params_vector=None
391):
392    """Run ModVege
393
394    Parameters
395    ----------
396    input_params_file : str
397        File path for the input parameters
398    input_timeseries_file : str
399        File path for the input time series
400    out_dir : str
401        Directory to store output file(s)
402
403    Notes
404    -----
405    Preprocess the inputs to run ModVege as a function and save the results
406    as a CSV file
407    """
408    if input_timeseries_file.endswith(".csv"):
409        run_modvege_csv(input_timeseries_file, input_params_file, out_dir)
410    else:  # open the climate model dataset
411        run_modvege_nc(
412            input_timeseries_file,
413            input_params_file,
414            out_dir,
415            input_params_vector,
416        )