ModVege results - Moorepark time series distribution#

import os
import glob
import itertools
import numpy as np
from datetime import datetime, timezone
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import xarray as xr
import climag.climag_plot as cplt
# met station coords
# Wexford,4015,ENNISCORTHY (Brownswood),18,297870,135550,1983,(null)
LON, LAT = -6.56083, 52.46306
# Ireland boundary
GPKG_BOUNDARY = os.path.join("data", "boundaries", "boundaries_all.gpkg")
ie_bbox = gpd.read_file(GPKG_BOUNDARY, layer="ne_10m_land_2157_IE_BBOX_DIFF")
BASE_DIR = os.path.join("data", "ModVege", "EURO-CORDEX")
TEMP_DIR = os.path.join(BASE_DIR, "stats")
os.makedirs(TEMP_DIR, exist_ok=True)
datasets = {}

for exp, model, data in itertools.product(
    ["historical", "rcp45", "rcp85"],
    ["CNRM-CM5", "EC-EARTH", "HadGEM2-ES", "MPI-ESM-LR"],
    ["EURO-CORDEX", "HiResIreland"],
):
    # auto-rechunking may cause NotImplementedError with object dtype
    # where it will not be able to estimate the size in bytes of object data
    if model == "HadGEM2-ES":
        CHUNKS = 300
    else:
        CHUNKS = "auto"

    datasets[f"{data}_{model}_{exp}"] = xr.open_mfdataset(
        glob.glob(
            os.path.join(
                "data",
                "ModVege",
                data,
                exp,
                model,
                f"*{data}*{model}*{exp}*.nc",
            )
        ),
        chunks=CHUNKS,
        decode_coords="all",
    )

    # convert HadGEM2-ES data back to 360-day calendar
    # this ensures that the correct weighting is applied when calculating
    # the weighted average
    if model == "HadGEM2-ES":
        datasets[f"{data}_{model}_{exp}"] = datasets[
            f"{data}_{model}_{exp}"
        ].convert_calendar("360_day", align_on="year")

# remove spin-up year
for key in datasets.keys():
    if "historical" in key:
        datasets[key] = datasets[key].sel(time=slice("1976", "2005"))
    else:
        datasets[key] = datasets[key].sel(time=slice("2041", "2070"))
    # # normalise to keep only date in time
    # datasets[key]["time"] = datasets[key].indexes["time"].normalize()
list(datasets.keys())
['EURO-CORDEX_CNRM-CM5_historical',
 'HiResIreland_CNRM-CM5_historical',
 'EURO-CORDEX_EC-EARTH_historical',
 'HiResIreland_EC-EARTH_historical',
 'EURO-CORDEX_HadGEM2-ES_historical',
 'HiResIreland_HadGEM2-ES_historical',
 'EURO-CORDEX_MPI-ESM-LR_historical',
 'HiResIreland_MPI-ESM-LR_historical',
 'EURO-CORDEX_CNRM-CM5_rcp45',
 'HiResIreland_CNRM-CM5_rcp45',
 'EURO-CORDEX_EC-EARTH_rcp45',
 'HiResIreland_EC-EARTH_rcp45',
 'EURO-CORDEX_HadGEM2-ES_rcp45',
 'HiResIreland_HadGEM2-ES_rcp45',
 'EURO-CORDEX_MPI-ESM-LR_rcp45',
 'HiResIreland_MPI-ESM-LR_rcp45',
 'EURO-CORDEX_CNRM-CM5_rcp85',
 'HiResIreland_CNRM-CM5_rcp85',
 'EURO-CORDEX_EC-EARTH_rcp85',
 'HiResIreland_EC-EARTH_rcp85',
 'EURO-CORDEX_HadGEM2-ES_rcp85',
 'HiResIreland_HadGEM2-ES_rcp85',
 'EURO-CORDEX_MPI-ESM-LR_rcp85',
 'HiResIreland_MPI-ESM-LR_rcp85']
datasets["EURO-CORDEX_EC-EARTH_rcp45"]
<xarray.Dataset>
Dimensions:       (rlat: 33, rlon: 37, time: 10957, bnds: 2)
Coordinates:
    lat           (rlat, rlon) float64 dask.array<chunksize=(33, 37), meta=np.ndarray>
    lon           (rlat, rlon) float64 dask.array<chunksize=(33, 37), meta=np.ndarray>
  * rlat          (rlat) float64 3.685 3.795 3.905 4.015 ... 6.985 7.095 7.205
  * rlon          (rlon) float64 -17.27 -17.16 -17.05 ... -13.53 -13.41 -13.3
  * time          (time) datetime64[ns] 2041-01-01T12:00:00 ... 2070-12-31T12...
    height        float64 2.0
    rotated_pole  |S1 b''
    time_bnds     (time, bnds) datetime64[ns] dask.array<chunksize=(365, 2), meta=np.ndarray>
    spatial_ref   int64 0
Dimensions without coordinates: bnds
Data variables: (12/24)
    bm_gv         (time, rlat, rlon) float32 dask.array<chunksize=(365, 33, 37), meta=np.ndarray>
    bm_gr         (time, rlat, rlon) float32 dask.array<chunksize=(365, 33, 37), meta=np.ndarray>
    bm_dv         (time, rlat, rlon) float32 dask.array<chunksize=(365, 33, 37), meta=np.ndarray>
    bm_dr         (time, rlat, rlon) float32 dask.array<chunksize=(365, 33, 37), meta=np.ndarray>
    age_gv        (time, rlat, rlon) float32 dask.array<chunksize=(365, 33, 37), meta=np.ndarray>
    age_gr        (time, rlat, rlon) float32 dask.array<chunksize=(365, 33, 37), meta=np.ndarray>
    ...            ...
    sen_gv        (time, rlat, rlon) float32 dask.array<chunksize=(365, 33, 37), meta=np.ndarray>
    sen_gr        (time, rlat, rlon) float32 dask.array<chunksize=(365, 33, 37), meta=np.ndarray>
    abs_dv        (time, rlat, rlon) float32 dask.array<chunksize=(365, 33, 37), meta=np.ndarray>
    abs_dr        (time, rlat, rlon) float32 dask.array<chunksize=(365, 33, 37), meta=np.ndarray>
    omd_gv        (time, rlat, rlon) float32 dask.array<chunksize=(365, 33, 37), meta=np.ndarray>
    omd_gr        (time, rlat, rlon) float32 dask.array<chunksize=(365, 33, 37), meta=np.ndarray>
Attributes:
    creation_date:  2023-03-11 00:11:26.834262+00:00
    contact:        nstreethran@ucc.ie
    frequency:      day
    references:     https://github.com/ClimAg
    input_dataset:  IE_EURO-CORDEX_RCA4_EC-EARTH_rcp45
varlist = ["bm", "pgro", "gro", "i_bm"]

Box plots#

data_all = cplt.boxplot_data(
    datasets=datasets, varlist=varlist, lonlat=(LON, LAT)
)
for var in varlist:
    cplt.boxplot_all(
        data=data_all[var],
        var=var,
        title=(
            datasets["EURO-CORDEX_EC-EARTH_rcp45"][var].attrs["long_name"]
            + f" [{datasets['EURO-CORDEX_EC-EARTH_rcp45'][var].attrs['units']}]"
        ),
    )
../_images/e9f007cdaf44926b360d5addbf364f035dede9bd0e4624f4c9ddb8a687b0e9f0.png ../_images/ed2d4d3a23de45f77d0bece96e1896a714210d61a77587396d51a4e296838668.png ../_images/25c14f64699806140db6c0dc7b73b39ab279f2fd5d7f5e1480b0009090cb31f4.png ../_images/7183c633eb541c4b651492c0691b74a53c8887e18cd8bb2a6976d00b913ba049.png

Histograms#

for var in varlist:
    data_pivot = pd.pivot_table(
        data_all[var], values=var, columns="legend", index=data_all[var].index
    )
    data_pivot.plot(
        kind="hist",
        subplots=True,
        figsize=(8, 18),
        bins=50,
        sharex=True,
        sharey=True,
        layout=(8, 3),
        title=(
            datasets["EURO-CORDEX_EC-EARTH_rcp45"][var].attrs["long_name"]
            + f" [{datasets['EURO-CORDEX_EC-EARTH_rcp45'][var].attrs['units']}]"
        ),
    )
    plt.tight_layout()
    plt.show()
../_images/498970a843db6504a2847c184aa3645a471e41103249acf5a9bf04762c3abab5.png ../_images/666c842a90b2625ae3dbed93059b0873cc022d9f46ec7729663f8f6211cde284.png ../_images/a1acb7e647541c207362da5cd172b3abbe718469d8aaa597d22a7adfaa2a02b1.png ../_images/e6ef281b4302f19ba7b0139aa823597907377cc12fbe3539b494e0270f0d1853.png