Census of Agriculture - Republic of Ireland#

import os
from datetime import datetime, timezone
import pandas as pd
import pooch

Farms with Livestock#

https://data.cso.ie/table/AVA42

URL = (
    "https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/"
    "AVA42/CSV/1.0/en"
)
KNOWN_HASH = None
FILE_NAME = "COA_2020_AVA42.csv"
SUB_DIR = os.path.join("data", "agricultural_census", "CSO")
DATA_FILE = os.path.join(SUB_DIR, FILE_NAME)
os.makedirs(SUB_DIR, exist_ok=True)
# download data if necessary
if not os.path.isfile(os.path.join(SUB_DIR, FILE_NAME)):
    pooch.retrieve(
        url=URL, known_hash=KNOWN_HASH, fname=FILE_NAME, path=SUB_DIR
    )

    with open(
        os.path.join(SUB_DIR, f"{FILE_NAME[:-4]}.txt"), "w", encoding="utf-8"
    ) as outfile:
        outfile.write(
            f"Data downloaded on: {datetime.now(tz=timezone.utc)}\n"
            f"Download URL: {URL}"
        )
coa = pd.read_csv(DATA_FILE)
coa.head()
STATISTIC Statistic Label TLIST(A1) Census Year C03904V04656 Electoral Division C02148V02965 Type of Livestock UNIT VALUE
0 COALS Livestock on Farms 2000 2000 - State 1 Total cattle Number 7037435
1 COALS Livestock on Farms 2000 2000 - State 1111 Dairy cows Number 1177452
2 COALS Livestock on Farms 2000 2000 - State 1112 Other cows Number 1186989
3 COALS Livestock on Farms 2000 2000 - State 2 Total sheep Number 7555044
4 COALS Livestock on Farms 2000 2000 - State 9999999 Livestock Units (LSU) Number 5941304.7
# filter for 2020, for total cattle and total sheep
# drop the state numbers
coa = coa[coa["Census Year"] == 2020]
coa = coa[coa["Type of Livestock"].isin(["Total cattle", "Total sheep"])]
coa = coa[coa["Electoral Division"] != "State"]
coa.head()
STATISTIC Statistic Label TLIST(A1) Census Year C03904V04656 Electoral Division C02148V02965 Type of Livestock UNIT VALUE
34105 COALS Livestock on Farms 2020 2020 3576C59993454152E0530100007F0280 Brisha / Capard, Co.Laois, 08045/08046 1 Total cattle Number 1955
34108 COALS Livestock on Farms 2020 2020 3576C59993454152E0530100007F0280 Brisha / Capard, Co.Laois, 08045/08046 2 Total sheep Number 420
34110 COALS Livestock on Farms 2020 2020 3576C59992AD4152E0530100007F0280 Firry / Newgrove, Co.Longford, 09024/09035 1 Total cattle Number 2164
34113 COALS Livestock on Farms 2020 2020 3576C59992AD4152E0530100007F0280 Firry / Newgrove, Co.Longford, 09024/09035 2 Total sheep Number 675
34115 COALS Livestock on Farms 2020 2020 2AE1962918C813A3E055000000000001 Fair Gate, Co.Louth, 10001 1 Total cattle Number 0
# drop unnecessary columns
coa.drop(
    columns=[
        "STATISTIC",
        "Statistic Label",
        "TLIST(A1)",
        "C02148V02965",
        "UNIT",
        "Census Year",
    ],
    inplace=True,
)
# split cattle and sheep values into separate columns
coa = pd.merge(
    coa[coa["Type of Livestock"] == "Total cattle"],
    coa[coa["Type of Livestock"] == "Total sheep"],
    on=["C03904V04656", "Electoral Division"],
)
coa.head()
C03904V04656 Electoral Division Type of Livestock_x VALUE_x Type of Livestock_y VALUE_y
0 3576C59993454152E0530100007F0280 Brisha / Capard, Co.Laois, 08045/08046 Total cattle 1955 Total sheep 420
1 3576C59992AD4152E0530100007F0280 Firry / Newgrove, Co.Longford, 09024/09035 Total cattle 2164 Total sheep 675
2 2AE1962918C813A3E055000000000001 Fair Gate, Co.Louth, 10001 Total cattle 0 Total sheep 0
3 2AE19629193513A3E055000000000001 St. Laurence Gate, Co.Louth, 10002 Total cattle 0 Total sheep 0
4 2AE19629191E13A3E055000000000001 West Gate, Co.Louth, 10003 Total cattle 0 Total sheep 0
# rename columns
coa.rename(
    columns={
        "Electoral Division": "electoral_division",
        "VALUE_x": "total_cattle",
        "VALUE_y": "total_sheep",
    },
    inplace=True,
)
# drop unnecessary columns
coa.drop(columns=["Type of Livestock_x", "Type of Livestock_y"], inplace=True)
coa.head()
C03904V04656 electoral_division total_cattle total_sheep
0 3576C59993454152E0530100007F0280 Brisha / Capard, Co.Laois, 08045/08046 1955 420
1 3576C59992AD4152E0530100007F0280 Firry / Newgrove, Co.Longford, 09024/09035 2164 675
2 2AE1962918C813A3E055000000000001 Fair Gate, Co.Louth, 10001 0 0
3 2AE19629193513A3E055000000000001 St. Laurence Gate, Co.Louth, 10002 0 0
4 2AE19629191E13A3E055000000000001 West Gate, Co.Louth, 10003 0 0

Land Utilisation#

https://data.cso.ie/table/AVA44

URL = (
    "https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/"
    "AVA44/CSV/1.0/en"
)
KNOWN_HASH = None
FILE_NAME = "COA_2020_AVA44.csv"
SUB_DIR = os.path.join("data", "agricultural_census", "CSO")
DATA_FILE = os.path.join(SUB_DIR, FILE_NAME)
os.makedirs(SUB_DIR, exist_ok=True)
# download data if necessary
if not os.path.isfile(os.path.join(SUB_DIR, FILE_NAME)):
    pooch.retrieve(
        url=URL, known_hash=KNOWN_HASH, fname=FILE_NAME, path=SUB_DIR
    )

    with open(
        os.path.join(SUB_DIR, f"{FILE_NAME[:-4]}.txt"), "w", encoding="utf-8"
    ) as outfile:
        outfile.write(
            f"Data downloaded on: {datetime.now(tz=timezone.utc)}\n"
            f"Download URL: {URL}"
        )
Downloading data from 'https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/AVA44/CSV/1.0/en' to file '/run/media/nms/Backup/Documents/Git/ClimAg/ClimAg/data/AgriculturalCensus/CSO/COA_2020_AVA44.csv'.
SHA256 hash of downloaded file: cb5cd56a705f4dbd5561b77ca6cd20d708b9014eb5c24e75ab4123a9d8d33203
Use this value as the 'known_hash' argument of 'pooch.retrieve' to ensure that the file hasn't changed if it is downloaded again in the future.
land = pd.read_csv(DATA_FILE)
land.head()
STATISTIC Statistic Label TLIST(A1) Census Year C03904V04656 Electoral Division C02145V02588 Type of Crop UNIT VALUE
0 COALU Area of selected crops 2000 2000 - State 1 Area farmed (AAU) Hectares 4443071
1 COALU Area of selected crops 2000 2000 - State 11430 Total cereals Hectares 279045
2 COALU Area of selected crops 2000 2000 - State 11114 All grassland Hectares 4041947
3 COALU Area of selected crops 2000 2000 3576C59993454152E0530100007F0280 Brisha / Capard, Co.Laois, 08045/08046 1 Area farmed (AAU) Hectares 966.4
4 COALU Area of selected crops 2000 2000 3576C59993454152E0530100007F0280 Brisha / Capard, Co.Laois, 08045/08046 11430 Total cereals Hectares 2.2
# filter for 2020, for all grassland
# drop the state numbers
land = land[land["Census Year"] == 2020]
land = land[land["Type of Crop"] == "All grassland"]
land = land[land["Electoral Division"] != "State"]
land.head()
STATISTIC Statistic Label TLIST(A1) Census Year C03904V04656 Electoral Division C02145V02588 Type of Crop UNIT VALUE
20465 COALU Area of selected crops 2020 2020 3576C59993454152E0530100007F0280 Brisha / Capard, Co.Laois, 08045/08046 11114 All grassland Hectares 964.3
20468 COALU Area of selected crops 2020 2020 3576C59992AD4152E0530100007F0280 Firry / Newgrove, Co.Longford, 09024/09035 11114 All grassland Hectares 965.4
20471 COALU Area of selected crops 2020 2020 2AE1962918C813A3E055000000000001 Fair Gate, Co.Louth, 10001 11114 All grassland Hectares 0
20474 COALU Area of selected crops 2020 2020 2AE19629193513A3E055000000000001 St. Laurence Gate, Co.Louth, 10002 11114 All grassland Hectares 0
20477 COALU Area of selected crops 2020 2020 2AE19629191E13A3E055000000000001 West Gate, Co.Louth, 10003 11114 All grassland Hectares 0
# rename columns
land.rename(
    columns={
        "Electoral Division": "electoral_division",
        "VALUE": "total_grass_hectares",
    },
    inplace=True,
)
# keep only necessary columns
land = land[["C03904V04656", "electoral_division", "total_grass_hectares"]]
land.head()
C03904V04656 electoral_division total_grass_hectares
20465 3576C59993454152E0530100007F0280 Brisha / Capard, Co.Laois, 08045/08046 964.3
20468 3576C59992AD4152E0530100007F0280 Firry / Newgrove, Co.Longford, 09024/09035 965.4
20471 2AE1962918C813A3E055000000000001 Fair Gate, Co.Louth, 10001 0
20474 2AE19629193513A3E055000000000001 St. Laurence Gate, Co.Louth, 10002 0
20477 2AE19629191E13A3E055000000000001 West Gate, Co.Louth, 10003 0

Merge datasets#

data = pd.merge(coa, land, on=["C03904V04656", "electoral_division"])
data.head()
C03904V04656 electoral_division total_cattle total_sheep total_grass_hectares
0 3576C59993454152E0530100007F0280 Brisha / Capard, Co.Laois, 08045/08046 1955 420 964.3
1 3576C59992AD4152E0530100007F0280 Firry / Newgrove, Co.Longford, 09024/09035 2164 675 965.4
2 2AE1962918C813A3E055000000000001 Fair Gate, Co.Louth, 10001 0 0 0
3 2AE19629193513A3E055000000000001 St. Laurence Gate, Co.Louth, 10002 0 0 0
4 2AE19629191E13A3E055000000000001 West Gate, Co.Louth, 10003 0 0 0
# check for missing data
data.index[data.isnull().any(axis=1)]
Int64Index([   6,   10,   16,   23,   26,   30,   36,   42,   46,   47,
            ...
            3370, 3376, 3378, 3383, 3384, 3386, 3393, 3396, 3400, 3403],
           dtype='int64', length=935)
# fill with zero
data.fillna(0, inplace=True)
data.index[data.isnull().any(axis=1)]
Int64Index([], dtype='int64')
# save as a CSV file
data.to_csv(os.path.join(SUB_DIR, "COA_2020.csv"), index=False)