Census of Agriculture - Republic of Ireland#
import os
from datetime import datetime, timezone
import pandas as pd
import pooch
Farms with Livestock#
https://data.cso.ie/table/AVA42
URL = (
"https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/"
"AVA42/CSV/1.0/en"
)
KNOWN_HASH = None
FILE_NAME = "COA_2020_AVA42.csv"
SUB_DIR = os.path.join("data", "agricultural_census", "CSO")
DATA_FILE = os.path.join(SUB_DIR, FILE_NAME)
os.makedirs(SUB_DIR, exist_ok=True)
# download data if necessary
if not os.path.isfile(os.path.join(SUB_DIR, FILE_NAME)):
pooch.retrieve(
url=URL, known_hash=KNOWN_HASH, fname=FILE_NAME, path=SUB_DIR
)
with open(
os.path.join(SUB_DIR, f"{FILE_NAME[:-4]}.txt"), "w", encoding="utf-8"
) as outfile:
outfile.write(
f"Data downloaded on: {datetime.now(tz=timezone.utc)}\n"
f"Download URL: {URL}"
)
coa = pd.read_csv(DATA_FILE)
coa.head()
| STATISTIC | Statistic Label | TLIST(A1) | Census Year | C03904V04656 | Electoral Division | C02148V02965 | Type of Livestock | UNIT | VALUE | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | COALS | Livestock on Farms | 2000 | 2000 | - | State | 1 | Total cattle | Number | 7037435 |
| 1 | COALS | Livestock on Farms | 2000 | 2000 | - | State | 1111 | Dairy cows | Number | 1177452 |
| 2 | COALS | Livestock on Farms | 2000 | 2000 | - | State | 1112 | Other cows | Number | 1186989 |
| 3 | COALS | Livestock on Farms | 2000 | 2000 | - | State | 2 | Total sheep | Number | 7555044 |
| 4 | COALS | Livestock on Farms | 2000 | 2000 | - | State | 9999999 | Livestock Units (LSU) | Number | 5941304.7 |
# filter for 2020, for total cattle and total sheep
# drop the state numbers
coa = coa[coa["Census Year"] == 2020]
coa = coa[coa["Type of Livestock"].isin(["Total cattle", "Total sheep"])]
coa = coa[coa["Electoral Division"] != "State"]
coa.head()
| STATISTIC | Statistic Label | TLIST(A1) | Census Year | C03904V04656 | Electoral Division | C02148V02965 | Type of Livestock | UNIT | VALUE | |
|---|---|---|---|---|---|---|---|---|---|---|
| 34105 | COALS | Livestock on Farms | 2020 | 2020 | 3576C59993454152E0530100007F0280 | Brisha / Capard, Co.Laois, 08045/08046 | 1 | Total cattle | Number | 1955 |
| 34108 | COALS | Livestock on Farms | 2020 | 2020 | 3576C59993454152E0530100007F0280 | Brisha / Capard, Co.Laois, 08045/08046 | 2 | Total sheep | Number | 420 |
| 34110 | COALS | Livestock on Farms | 2020 | 2020 | 3576C59992AD4152E0530100007F0280 | Firry / Newgrove, Co.Longford, 09024/09035 | 1 | Total cattle | Number | 2164 |
| 34113 | COALS | Livestock on Farms | 2020 | 2020 | 3576C59992AD4152E0530100007F0280 | Firry / Newgrove, Co.Longford, 09024/09035 | 2 | Total sheep | Number | 675 |
| 34115 | COALS | Livestock on Farms | 2020 | 2020 | 2AE1962918C813A3E055000000000001 | Fair Gate, Co.Louth, 10001 | 1 | Total cattle | Number | 0 |
# drop unnecessary columns
coa.drop(
columns=[
"STATISTIC",
"Statistic Label",
"TLIST(A1)",
"C02148V02965",
"UNIT",
"Census Year",
],
inplace=True,
)
# split cattle and sheep values into separate columns
coa = pd.merge(
coa[coa["Type of Livestock"] == "Total cattle"],
coa[coa["Type of Livestock"] == "Total sheep"],
on=["C03904V04656", "Electoral Division"],
)
coa.head()
| C03904V04656 | Electoral Division | Type of Livestock_x | VALUE_x | Type of Livestock_y | VALUE_y | |
|---|---|---|---|---|---|---|
| 0 | 3576C59993454152E0530100007F0280 | Brisha / Capard, Co.Laois, 08045/08046 | Total cattle | 1955 | Total sheep | 420 |
| 1 | 3576C59992AD4152E0530100007F0280 | Firry / Newgrove, Co.Longford, 09024/09035 | Total cattle | 2164 | Total sheep | 675 |
| 2 | 2AE1962918C813A3E055000000000001 | Fair Gate, Co.Louth, 10001 | Total cattle | 0 | Total sheep | 0 |
| 3 | 2AE19629193513A3E055000000000001 | St. Laurence Gate, Co.Louth, 10002 | Total cattle | 0 | Total sheep | 0 |
| 4 | 2AE19629191E13A3E055000000000001 | West Gate, Co.Louth, 10003 | Total cattle | 0 | Total sheep | 0 |
# rename columns
coa.rename(
columns={
"Electoral Division": "electoral_division",
"VALUE_x": "total_cattle",
"VALUE_y": "total_sheep",
},
inplace=True,
)
# drop unnecessary columns
coa.drop(columns=["Type of Livestock_x", "Type of Livestock_y"], inplace=True)
coa.head()
| C03904V04656 | electoral_division | total_cattle | total_sheep | |
|---|---|---|---|---|
| 0 | 3576C59993454152E0530100007F0280 | Brisha / Capard, Co.Laois, 08045/08046 | 1955 | 420 |
| 1 | 3576C59992AD4152E0530100007F0280 | Firry / Newgrove, Co.Longford, 09024/09035 | 2164 | 675 |
| 2 | 2AE1962918C813A3E055000000000001 | Fair Gate, Co.Louth, 10001 | 0 | 0 |
| 3 | 2AE19629193513A3E055000000000001 | St. Laurence Gate, Co.Louth, 10002 | 0 | 0 |
| 4 | 2AE19629191E13A3E055000000000001 | West Gate, Co.Louth, 10003 | 0 | 0 |
Land Utilisation#
https://data.cso.ie/table/AVA44
URL = (
"https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/"
"AVA44/CSV/1.0/en"
)
KNOWN_HASH = None
FILE_NAME = "COA_2020_AVA44.csv"
SUB_DIR = os.path.join("data", "agricultural_census", "CSO")
DATA_FILE = os.path.join(SUB_DIR, FILE_NAME)
os.makedirs(SUB_DIR, exist_ok=True)
# download data if necessary
if not os.path.isfile(os.path.join(SUB_DIR, FILE_NAME)):
pooch.retrieve(
url=URL, known_hash=KNOWN_HASH, fname=FILE_NAME, path=SUB_DIR
)
with open(
os.path.join(SUB_DIR, f"{FILE_NAME[:-4]}.txt"), "w", encoding="utf-8"
) as outfile:
outfile.write(
f"Data downloaded on: {datetime.now(tz=timezone.utc)}\n"
f"Download URL: {URL}"
)
Downloading data from 'https://ws.cso.ie/public/api.restful/PxStat.Data.Cube_API.ReadDataset/AVA44/CSV/1.0/en' to file '/run/media/nms/Backup/Documents/Git/ClimAg/ClimAg/data/AgriculturalCensus/CSO/COA_2020_AVA44.csv'.
SHA256 hash of downloaded file: cb5cd56a705f4dbd5561b77ca6cd20d708b9014eb5c24e75ab4123a9d8d33203
Use this value as the 'known_hash' argument of 'pooch.retrieve' to ensure that the file hasn't changed if it is downloaded again in the future.
land = pd.read_csv(DATA_FILE)
land.head()
| STATISTIC | Statistic Label | TLIST(A1) | Census Year | C03904V04656 | Electoral Division | C02145V02588 | Type of Crop | UNIT | VALUE | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | COALU | Area of selected crops | 2000 | 2000 | - | State | 1 | Area farmed (AAU) | Hectares | 4443071 |
| 1 | COALU | Area of selected crops | 2000 | 2000 | - | State | 11430 | Total cereals | Hectares | 279045 |
| 2 | COALU | Area of selected crops | 2000 | 2000 | - | State | 11114 | All grassland | Hectares | 4041947 |
| 3 | COALU | Area of selected crops | 2000 | 2000 | 3576C59993454152E0530100007F0280 | Brisha / Capard, Co.Laois, 08045/08046 | 1 | Area farmed (AAU) | Hectares | 966.4 |
| 4 | COALU | Area of selected crops | 2000 | 2000 | 3576C59993454152E0530100007F0280 | Brisha / Capard, Co.Laois, 08045/08046 | 11430 | Total cereals | Hectares | 2.2 |
# filter for 2020, for all grassland
# drop the state numbers
land = land[land["Census Year"] == 2020]
land = land[land["Type of Crop"] == "All grassland"]
land = land[land["Electoral Division"] != "State"]
land.head()
| STATISTIC | Statistic Label | TLIST(A1) | Census Year | C03904V04656 | Electoral Division | C02145V02588 | Type of Crop | UNIT | VALUE | |
|---|---|---|---|---|---|---|---|---|---|---|
| 20465 | COALU | Area of selected crops | 2020 | 2020 | 3576C59993454152E0530100007F0280 | Brisha / Capard, Co.Laois, 08045/08046 | 11114 | All grassland | Hectares | 964.3 |
| 20468 | COALU | Area of selected crops | 2020 | 2020 | 3576C59992AD4152E0530100007F0280 | Firry / Newgrove, Co.Longford, 09024/09035 | 11114 | All grassland | Hectares | 965.4 |
| 20471 | COALU | Area of selected crops | 2020 | 2020 | 2AE1962918C813A3E055000000000001 | Fair Gate, Co.Louth, 10001 | 11114 | All grassland | Hectares | 0 |
| 20474 | COALU | Area of selected crops | 2020 | 2020 | 2AE19629193513A3E055000000000001 | St. Laurence Gate, Co.Louth, 10002 | 11114 | All grassland | Hectares | 0 |
| 20477 | COALU | Area of selected crops | 2020 | 2020 | 2AE19629191E13A3E055000000000001 | West Gate, Co.Louth, 10003 | 11114 | All grassland | Hectares | 0 |
# rename columns
land.rename(
columns={
"Electoral Division": "electoral_division",
"VALUE": "total_grass_hectares",
},
inplace=True,
)
# keep only necessary columns
land = land[["C03904V04656", "electoral_division", "total_grass_hectares"]]
land.head()
| C03904V04656 | electoral_division | total_grass_hectares | |
|---|---|---|---|
| 20465 | 3576C59993454152E0530100007F0280 | Brisha / Capard, Co.Laois, 08045/08046 | 964.3 |
| 20468 | 3576C59992AD4152E0530100007F0280 | Firry / Newgrove, Co.Longford, 09024/09035 | 965.4 |
| 20471 | 2AE1962918C813A3E055000000000001 | Fair Gate, Co.Louth, 10001 | 0 |
| 20474 | 2AE19629193513A3E055000000000001 | St. Laurence Gate, Co.Louth, 10002 | 0 |
| 20477 | 2AE19629191E13A3E055000000000001 | West Gate, Co.Louth, 10003 | 0 |
Merge datasets#
data = pd.merge(coa, land, on=["C03904V04656", "electoral_division"])
data.head()
| C03904V04656 | electoral_division | total_cattle | total_sheep | total_grass_hectares | |
|---|---|---|---|---|---|
| 0 | 3576C59993454152E0530100007F0280 | Brisha / Capard, Co.Laois, 08045/08046 | 1955 | 420 | 964.3 |
| 1 | 3576C59992AD4152E0530100007F0280 | Firry / Newgrove, Co.Longford, 09024/09035 | 2164 | 675 | 965.4 |
| 2 | 2AE1962918C813A3E055000000000001 | Fair Gate, Co.Louth, 10001 | 0 | 0 | 0 |
| 3 | 2AE19629193513A3E055000000000001 | St. Laurence Gate, Co.Louth, 10002 | 0 | 0 | 0 |
| 4 | 2AE19629191E13A3E055000000000001 | West Gate, Co.Louth, 10003 | 0 | 0 | 0 |
# check for missing data
data.index[data.isnull().any(axis=1)]
Int64Index([ 6, 10, 16, 23, 26, 30, 36, 42, 46, 47,
...
3370, 3376, 3378, 3383, 3384, 3386, 3393, 3396, 3400, 3403],
dtype='int64', length=935)
# fill with zero
data.fillna(0, inplace=True)
data.index[data.isnull().any(axis=1)]
Int64Index([], dtype='int64')
# save as a CSV file
data.to_csv(os.path.join(SUB_DIR, "COA_2020.csv"), index=False)