Census of Agriculture - Northern Ireland#

import os
from datetime import datetime, timezone
import pandas as pd
import pooch
URL = (
    "https://admin.opendatani.gov.uk/dataset/"
    "2a936744-dd04-457d-99b5-0000450af4fb/resource/"
    "7c3fa1e0-fadf-4cd3-b3d4-7d500274f226/download/"
    "farm-census---ward2014-2015-19-sup.csv"
)
KNOWN_HASH = None
FILE_NAME = "farm-census-ward2014.csv"
SUB_DIR = os.path.join("data", "agricultural_census", "DAERA")
DATA_FILE = os.path.join(SUB_DIR, FILE_NAME)
os.makedirs(SUB_DIR, exist_ok=True)
# download data if necessary
if not os.path.isfile(os.path.join(SUB_DIR, FILE_NAME)):
    pooch.retrieve(
        url=URL, known_hash=KNOWN_HASH, fname=FILE_NAME, path=SUB_DIR
    )

    with open(
        os.path.join(SUB_DIR, f"{FILE_NAME[:-4]}.txt"), "w", encoding="utf-8"
    ) as outfile:
        outfile.write(
            f"Data downloaded on: {datetime.now(tz=timezone.utc)}\n"
            f"Download URL: {URL}"
        )
Downloading data from 'https://admin.opendatani.gov.uk/dataset/2a936744-dd04-457d-99b5-0000450af4fb/resource/7c3fa1e0-fadf-4cd3-b3d4-7d500274f226/download/farm-census---ward2014-2015-19-sup.csv' to file '/run/media/nms/Backup/Documents/Git/ClimAg/ClimAg/data/agricultural_census/DAERA/farm-census-ward2014.csv'.
SHA256 hash of downloaded file: 5c7add70b1e2c9b2d334a200e300b7fa84bacdb3226683a442ce387d87a9aa16
Use this value as the 'known_hash' argument of 'pooch.retrieve' to ensure that the file hasn't changed if it is downloaded again in the future.
coa = pd.read_csv(DATA_FILE, encoding_errors="replace")
coa.head()
Ward2014 Code Ward2014 Name Year Number of Farms Number of lfa farms Number of non lfa farms Total area farmed in hectares Average SO/farm (� '000 ) Total crops in hectares Total grass & rough grazing in hectares Total number of cattle Total number of sheep Total number of pigs Total number of poultry ('000) Number of farmers Number of self employed Number of spouses Number of Other workers Total number of Agricultural labour
0 N08000101 Abbey (Antrim and Newtownabbey) 2015 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
1 N08000102 Aldergrove 2015 103 5 98 6452 114 237 5933 10899 12756 1139 137 129 87 44 90 263
2 N08000103 Antrim Centre 2015 5 3 3 41 164 0 40 42 0 3368 0 5 3 0 0 5
3 N08000104 Ballyclare East 2015 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
4 N08000105 Ballyclare West 2015 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
# filter grass, cattle, and sheep data
coa = coa[
    [
        "Ward2014 Code",
        "Ward2014 Name",
        "Year",
        "Total grass & rough grazing in hectares  ",
        "Total number of cattle  ",
        "Total number of sheep  ",
    ]
]
coa.head()
Ward2014 Code Ward2014 Name Year Total grass & rough grazing in hectares Total number of cattle Total number of sheep
0 N08000101 Abbey (Antrim and Newtownabbey) 2015 3 3 3
1 N08000102 Aldergrove 2015 5933 10899 12756
2 N08000103 Antrim Centre 2015 40 42 0
3 N08000104 Ballyclare East 2015 3 3 3
4 N08000105 Ballyclare West 2015 3 3 3
# keep data for 2018
coa = coa[coa["Year"] == 2018]
# rename columns
coa.rename(
    columns={
        "Ward2014 Code": "ward_2014_code",
        "Ward2014 Name": "ward_2014_name",
        "Year": "year",
        "Total grass & rough grazing in hectares  ": "total_grass_hectares",
        "Total number of cattle  ": "total_cattle",
        "Total number of sheep  ": "total_sheep",
    },
    inplace=True,
)
coa.head()
ward_2014_code ward_2014_name year total_grass_hectares total_cattle total_sheep
1386 N08000101 Abbey (Antrim and Newtownabbey) 2018 3 3 3
1387 N08000102 Aldergrove 2018 5676 10784 15053
1388 N08000103 Antrim Centre 2018 3 3 3
1389 N08000104 Ballyclare East 2018 3 3 3
1390 N08000105 Ballyclare West 2018 148 207 152
# check for missing data
coa.index[coa.isnull().any(axis=1)]
Int64Index([], dtype='int64')
# save as a CSV file
coa.to_csv(os.path.join(SUB_DIR, "daera_agricultural_census.csv"), index=False)