Census of Agriculture - Northern Ireland#
import os
from datetime import datetime, timezone
import pandas as pd
import pooch
URL = (
"https://admin.opendatani.gov.uk/dataset/"
"2a936744-dd04-457d-99b5-0000450af4fb/resource/"
"7c3fa1e0-fadf-4cd3-b3d4-7d500274f226/download/"
"farm-census---ward2014-2015-19-sup.csv"
)
KNOWN_HASH = None
FILE_NAME = "farm-census-ward2014.csv"
SUB_DIR = os.path.join("data", "agricultural_census", "DAERA")
DATA_FILE = os.path.join(SUB_DIR, FILE_NAME)
os.makedirs(SUB_DIR, exist_ok=True)
# download data if necessary
if not os.path.isfile(os.path.join(SUB_DIR, FILE_NAME)):
pooch.retrieve(
url=URL, known_hash=KNOWN_HASH, fname=FILE_NAME, path=SUB_DIR
)
with open(
os.path.join(SUB_DIR, f"{FILE_NAME[:-4]}.txt"), "w", encoding="utf-8"
) as outfile:
outfile.write(
f"Data downloaded on: {datetime.now(tz=timezone.utc)}\n"
f"Download URL: {URL}"
)
Downloading data from 'https://admin.opendatani.gov.uk/dataset/2a936744-dd04-457d-99b5-0000450af4fb/resource/7c3fa1e0-fadf-4cd3-b3d4-7d500274f226/download/farm-census---ward2014-2015-19-sup.csv' to file '/run/media/nms/Backup/Documents/Git/ClimAg/ClimAg/data/agricultural_census/DAERA/farm-census-ward2014.csv'.
SHA256 hash of downloaded file: 5c7add70b1e2c9b2d334a200e300b7fa84bacdb3226683a442ce387d87a9aa16
Use this value as the 'known_hash' argument of 'pooch.retrieve' to ensure that the file hasn't changed if it is downloaded again in the future.
coa = pd.read_csv(DATA_FILE, encoding_errors="replace")
coa.head()
| Ward2014 Code | Ward2014 Name | Year | Number of Farms | Number of lfa farms | Number of non lfa farms | Total area farmed in hectares | Average SO/farm (� '000 ) | Total crops in hectares | Total grass & rough grazing in hectares | Total number of cattle | Total number of sheep | Total number of pigs | Total number of poultry ('000) | Number of farmers | Number of self employed | Number of spouses | Number of Other workers | Total number of Agricultural labour | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | N08000101 | Abbey (Antrim and Newtownabbey) | 2015 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |
| 1 | N08000102 | Aldergrove | 2015 | 103 | 5 | 98 | 6452 | 114 | 237 | 5933 | 10899 | 12756 | 1139 | 137 | 129 | 87 | 44 | 90 | 263 |
| 2 | N08000103 | Antrim Centre | 2015 | 5 | 3 | 3 | 41 | 164 | 0 | 40 | 42 | 0 | 3368 | 0 | 5 | 3 | 0 | 0 | 5 |
| 3 | N08000104 | Ballyclare East | 2015 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |
| 4 | N08000105 | Ballyclare West | 2015 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 | 3 |
# filter grass, cattle, and sheep data
coa = coa[
[
"Ward2014 Code",
"Ward2014 Name",
"Year",
"Total grass & rough grazing in hectares ",
"Total number of cattle ",
"Total number of sheep ",
]
]
coa.head()
| Ward2014 Code | Ward2014 Name | Year | Total grass & rough grazing in hectares | Total number of cattle | Total number of sheep | |
|---|---|---|---|---|---|---|
| 0 | N08000101 | Abbey (Antrim and Newtownabbey) | 2015 | 3 | 3 | 3 |
| 1 | N08000102 | Aldergrove | 2015 | 5933 | 10899 | 12756 |
| 2 | N08000103 | Antrim Centre | 2015 | 40 | 42 | 0 |
| 3 | N08000104 | Ballyclare East | 2015 | 3 | 3 | 3 |
| 4 | N08000105 | Ballyclare West | 2015 | 3 | 3 | 3 |
# keep data for 2018
coa = coa[coa["Year"] == 2018]
# rename columns
coa.rename(
columns={
"Ward2014 Code": "ward_2014_code",
"Ward2014 Name": "ward_2014_name",
"Year": "year",
"Total grass & rough grazing in hectares ": "total_grass_hectares",
"Total number of cattle ": "total_cattle",
"Total number of sheep ": "total_sheep",
},
inplace=True,
)
coa.head()
| ward_2014_code | ward_2014_name | year | total_grass_hectares | total_cattle | total_sheep | |
|---|---|---|---|---|---|---|
| 1386 | N08000101 | Abbey (Antrim and Newtownabbey) | 2018 | 3 | 3 | 3 |
| 1387 | N08000102 | Aldergrove | 2018 | 5676 | 10784 | 15053 |
| 1388 | N08000103 | Antrim Centre | 2018 | 3 | 3 | 3 |
| 1389 | N08000104 | Ballyclare East | 2018 | 3 | 3 | 3 |
| 1390 | N08000105 | Ballyclare West | 2018 | 148 | 207 | 152 |
# check for missing data
coa.index[coa.isnull().any(axis=1)]
Int64Index([], dtype='int64')
# save as a CSV file
coa.to_csv(os.path.join(SUB_DIR, "daera_agricultural_census.csv"), index=False)