From 4e6cd8170cf7532e0f0cf64fc4d793e89df845b8 Mon Sep 17 00:00:00 2001
From: Christopher Johns
Date: Tue, 18 Jun 2024 11:37:01 +0100
Subject: [PATCH] Removed old scripts.

---
 bin/check_organisation_csv.py  | 129 ---------------------------------
 bin/create_organisation_csv.py |  49 -------------
 2 files changed, 178 deletions(-)
 delete mode 100755 bin/check_organisation_csv.py
 delete mode 100644 bin/create_organisation_csv.py

diff --git a/bin/check_organisation_csv.py b/bin/check_organisation_csv.py
deleted file mode 100755
index 0d2a049e..00000000
--- a/bin/check_organisation_csv.py
+++ /dev/null
@@ -1,129 +0,0 @@
-#!/usr/bin/env python3
-
-# check the integrity of the organisation.csv file
-
-import sys
-import click
-import re
-import csv
-from digital_land.specification import Specification
-
-
-organisations = {}
-specification = Specification()
-lpas = {}
-entities = {}
-wikidatas = {}
-bas = {}
-odcs = {}
-curies = {}
-issues = []
-
-
-def load_lpas(path):
-    for row in csv.DictReader(open(path)):
-        lpas[row["reference"]] = row
-
-
-def load(path):
-    for row in csv.DictReader(open(path)):
-        curie = row.get("organisation", "")
-        if not curie:
-            curie = f'{row["prefix"]}:{row["reference"]}'
-        organisations.setdefault(curie, {})
-        for field, value in row.items():
-            if value:
-                organisations[curie][field] = value
-
-
-def log_issue(severity, row, issue, field="", value=""):
-    line = {
-        "datapackage": "organisation",
-        "entity": row["entity"],
-        "prefix": row["prefix"],
-        "reference": row["reference"],
-        "severity": severity,
-        "issue": issue,
-        "field": field,
-        "value": value,
-    }
-    if severity in ["critical", "error"]:
-        print(f'{line["severity"]} {line["prefix"]}:{line["reference"]} {issue} {field} {value}', file=sys.stderr)
-    issues.append(line)
-
-
-def save_issues(path):
-    fieldnames = ["datapackage", "severity", "entity", "prefix", "reference", "issue", "field", "value"]
-    w = csv.DictWriter(open(path, "w"), fieldnames=fieldnames, extrasaction="ignore")
-    w.writeheader()
-    for row in issues:
-        w.writerow(row)
-
-
-def check():
-    for organisation, row in organisations.items():
-
-        # look for duplicate entities
-        if row["entity"] in entities:
-            log_issue("error", row, "duplicate entity")
-        else:
-            entities[row["entity"]] = organisation
-
-        # check wikidata
-        wikidata = row.get("wikidata", "")
-        if wikidata and wikidata in wikidatas:
-            severity = "warning" if row["entity"] in ["600001"] else "error"
-            log_issue(severity, row, "duplicate value", field="wikidata", value=row["wikidata"])
-        else:
-            wikidatas[row["wikidata"]] = organisation
-
-        # check LPA value against dataset
-        lpa = row.get("local-planning-authority", "")
-        if not lpa:
-            if (row["dataset"] in ["local-authority", "national-park-authority"]) and (
-                row.get("local-authority-type", "") not in ["CTY", "COMB", "SRA"]
-            ):
-                severity = "warning" if row.get("end-date", "") else "error"
-                log_issue(severity, row, "missing", field="local-planning-authority")
-        elif lpa not in lpas:
-            log_issue("error", row, "unknown", field="local-planning-authority", value=lpa)
-        else:
-            lpas[lpa]["organisation"] = organisation
-
-        # check billing-authority
-        ba = row.get("billing-authority", "")
-        if not ba:
-            if row["dataset"] not in ["government-organisation"]:
-                severity = "warning" if row.get("end-date", "") else "error"
-                log_issue(severity, row, "missing", field="billing-authority")
-        elif ba in bas:
-            log_issue("error", row, "duplicate value", field="billing-authority", value=row["billing-authority"])
-        else:
-            bas[row["billing-authority"]] = organisation
-
-        # check opendatacommunities-uri
-        odc = row.get("opendatacommunities-uri", "")
-        if not odc:
-            if row["dataset"] not in ["government-organisation"]:
-                severity = "warning" if row.get("end-date", "") else "error"
-                log_issue(severity, row, "missing", field="opendatacommunities-uri")
-        elif odc in odcs:
-            log_issue("error", row, "duplicate value", field="opendatacommunities-uri", value=row["opendatacommunities-uri"])
-        else:
-            odcs[row["opendatacommunities-uri"]] = organisation
-
-
-
-@click.command()
-@click.option(
-    "--output-path", type=click.Path(), default="dataset/organisation-check.csv"
-)
-def cli(output_path):
-    load_lpas("var/cache/local-planning-authority.csv")
-    load("dataset/organisation.csv")
-    check()
-    save_issues(output_path)
-
-
-if __name__ == "__main__":
-    cli()
diff --git a/bin/create_organisation_csv.py b/bin/create_organisation_csv.py
deleted file mode 100644
index 6ab32c3f..00000000
--- a/bin/create_organisation_csv.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import click
-from os import listdir
-from pathlib import Path
-import csv
-
-from digital_land.specification import Specification
-
-@click.command()
-@click.option("--flattened-dir", type=click.Path(exists=True), default="flattened/")
-@click.option(
-    "--specification-dir", type=click.Path(exists=True), default="specification/"
-)
-@click.option("--output-path", type=click.Path(), default="dataset/organisation.csv")
-def create_org_csv_cli(flattened_dir, specification_dir,output_path):
-    specification = Specification(path=specification_dir)
-
-    # get field names
-    org_field_names = specification.schema_field['organisation']
-
-
-    # get file list
-    filenames = listdir(flattened_dir)
-    filenames = [ filename for filename in filenames if filename.endswith('.csv') ]
-
-    orgs = []
-    for file in filenames:
-        filepath = Path(flattened_dir) / file
-        with open(filepath, newline="") as f:
-            for row in csv.DictReader(f):
-                # hack to replace "_" with "-" in fieldnames
-                if row['typology'] == 'organisation':
-                    row = { k.replace("_", "-"): v for k, v in row.items() }
-                    if not row.get('organisation',None):
-                        row['organisation'] = row['dataset'] + ':' + row['reference']
-                    org = {k:v for k,v in row.items() if k in org_field_names}
-                    orgs.append(org)
-
-    # write list of dicts
-    output_path = Path(output_path)
-    with open(output_path, "w", newline="") as f:
-        w = csv.DictWriter(f, fieldnames=org_field_names, extrasaction='ignore')
-        w.writeheader()
-        w.writerows(orgs)
-
-    return
-
-
-if __name__ == "__main__":
-    create_org_csv_cli()