Skip to content

Commit

Permalink
Don't include facts with geometry issues (#306)
Browse files Browse the repository at this point in the history
  • Loading branch information
cjohns-scottlogic authored Dec 10, 2024
1 parent 6c2ed36 commit cdc48cb
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 0 deletions.
4 changes: 4 additions & 0 deletions digital_land/phase/harmonise.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,10 @@ def process(self, stream):
point_geometry = shapely.wkt.loads(geometry)
x, y = point_geometry.coords[0]
(o["GeoX"], o["GeoY"]) = [str(x), str(y)]
else:
# Remove the invalid point
del o["GeoX"]
del o["GeoY"]
except Exception as e:
logger.error(
f"Exception occurred while fetching geoX, geoY coordinates: {e}"
Expand Down
24 changes: 24 additions & 0 deletions tests/unit/phase/test_harmonise.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,3 +259,27 @@ def test_validate_categorical_field_dataset():
assert len(issues.rows) == 1
# but we get an issue generated
assert issues.rows[0]["issue-type"] == "invalid category value"


def test_harmonise_geox_geoy():
    """Check how HarmonisePhase treats two GeoX/GeoY pairs.

    Two rows are fed through the phase. The first pair is expected to
    come out unchanged; the second is expected to come out as an empty
    row, with exactly one issue logged whose issue-type mentions
    "out of bounds".
    """
    issue_log = IssueLog()
    phase = HarmonisePhase(
        field_datatype_map={"GeoX": "string", "GeoY": "string"},
        issues=issue_log,
    )
    rows = [
        {"GeoX": "-1.543611", "GeoY": "53.7975"},
        # NOTE(review): presumably outside the bounds the phase accepts;
        # confirm against the geometry checks in HarmonisePhase.
        {"GeoX": "3.141329", "GeoY": "42.25472"},
    ]

    output = list(phase.process(FakeDictReader(rows)))

    # Both blocks are still emitted; only the second one's fields are removed.
    assert len(output) == 2
    kept, stripped = output
    assert kept["row"] == {"GeoX": "-1.543611", "GeoY": "53.7975"}
    assert stripped["row"] == {}

    assert len(issue_log.rows) == 1
    assert "out of bounds" in issue_log.rows[0]["issue-type"]
61 changes: 61 additions & 0 deletions tests/unit/phase/test_pivot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env -S pytest -svv
from copy import deepcopy

from digital_land.phase.pivot import PivotPhase


def test_pivot():
    """Check that PivotPhase emits one block per field of the input row.

    A single input block with two row fields is expected to yield two
    blocks. Each keeps the block-level keys and carries a row naming
    one field and its value, alongside the entry-date and bookkeeping
    keys, with "entity" and "fact" as empty strings.
    """
    envelope = {
        "priority": 1,
        "entity": 1234,
        "resource": "res0123",
        "line-number": 1,
        "entry-number": 1,
    }
    blocks = [
        {
            **envelope,
            "row": {
                "entry-date": "2024-12-04",
                "test-field": "test-value",
            },
        }
    ]

    def expected_block(field, value):
        # Build the block expected for one pivoted field/value pair.
        return {
            **envelope,
            "row": {
                "entity": "",
                "entry-date": "2024-12-04",
                "entry-number": 1,
                "fact": "",
                "field": field,
                "line-number": 1,
                "priority": 1,
                "resource": "res0123",
                "value": value,
            },
        }

    # Snapshot each block as it is yielded.
    # NOTE(review): presumably the phase may reuse objects between yields; confirm.
    output = []
    for block in PivotPhase().process(blocks):
        output.append(deepcopy(block))

    assert output == [
        expected_block("entry-date", "2024-12-04"),
        expected_block("test-field", "test-value"),
    ]

0 comments on commit cdc48cb

Please sign in to comment.