Skip to content

Commit

Permalink
ensure where clause isn't used if no range is supplied
Browse files Browse the repository at this point in the history
  • Loading branch information
eveleighoj committed Jan 16, 2025
1 parent 3453840 commit f43907d
Showing 1 changed file with 8 additions and 7 deletions.
15 changes: 8 additions & 7 deletions digital_land/package/dataset_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,11 +224,17 @@ def load_entities_range(
# Do this to match with later field names.
entity_fields = [e.replace("-", "_") for e in entity_fields]
# input_paths_str = f"{self.cache_dir}/fact{self.suffix}"
if entity_range is not None:
entity_where_clause = (
f"WHERE entity >= {entity_range[0]} AND entity < {entity_range[1]}"
)
else:
entity_where_clause = ""

query = f"""
SELECT DISTINCT REPLACE(field,'-','_')
FROM parquet_scan('{transformed_parquet_dir}/*.parquet')
WHERE entity >= {entity_range[0]} AND entity < {entity_range[1]}
{entity_where_clause}
"""

# distinct_fields - list of the distinct values found in the field column of fact
Expand Down Expand Up @@ -299,12 +305,7 @@ def load_entities_range(
# query to create the file

# craft a where clause to limit the entities in question; this chunking helps solve memory issues
if entity_range is not None:
entity_where_clause = (
f"WHERE entity >= {entity_range[0]} AND entity < {entity_range[1]}"
)
else:
entity_where_clause = ""

query = f"""
SELECT {fields_str}{optional_org_str} FROM (
SELECT {fields_str}, CASE WHEN resource_csv."end-date" IS NULL THEN '2999-12-31' ELSE resource_csv."end-date" END AS resource_end_date
Expand Down

0 comments on commit f43907d

Please sign in to comment.