Skip to content

Commit

Permalink
read just columns instead of whole .csv
Browse files Browse the repository at this point in the history
  • Loading branch information
CarlosCoelhoSL committed Dec 16, 2024
1 parent d68cdca commit 3b4248f
Showing 1 changed file with 11 additions and 5 deletions.
16 changes: 11 additions & 5 deletions digital_land/utils/add_data_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import csv
import os
from datetime import datetime
from urllib.parse import urlparse
Expand Down Expand Up @@ -76,16 +77,21 @@ def get_column_field_summary(
column_field_summary += "\n\nUnmapped Columns:"
# Try the converted .csv first; if it does not exist (FileNotFoundError), the resource is already a .csv, so read it from resource_path instead
try:
converted_resource_df = pd.read_csv(
os.path.join(converted_dir, endpoint_resource_info["resource"] + ".csv")
)
with open(
os.path.join(converted_dir, endpoint_resource_info["resource"] + ".csv"),
"r",
) as f:
reader = csv.DictReader(f)
converted_resource_columns = reader.fieldnames
except FileNotFoundError:
converted_resource_df = pd.read_csv(endpoint_resource_info["resource_path"])
with open(endpoint_resource_info["resource_path"], "r") as f:
reader = csv.DictReader(f)
converted_resource_columns = reader.fieldnames

# Find columns that are in the resource but not in the column field log
unmapped_columns = [
column
for column in converted_resource_df.columns
for column in converted_resource_columns
if column not in column_field_df["column"].values
]

Expand Down

0 comments on commit 3b4248f

Please sign in to comment.