Skip to content

Commit

Permalink
WIP: Initial try of a dataset_update command.
Browse files Browse the repository at this point in the history
  • Loading branch information
cjohns-scottlogic committed Jan 17, 2025
1 parent f55d595 commit 78bc149
Show file tree
Hide file tree
Showing 3 changed files with 1,136 additions and 1 deletion.
51 changes: 51 additions & 0 deletions digital_land/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,57 @@ def dataset_create(
package.add_counts()


#
# Update dataset from processed resources
#
def dataset_update(
    input_paths,
    dataset_path,
    organisation_path,
    pipeline,
    dataset,
    specification,
    issue_dir="issue",
    column_field_dir="var/column-field",
    dataset_resource_dir="var/dataset-resource",
):
    """Load processed resources into the dataset package at *dataset_path*.

    Unlike a full build, this function does not call ``package.create()``;
    it presumably expects the package at *dataset_path* to exist already
    (TODO confirm against ``DatasetPackage``).

    Args:
        input_paths: Iterable of paths passed one by one to
            ``package.load_transformed``. The file name of each path is also
            used to locate the matching column-field and dataset-resource
            files.
        dataset_path: Location of the dataset package to update. Must be
            truthy.
        organisation_path: Passed through to ``Organisation``.
        pipeline: Object whose ``path`` attribute is used as the
            organisation's pipeline directory.
        dataset: Dataset name; used as the sub-directory name under
            *issue_dir*, *column_field_dir* and *dataset_resource_dir*.
        specification: Currently unused here (see the TBD note on
            ``specification_dir`` below).
        issue_dir: Directory containing one sub-directory of issue files per
            dataset.
        column_field_dir: Directory containing per-dataset column-field files.
        dataset_resource_dir: Directory containing per-dataset
            dataset-resource files.

    Raises:
        SystemExit: With exit code 2 when *dataset_path* is empty or ``None``.
    """
    if not dataset_path:
        print(f"missing dataset path {dataset_path}", file=sys.stderr)
        sys.exit(2)

    # Set up initial objects
    column_field_dir = Path(column_field_dir)
    dataset_resource_dir = Path(dataset_resource_dir)
    organisation = Organisation(
        organisation_path=organisation_path, pipeline_dir=Path(pipeline.path)
    )
    package = DatasetPackage(
        dataset,
        organisation=organisation,
        path=dataset_path,
        specification_dir=None,  # TBD: package should use this specification object
    )

    for path in input_paths:
        # The per-resource side files share the transformed file's name.
        resource_name = Path(path).name
        package.load_transformed(path)
        package.load_column_fields(column_field_dir / dataset / resource_name)
        package.load_dataset_resource(dataset_resource_dir / dataset / resource_name)
    package.load_entities()

    # TODO: Handle updates to old-entity.csv. We can currently ignore this, as changes to
    # old-entity.csv will trigger a full rebuild rather than incremental loading.

    issue_paths = os.path.join(issue_dir, dataset)
    # isdir rather than exists: a plain file at this location would otherwise
    # pass the check and make os.listdir raise NotADirectoryError.
    if os.path.isdir(issue_paths):
        # os.listdir returns entries in arbitrary order; sort so that issues
        # are loaded in the same order on every run.
        for issue_path in sorted(os.listdir(issue_paths)):
            package.load_issues(os.path.join(issue_paths, issue_path))
    else:
        logging.warning("No directory for this dataset in the provided issue_directory")

    package.add_counts()


def dataset_dump(input_path, output_path):
cmd = f"sqlite3 -header -csv {input_path} 'select * from entity;' > {output_path}"
logging.info(cmd)
Expand Down
Loading

0 comments on commit 78bc149

Please sign in to comment.