diff --git a/makerules/collection.mk b/makerules/collection.mk
index 86578cf..d48f3f1 100644
--- a/makerules/collection.mk
+++ b/makerules/collection.mk
@@ -42,13 +42,17 @@ COLLECTION_INDEX=\
 	$(COLLECTION_DIR)/log.csv\
 	$(COLLECTION_DIR)/resource.csv
 
+# collection URL
+COLLECTION_URL=\
+	$(DATASTORE_URL)$(COLLECTION)-collection/collection
+
 init::
-	$(eval LOG_STATUS_CODE := $(shell curl -I -o /dev/null -s -w "%{http_code}" '$(DATASTORE_URL)$(REPOSITORY)/$(COLLECTION_DIR)log.csv'))
-	$(eval RESOURCE_STATUS_CODE = $(shell curl -I -o /dev/null -s -w "%{http_code}" '$(DATASTORE_URL)$(REPOSITORY)/$(COLLECTION_DIR)resource.csv'))
+	$(eval LOG_STATUS_CODE := $(shell curl -I -o /dev/null -s -w "%{http_code}" '$(COLLECTION_URL)/log.csv'))
+	$(eval RESOURCE_STATUS_CODE = $(shell curl -I -o /dev/null -s -w "%{http_code}" '$(COLLECTION_URL)/resource.csv'))
 	@if [ $(LOG_STATUS_CODE) -ne 403 ] && [ $(RESOURCE_STATUS_CODE) -ne 403 ]; then \
-		echo 'Downloading log.csv and resource.csv'; \
-		curl -qfsL '$(DATASTORE_URL)$(REPOSITORY)/$(COLLECTION_DIR)log.csv' > $(COLLECTION_DIR)log.csv; \
-		curl -qfsL '$(DATASTORE_URL)$(REPOSITORY)/$(COLLECTION_DIR)resource.csv' > $(COLLECTION_DIR)resource.csv; \
+		echo 'Downloading log.csv and resource.csv'; \
+		curl -qfsL '$(COLLECTION_URL)/log.csv' > $(COLLECTION_DIR)log.csv; \
+		curl -qfsL '$(COLLECTION_URL)/resource.csv' > $(COLLECTION_DIR)resource.csv; \
 	else \
 		echo 'Unable to locate log.csv and resource.csv' ;\
 	fi
@@ -68,7 +72,7 @@ clobber-today::
 	rm -rf $(LOG_FILES_TODAY) $(COLLECTION_INDEX)
 
 makerules::
-	curl -qfsL '$(SOURCE_URL)/makerules/main/collection.mk' > makerules/collection.mk
+	curl -qfsL '$(MAKERULES_URL)collection.mk' > makerules/collection.mk
 
 commit-collection::
 	git add collection/log
@@ -96,6 +100,10 @@ collection/resource/%:
 	@mkdir -p collection/resource/
 	curl -qfsL '$(DATASTORE_URL)$(REPOSITORY)/$(RESOURCE_DIR)$(notdir $@)' > $@
 
+collection/$(COLLECTION)/resource/%:
+	@mkdir -p collection/$(COLLECTION)/resource/
+	curl -qfsL '$(COLLECTION_URL)/resource/$(notdir $@)' > $@
+
 collection/%.csv:
 	@mkdir -p $(COLLECTION_DIR)
 	curl -qfsL '$(COLLECTION_CONFIG_URL)$(notdir $@)' > $@
diff --git a/makerules/development.mk b/makerules/development.mk
index 7e81738..146513b 100644
--- a/makerules/development.mk
+++ b/makerules/development.mk
@@ -24,4 +24,4 @@ prune::
 	rm -rf ./.venv
 
 makerules::
-	curl -qfsL '$(SOURCE_URL)/makerules/main/development.mk' > makerules/development.mk
+	curl -qfsL '$(MAKERULES_URL)development.mk' > makerules/development.mk
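Note: the new COLLECTION_URL points at the per-collection layout on the
datastore, and the added pattern rule fetches a single resource through it. A
minimal sketch of exercising the rule, assuming the repository's Makefile sets
COLLECTION=listed-building (<resource-hash> stands in for a real resource
hash):

    make collection/listed-building/resource/<resource-hash>

curl then requests
'$(DATASTORE_URL)listed-building-collection/collection/resource/<resource-hash>'
and writes the result under collection/listed-building/resource/.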
diff --git a/makerules/makerules.mk b/makerules/makerules.mk
index 4932796..7532129 100644
--- a/makerules/makerules.mk
+++ b/makerules/makerules.mk
@@ -11,6 +11,10 @@ ifeq ($(SOURCE_URL),)
 SOURCE_URL=https://raw.githubusercontent.com/digital-land/
 endif
 
+ifeq ($(MAKERULES_URL),)
+MAKERULES_URL=$(SOURCE_URL)makerules/main/
+endif
+
 ifeq ($(CONFIG_URL),)
 CONFIG_URL=https://raw.githubusercontent.com/digital-land/config/main/
 endif
@@ -31,8 +35,12 @@ define dataset_url
 'https://$(COLLECTION_DATASET_BUCKET_NAME).s3.eu-west-2.amazonaws.com/$(2)-collection/dataset/$(1).sqlite3'
 endef
 
+ifeq ($(VAR_DIR),)
+VAR_DIR=var/
+endif
+
 ifeq ($(CACHE_DIR),)
-CACHE_DIR=var/cache/
+CACHE_DIR=$(VAR_DIR)cache/
 endif
 
 
@@ -113,11 +121,11 @@ clean::
 
 # prune back to source code
 prune::
-	rm -rf ./var $(VALIDATION_DIR)
+	rm -rf ./$(VAR_DIR) $(VALIDATION_DIR)
 
 # update makerules from source
 makerules::
-	curl -qfsL '$(SOURCE_URL)/makerules/main/makerules.mk' > makerules/makerules.mk
+	curl -qfsL '$(MAKERULES_URL)makerules.mk' > makerules/makerules.mk
 
 ifeq (,$(wildcard ./makerules/specification.mk))
 # update local copies of specification files
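Note: MAKERULES_URL defaults to $(SOURCE_URL)makerules/main/, so the
makerules targets behave exactly as before, but the source of the makerules
can now be overridden in one place. A sketch of updating from a branch (the
branch name is illustrative):

    make makerules MAKERULES_URL='https://raw.githubusercontent.com/digital-land/makerules/my-branch/'

The trailing slash matters: the targets append the filename directly, as in
$(MAKERULES_URL)makerules.mk.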
diff --git a/makerules/pipeline.mk b/makerules/pipeline.mk
index c773fe4..2cfa207 100644
--- a/makerules/pipeline.mk
+++ b/makerules/pipeline.mk
@@ -29,8 +29,12 @@ ifeq ($(FIXED_DIR),)
 FIXED_DIR=fixed/
 endif
 
+ifeq ($(VAR_DIR),)
+VAR_DIR=var/
+endif
+
 ifeq ($(CACHE_DIR),)
-CACHE_DIR=var/cache/
+CACHE_DIR=$(VAR_DIR)cache/
 endif
 
 ifeq ($(TRANSFORMED_DIR),)
@@ -50,15 +54,15 @@ OPERATIONAL_ISSUE_DIR=$(PERFORMANCE_DIR)operational_issue/
 endif
 
 ifeq ($(COLUMN_FIELD_DIR),)
-COLUMN_FIELD_DIR=var/column-field/
+COLUMN_FIELD_DIR=$(VAR_DIR)column-field/
 endif
 
 ifeq ($(DATASET_RESOURCE_DIR),)
-DATASET_RESOURCE_DIR=var/dataset-resource/
+DATASET_RESOURCE_DIR=$(VAR_DIR)dataset-resource/
 endif
 
 ifeq ($(CONVERTED_RESOURCE_DIR),)
-CONVERTED_RESOURCE_DIR=var/converted-resource/
+CONVERTED_RESOURCE_DIR=$(VAR_DIR)converted-resource/
 endif
 
 ifeq ($(DATASET_DIR),)
@@ -104,16 +108,16 @@ endif
 
 define run-pipeline
 	mkdir -p $(@D) $(ISSUE_DIR)$(notdir $(@D)) $(OPERATIONAL_ISSUE_DIR) $(COLUMN_FIELD_DIR)$(notdir $(@D)) $(DATASET_RESOURCE_DIR)$(notdir $(@D)) $(CONVERTED_RESOURCE_DIR)$(notdir $(@D))
-	digital-land ${DIGITAL_LAND_OPTS} --dataset $(notdir $(@D)) $(DIGITAL_LAND_FLAGS) pipeline $(1) --issue-dir $(ISSUE_DIR)$(notdir $(@D)) --column-field-dir $(COLUMN_FIELD_DIR)$(notdir $(@D)) --dataset-resource-dir $(DATASET_RESOURCE_DIR)$(notdir $(@D)) --converted-resource-dir $(CONVERTED_RESOURCE_DIR)$(notdir $(@D)) --config-path $(CACHE_DIR)config.sqlite3 $(PIPELINE_FLAGS) $< $@
+	digital-land ${DIGITAL_LAND_OPTS} --dataset $(notdir $(@D)) --pipeline-dir $(PIPELINE_DIR) $(DIGITAL_LAND_FLAGS) pipeline $(1) --issue-dir $(ISSUE_DIR)$(notdir $(@D)) --column-field-dir $(COLUMN_FIELD_DIR)$(notdir $(@D)) --dataset-resource-dir $(DATASET_RESOURCE_DIR)$(notdir $(@D)) --converted-resource-dir $(CONVERTED_RESOURCE_DIR)$(notdir $(@D)) --config-path $(CACHE_DIR)config.sqlite3 --organisation-path $(CACHE_DIR)organisation.csv $(PIPELINE_FLAGS) $< $@
 endef
 
 define build-dataset =
 	mkdir -p $(@D)
-	time digital-land ${DIGITAL_LAND_OPTS} --dataset $(notdir $(basename $@)) dataset-create --output-path $(basename $@).sqlite3 --organisation-path $(CACHE_DIR)organisation.csv --issue-dir $(ISSUE_DIR) --column-field-dir=$(COLUMN_FIELD_DIR) --dataset-resource-dir $(DATASET_RESOURCE_DIR) $(^)
+	time digital-land ${DIGITAL_LAND_OPTS} --dataset $(notdir $(basename $@)) --pipeline-dir $(PIPELINE_DIR) dataset-create --output-path $(basename $@).sqlite3 --organisation-path $(CACHE_DIR)organisation.csv --issue-dir $(ISSUE_DIR) --column-field-dir=$(COLUMN_FIELD_DIR) --dataset-resource-dir $(DATASET_RESOURCE_DIR) $(^)
 	time datasette inspect $(basename $@).sqlite3 --inspect-file=$(basename $@).sqlite3.json
-	time digital-land ${DIGITAL_LAND_OPTS} --dataset $(notdir $(basename $@)) dataset-entries $(basename $@).sqlite3 $@
+	time digital-land ${DIGITAL_LAND_OPTS} --dataset $(notdir $(basename $@)) --pipeline-dir $(PIPELINE_DIR) dataset-entries $(basename $@).sqlite3 $@
 	mkdir -p $(FLATTENED_DIR)
-	time digital-land ${DIGITAL_LAND_OPTS} --dataset $(notdir $(basename $@)) dataset-entries-flattened $@ $(FLATTENED_DIR)
+	time digital-land ${DIGITAL_LAND_OPTS} --dataset $(notdir $(basename $@)) --pipeline-dir $(PIPELINE_DIR) dataset-entries-flattened $@ $(FLATTENED_DIR)
 	md5sum $@ $(basename $@).sqlite3
 	csvstack $(ISSUE_DIR)$(notdir $(basename $@))/*.csv > $(basename $@)-issue.csv
 	mkdir -p $(EXPECTATION_DIR)
@@ -154,7 +158,7 @@ clobber::
 	rm -rf $(DATASET_DIRS)
 
 clean::
-	rm -rf ./var
+	rm -rf ./$(VAR_DIR)
 
 # local copy of the organisation dataset
 # Download historic operational issue log data for relevant datasets
@@ -176,7 +180,7 @@ init::
 		$(CACHE_DIR)organisation.csv
 	done
 
 makerules::
-	curl -qfsL '$(SOURCE_URL)/makerules/main/pipeline.mk' > makerules/pipeline.mk
+	curl -qfsL '$(MAKERULES_URL)pipeline.mk' > makerules/pipeline.mk
 
 save-transformed::
 	aws s3 sync $(TRANSFORMED_DIR) s3://$(COLLECTION_DATASET_BUCKET_NAME)/$(REPOSITORY)/$(TRANSFORMED_DIR) --no-progress
@@ -204,7 +208,7 @@ save-performance::
 # convert an individual resource
 # .. this assumes conversion is the same for every dataset, but it may not be soon
 var/converted/%.csv: collection/resource/%
-	mkdir -p var/converted/
+	mkdir -p $(VAR_DIR)converted/
 	digital-land ${DIGITAL_LAND_OPTS} convert $<
 
 transformed::
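Note: VAR_DIR defaults to var/, so CACHE_DIR, COLUMN_FIELD_DIR,
DATASET_RESOURCE_DIR and CONVERTED_RESOURCE_DIR still resolve to the same
var/... paths as before. Like the other *_DIR variables it needs a trailing
slash, because it is concatenated directly (as in $(VAR_DIR)cache/). A sketch
with an illustrative relative override:

    make clean VAR_DIR=scratch/

which removes ./scratch/ rather than ./var/. One caveat visible in the last
hunk: the var/converted/%.csv target itself still hard-codes var/, so the
converted-resource rule keeps matching paths under var/ even when VAR_DIR is
overridden.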