diff --git a/digital_land/collect.py b/digital_land/collect.py index af88c771..de777322 100755 --- a/digital_land/collect.py +++ b/digital_land/collect.py @@ -128,7 +128,7 @@ def fetch( plugin="", ): if end_date and datetime.strptime(end_date, "%Y-%m-%d") < log_datetime: - return FetchStatus.EXPIRED, None + return FetchStatus.EXPIRED url_endpoint = self.url_endpoint(url) if not endpoint: @@ -137,13 +137,13 @@ def fetch( logging.error( "url '%s' given endpoint %s expected %s" % (url, endpoint, url_endpoint) ) - return FetchStatus.HASH_FAILURE, None + return FetchStatus.HASH_FAILURE # fetch each source at most once per-day log_path = self.log_path(log_datetime, endpoint) if os.path.isfile(log_path): logging.debug(f"{log_path} exists") - return FetchStatus.ALREADY_FETCHED, log_path + return FetchStatus.ALREADY_FETCHED log = { "endpoint-url": url, @@ -169,7 +169,7 @@ def fetch( status = self.save_resource(content, log_path, log) self.save_log(log_path, log) - return status, log_path + return status def save_resource(self, content, url, log): if content: diff --git a/digital_land/commands.py b/digital_land/commands.py index ea8583a3..008916c5 100644 --- a/digital_land/commands.py +++ b/digital_land/commands.py @@ -8,6 +8,7 @@ from packaging.version import Version import pandas as pd from pathlib import Path +from datetime import datetime import geojson import shapely @@ -642,13 +643,14 @@ def validate_and_add_data_input( collector = Collector(collection_dir=collection_dir) for endpoint in endpoints: - status, log_path = collector.fetch( + status = collector.fetch( url=endpoint["endpoint-url"], endpoint=endpoint["endpoint"], end_date=endpoint["end-date"], plugin=endpoint["plugin"], ) try: + log_path = collector.log_path(datetime.utcnow(), endpoint["endpoint"]) with open(log_path, "r") as f: log = json.load(f) diff --git a/tests/unit/test_collect.py b/tests/unit/test_collect.py index 3a0181d7..1ab44298 100644 --- a/tests/unit/test_collect.py +++ b/tests/unit/test_collect.py @@ -41,7 +41,7 @@ def sha_digest(string): @responses.activate def test_fetch(collector, prepared_response, tmp_path): url = "http://some.url" - status, log_path = collector.fetch(url) + status = collector.fetch(url) assert status == FetchStatus.OK output_path = tmp_path / f"resource/{sha_digest('some data')}" @@ -52,10 +52,10 @@ def test_fetch(collector, prepared_response, tmp_path): @responses.activate def test_already_fetched(collector, prepared_response): - status, log_path = collector.fetch("http://some.url") + status = collector.fetch("http://some.url") assert status == FetchStatus.OK - new_status, log_path = collector.fetch("http://some.url") + new_status = collector.fetch("http://some.url") assert new_status == FetchStatus.ALREADY_FETCHED @@ -63,7 +63,7 @@ def test_already_fetched(collector, prepared_response): def test_expired(collector): yesterday = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d") - status, log_path = collector.fetch("http://some.url", end_date=yesterday) + status = collector.fetch("http://some.url", end_date=yesterday) assert status == FetchStatus.EXPIRED @@ -71,14 +71,14 @@ def test_expired(collector): @responses.activate def test_hash_check(collector, prepared_response): url = "http://some.url" - status, log_path = collector.fetch(url, endpoint=sha_digest(url)) + status = collector.fetch(url, endpoint=sha_digest(url)) assert status == FetchStatus.OK @responses.activate def test_hash_failure(collector, prepared_response): - status, log_path = collector.fetch("http://some.url", endpoint="http://other.url") + status = collector.fetch("http://some.url", endpoint="http://other.url") assert status == FetchStatus.HASH_FAILURE @@ -126,7 +126,7 @@ def test_strip_timestamp(collector, tmp_path): content_type="application/json", ) - status, log_path = collector.fetch(url) + status = collector.fetch(url) assert status == FetchStatus.OK # Check that the timestamp is removed @@ -156,7 +156,7 @@ def test_strip_timestamp_xml(collector, tmp_path): content_type="application/xml;charset=UTF-8", ) - status, log_path = collector.fetch(url) + status = collector.fetch(url) assert status == FetchStatus.OK # Check that the timestamp is removed