Skip to content

Commit

Permalink
Remove log_path from Collector.fetch return value; callers now derive it via Collector.log_path
Browse files Browse the repository at this point in the history
  • Loading branch information
CarlosCoelhoSL committed Dec 3, 2024
1 parent 813fdfd commit 217f77c
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 13 deletions.
8 changes: 4 additions & 4 deletions digital_land/collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def fetch(
plugin="",
):
if end_date and datetime.strptime(end_date, "%Y-%m-%d") < log_datetime:
return FetchStatus.EXPIRED, None
return FetchStatus.EXPIRED

url_endpoint = self.url_endpoint(url)
if not endpoint:
Expand All @@ -137,13 +137,13 @@ def fetch(
logging.error(
"url '%s' given endpoint %s expected %s" % (url, endpoint, url_endpoint)
)
return FetchStatus.HASH_FAILURE, None
return FetchStatus.HASH_FAILURE

# fetch each source at most once per-day
log_path = self.log_path(log_datetime, endpoint)
if os.path.isfile(log_path):
logging.debug(f"{log_path} exists")
return FetchStatus.ALREADY_FETCHED, log_path
return FetchStatus.ALREADY_FETCHED

log = {
"endpoint-url": url,
Expand All @@ -169,7 +169,7 @@ def fetch(
status = self.save_resource(content, log_path, log)

self.save_log(log_path, log)
return status, log_path
return status

def save_resource(self, content, url, log):
if content:
Expand Down
4 changes: 3 additions & 1 deletion digital_land/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from packaging.version import Version
import pandas as pd
from pathlib import Path
from datetime import datetime

import geojson
import shapely
Expand Down Expand Up @@ -642,13 +643,14 @@ def validate_and_add_data_input(
collector = Collector(collection_dir=collection_dir)

for endpoint in endpoints:
status, log_path = collector.fetch(
status = collector.fetch(
url=endpoint["endpoint-url"],
endpoint=endpoint["endpoint"],
end_date=endpoint["end-date"],
plugin=endpoint["plugin"],
)
try:
log_path = collector.log_path(datetime.utcnow(), endpoint["endpoint"])
with open(log_path, "r") as f:
log = json.load(f)

Expand Down
16 changes: 8 additions & 8 deletions tests/unit/test_collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def sha_digest(string):
@responses.activate
def test_fetch(collector, prepared_response, tmp_path):
url = "http://some.url"
status, log_path = collector.fetch(url)
status = collector.fetch(url)

assert status == FetchStatus.OK
output_path = tmp_path / f"resource/{sha_digest('some data')}"
Expand All @@ -52,33 +52,33 @@ def test_fetch(collector, prepared_response, tmp_path):

@responses.activate
def test_already_fetched(collector, prepared_response):
status, log_path = collector.fetch("http://some.url")
status = collector.fetch("http://some.url")
assert status == FetchStatus.OK

new_status, log_path = collector.fetch("http://some.url")
new_status = collector.fetch("http://some.url")
assert new_status == FetchStatus.ALREADY_FETCHED


@responses.activate
def test_expired(collector):
yesterday = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")

status, log_path = collector.fetch("http://some.url", end_date=yesterday)
status = collector.fetch("http://some.url", end_date=yesterday)

assert status == FetchStatus.EXPIRED


@responses.activate
def test_hash_check(collector, prepared_response):
url = "http://some.url"
status, log_path = collector.fetch(url, endpoint=sha_digest(url))
status = collector.fetch(url, endpoint=sha_digest(url))

assert status == FetchStatus.OK


@responses.activate
def test_hash_failure(collector, prepared_response):
status, log_path = collector.fetch("http://some.url", endpoint="http://other.url")
status = collector.fetch("http://some.url", endpoint="http://other.url")

assert status == FetchStatus.HASH_FAILURE

Expand Down Expand Up @@ -126,7 +126,7 @@ def test_strip_timestamp(collector, tmp_path):
content_type="application/json",
)

status, log_path = collector.fetch(url)
status = collector.fetch(url)

assert status == FetchStatus.OK
# Check that the timestamp is removed
Expand Down Expand Up @@ -156,7 +156,7 @@ def test_strip_timestamp_xml(collector, tmp_path):
content_type="application/xml;charset=UTF-8",
)

status, log_path = collector.fetch(url)
status = collector.fetch(url)

assert status == FetchStatus.OK
# Check that the timestamp is removed
Expand Down

0 comments on commit 217f77c

Please sign in to comment.