Raise HTTPError when collecting from an endpoint URL fails (non-200 status)

digital-land · Dec 3, 2024 · 6e94e3c
1 parent 217f77c
commit 6e94e3c
Show file tree

Hide file tree

Showing 2 changed files with 35 additions and 32 deletions.
diff --git a/digital_land/commands.py b/digital_land/commands.py
@@ -11,6 +11,7 @@
 from datetime import datetime
 
 import geojson
+from requests import HTTPError
 import shapely
 
 from digital_land.package.organisation import OrganisationPackage
@@ -641,7 +642,6 @@ def validate_and_add_data_input(
 
     # if successfully added we can now attempt to fetch from endpoint
     collector = Collector(collection_dir=collection_dir)
-
     for endpoint in endpoints:
         status = collector.fetch(
             url=endpoint["endpoint-url"],
@@ -653,32 +653,33 @@ def validate_and_add_data_input(
             log_path = collector.log_path(datetime.utcnow(), endpoint["endpoint"])
             with open(log_path, "r") as f:
                 log = json.load(f)
-
-            # Resource and path will only be printed if downloaded successfully
-            if log.get("resource", None):
-                print(
-                    "Resource collected: ",
-                    log.get("resource"),
-                )
-                print(
-                    "Resource Path is: ",
-                    Path(collection_dir) / "resource" / log.get("resource"),
-                )
-
-            status = log.get("status", None)
-            # Use exception instead of status if there is no status
-            if not status:
-                status = log.get("exception")
-
-            log_message = f"Log Status for {endpoint['endpoint']}:"
-            if status != "200":
-                log_message += " The status is not 200."
-            print(log_message + f" The status is {status}")
-
         except Exception as e:
             print(
                 f"Error: The log file for {endpoint} could not be read from path {log_path}.\n{e}"
             )
+            break
+
+        status = log.get("status", None)
+        # Raise exception if status is not 200
+        if not status or status != "200":
+            exception = log.get("exception", None)
+            raise HTTPError(
+                f"Failed to collect from URL with status: {status if status else exception}"
+            )
+
+        # Resource and path will only be printed if downloaded successfully
+        if log.get("resource", None):
+            print(
+                "Resource collected: ",
+                log.get("resource"),
+            )
+            print(
+                "Resource Path is: ",
+                Path(collection_dir) / "resource" / log.get("resource"),
+            )
+
+        print(f"Log Status for {endpoint['endpoint']}: The status is {status}")
+
     return collection
 
 

diff --git a/tests/integration/test_add_data.py b/tests/integration/test_add_data.py
@@ -4,6 +4,7 @@
 import tempfile
 from unittest.mock import Mock
 import pytest
+from requests import HTTPError
 
 from digital_land.commands import validate_and_add_data_input
 from tests.acceptance.conftest import copy_latest_specification_files_to
@@ -352,7 +353,7 @@ def test_validate_and_add_data(
 
 
 def test_validate_and_add_data_input_non_200(
-    collection_dir, specification_dir, organisation_csv, capsys, mocker
+    collection_dir, specification_dir, organisation_csv, mocker
 ):
 
     mock_response = Mock()
@@ -377,15 +378,16 @@ def test_validate_and_add_data_input_non_200(
 
     tmp_input_path = create_input_csv(no_error_input_data)
 
-    validate_and_add_data_input(
-        tmp_input_path,
-        collection_name,
-        collection_dir,
-        specification_dir,
-        organisation_csv,
-    )
+    with pytest.raises(HTTPError) as error:
+        validate_and_add_data_input(
+            tmp_input_path,
+            collection_name,
+            collection_dir,
+            specification_dir,
+            organisation_csv,
+        )
 
-    assert "The status is not 200" in capsys.readouterr().out
+    assert "Failed to collect from URL with status: 404" in str(error)
 
 
 def test_validate_and_add_data_input_duplicate_endpoint(