Skip to content

Commit

Permalink
Raise NotConfigured depending on setting rather than middleware configuration
Browse files Browse the repository at this point in the history
  • Loading branch information
Wesley van Lee committed Jan 13, 2025
1 parent bde9252 commit e5fac52
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 6 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ gcs = ["google-cloud-storage"]
all = ["boto3", "google-cloud-storage"]

[project.urls]
Documentation = "https://developers.thequestionmark.org/scrapy-webarchive/"
Repository = "https://github.com/q-m/scrapy-webarchive"

[tool.ruff]
Expand Down
8 changes: 2 additions & 6 deletions scrapy_webarchive/extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,8 @@ def _check_configuration_prerequisites(self) -> None:
if not self.settings.get("SW_EXPORT_URI"):
raise NotConfigured("Missing SW_EXPORT_URI setting.")

forbidden_middleware = [
("scrapy_webarchive.spidermiddlewares.WaczCrawlMiddleware", "SPIDER_MIDDLEWARES"),
("scrapy_webarchive.downloadermiddlewares.WaczMiddleware", "DOWNLOADER_MIDDLEWARES"),
]
if any(middleware in self.settings.getlist(key) for middleware, key in forbidden_middleware):
raise NotConfigured("Disable WACZ middlewares in SPIDER_MIDDLEWARES and DOWNLOADER_MIDDLEWARES.")
if self.settings.get("SW_WACZ_SOURCE_URI"):
raise NotConfigured("WACZ exporter is disabled when scraping from a WACZ archive.")

def _retrieve_store_uri_and_wacz_fname(self) -> Tuple[str, Union[str, None]]:
"""Sets up the export URI based on configuration and spider context."""
Expand Down

0 comments on commit e5fac52

Please sign in to comment.