From 012b3f7bc411f32e92f325996c3ed78819c58aa3 Mon Sep 17 00:00:00 2001 From: Wesley van Lee Date: Wed, 13 Nov 2024 14:12:32 +0100 Subject: [PATCH] Write error response to WACZ (#19, PR#20) --- scrapy_webarchive/extensions.py | 4 ++-- tests/test_extensions.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scrapy_webarchive/extensions.py b/scrapy_webarchive/extensions.py index fcb9014..5cd5956 100644 --- a/scrapy_webarchive/extensions.py +++ b/scrapy_webarchive/extensions.py @@ -107,7 +107,7 @@ def from_crawler(cls, crawler: Crawler) -> Self: except AttributeError: exporter = cls(crawler.settings, crawler) - crawler.signals.connect(exporter.response_received, signal=signals.response_received) + crawler.signals.connect(exporter.response_downloaded, signal=signals.response_downloaded) crawler.signals.connect(exporter.spider_closed, signal=signals.spider_closed) crawler.signals.connect(exporter.spider_opened, signal=signals.spider_opened) return exporter @@ -143,7 +143,7 @@ def from_settings(cls, settings: Settings, crawler: Crawler): def spider_opened(self) -> None: self.writer.write_warcinfo(robotstxt_obey=self.settings["ROBOTSTXT_OBEY"]) - def response_received(self, response: Response, request: Request, spider: Spider) -> None: + def response_downloaded(self, response: Response, request: Request, spider: Spider) -> None: request.meta["WARC-Date"] = get_formatted_dt_string(format=WARC_DT_FORMAT) # Write response WARC record diff --git a/tests/test_extensions.py b/tests/test_extensions.py index 9b6a775..8f36217 100644 --- a/tests/test_extensions.py +++ b/tests/test_extensions.py @@ -40,7 +40,7 @@ def test_get_store(self, *args): extension = WaczExporter.from_crawler(crawler) assert isinstance(extension.store, FTPFilesStore) - def test_response_received(self): + def test_response_downloaded(self): crawler = get_crawler(settings_dict={"SW_EXPORT_URI": "/tmp/scrapy-webarchive/wacz/"}) crawler.spider = crawler._create_spider("quotes") extension = WaczExporter.from_crawler(crawler) @@ -49,7 +49,7 @@ def test_response_received(self): # Call the method under test request = Request("http://example.com") response = Response(request.url) - extension.response_received(response, request, crawler.spider) + extension.response_downloaded(response, request, crawler.spider) # Verify that the WARC date was set in request meta assert "WARC-Date" in request.meta