Replace `X | Y` type annotations with `typing.Union` for Python <3.10 compatibility

q-m · Oct 15, 2024 · 1dc564d
1 parent e0dea76
commit 1dc564d
Show file tree

Hide file tree

Showing 3 changed files with 15 additions and 15 deletions.
diff --git a/scrapy_webarchive/downloadermiddlewares.py b/scrapy_webarchive/downloadermiddlewares.py
@@ -1,5 +1,5 @@
 import re
-from typing import IO, List
+from typing import IO, List, Union
 
 from scrapy import signals
 from scrapy.crawler import Crawler
@@ -24,7 +24,7 @@ class WaczMiddleware:
     This helps to work with large archives, including remote ones.
     """
 
-    wacz: WaczFile | MultiWaczFile
+    wacz: Union[WaczFile, MultiWaczFile]
 
     def __init__(self, settings: Settings, stats: StatsCollector) -> None:
         self.stats = stats
@@ -48,7 +48,7 @@ def spider_opened(self, spider: Spider) -> None:
         tp = {"timeout": self.timeout}
         multiple_entries = len(self.wacz_urls) != 1
 
-        def open_wacz_file(wacz_url: str) -> IO[bytes] | None:
+        def open_wacz_file(wacz_url: str) -> Union[IO[bytes], None]:
             spider.logger.info(f"[WACZDownloader] Opening WACZ {wacz_url}")
 
             try:

diff --git a/scrapy_webarchive/middleware.py b/scrapy_webarchive/middleware.py
@@ -1,5 +1,5 @@
 import re
-from typing import IO, List
+from typing import IO, List, Union
 from urllib.parse import urlparse
 
 from scrapy import Request, Spider, signals
@@ -15,7 +15,7 @@
 
 
 class WaczCrawlMiddleware:
-    wacz: WaczFile | MultiWaczFile
+    wacz: Union[WaczFile, MultiWaczFile]
 
     def __init__(self, settings: Settings, stats: StatsCollector) -> None:
         self.stats = stats
@@ -42,7 +42,7 @@ def spider_opened(self, spider: Spider) -> None:
         tp = {"timeout": self.timeout}
         multiple_entries = len(self.wacz_urls) != 1
 
-        def open_wacz_file(wacz_url: str) -> IO[bytes] | None:
+        def open_wacz_file(wacz_url: str) -> Union[IO[bytes], None]:
             spider.logger.info(f"[WACZDownloader] Opening WACZ {wacz_url}")
 
             try:

diff --git a/scrapy_webarchive/wacz.py b/scrapy_webarchive/wacz.py
@@ -3,7 +3,7 @@
 import os
 import zipfile
 from collections import defaultdict
-from typing import IO, Generator, List
+from typing import IO, Generator, List, Union
 
 from warc import WARCReader as BaseWARCReader
 from warc.warc import WARCRecord
@@ -81,14 +81,14 @@ def __init__(self, file: IO[bytes]):
         self.wacz_file = zipfile.ZipFile(file)
         self.index = self._parse_index(self._get_index(self.wacz_file))
 
-    def _find_in_index(self, url: str) -> CdxjRecord | None:
+    def _find_in_index(self, url: str) -> Union[CdxjRecord, None]:
         records = self.index.get(url, [])
 
         # If multiple entries are present, the last one is most likely to be relevant
         return records[-1] if records else None
 
-    def get_warc_from_cdxj_record(self, cdxj_record: CdxjRecord) -> WARCRecord | None:
-        warc_file: gzip.GzipFile | IO[bytes]
+    def get_warc_from_cdxj_record(self, cdxj_record: CdxjRecord) -> Union[WARCRecord, None]:
+        warc_file: Union[gzip.GzipFile, IO[bytes]]
 
         try:
             warc_file = self.wacz_file.open("archive/" + cdxj_record.data["filename"])
@@ -101,7 +101,7 @@ def get_warc_from_cdxj_record(self, cdxj_record: CdxjRecord) -> WARCRecord | Non
 
         return WARCReader(warc_file).read_record()
 
-    def get_warc_from_url(self, url: str) -> WARCRecord | None:
+    def get_warc_from_url(self, url: str) -> Union[WARCRecord, None]:
         cdxj_record = self._find_in_index(url)
         return self.get_warc_from_cdxj_record(cdxj_record) if cdxj_record else None
 
@@ -111,7 +111,7 @@ def iter_index(self) -> Generator[CdxjRecord, None, None]:
                 yield cdxj_record
 
     @staticmethod
-    def _get_index(wacz_file: zipfile.ZipFile) -> gzip.GzipFile | IO[bytes]:
+    def _get_index(wacz_file: zipfile.ZipFile) -> Union[gzip.GzipFile, IO[bytes]]:
         """Opens the index file from the WACZ archive, checking for .cdxj, .cdxj.gz, .cdx. and .cdx.gz"""
 
         index_paths = [
@@ -134,7 +134,7 @@ def _get_index(wacz_file: zipfile.ZipFile) -> gzip.GzipFile | IO[bytes]:
 
         raise FileNotFoundError("No valid index file found.")
 
-    def _parse_index(self, index_file: gzip.GzipFile | IO[bytes]) -> dict[str, List[CdxjRecord]]:
+    def _parse_index(self, index_file: Union[gzip.GzipFile, IO[bytes]]) -> dict[str, List[CdxjRecord]]:
         cdxj_records = defaultdict(list)
 
         for line in index_file:
@@ -155,10 +155,10 @@ class MultiWaczFile:
     def __init__(self, wacz_files: List[IO[bytes]]) -> None:
         self.waczs = [WaczFile(wacz_file) for wacz_file in wacz_files]
 
-    def get_warc_from_cdxj_record(self, cdxj_record: CdxjRecord) -> WARCRecord | None:
+    def get_warc_from_cdxj_record(self, cdxj_record: CdxjRecord) -> Union[WARCRecord, None]:
         return cdxj_record.wacz_file.get_warc_from_cdxj_record(cdxj_record) if cdxj_record.wacz_file else None
 
-    def get_warc_from_url(self, url: str) -> WARCRecord | None:
+    def get_warc_from_url(self, url: str) -> Union[WARCRecord, None]:
         for wacz in self.waczs:
             warc_record = wacz.get_warc_from_url(url)
             if warc_record: