Add imgchest downloader
Adds downloader for imgchest albums
OMEGARAZER committed May 16, 2023
1 parent 48e065d commit 8b2344d
Showing 5 changed files with 123 additions and 2 deletions.
2 changes: 1 addition & 1 deletion bdfrx/site_downloaders/catbox.py
@@ -25,7 +25,7 @@ def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> l
return links

@staticmethod
def get_links(url: str) -> set[str]:
def _get_links(url: str) -> set[str]:
content = Catbox.retrieve_url(url)
soup = bs4.BeautifulSoup(content.text, "html.parser")
collection_div = soup.find("div", attrs={"class": "imagecontainer"})
3 changes: 3 additions & 0 deletions bdfrx/site_downloaders/download_factory.py
@@ -10,6 +10,7 @@
from bdfrx.site_downloaders.fallback_downloaders.ytdlp_fallback import YtdlpFallback
from bdfrx.site_downloaders.gallery import Gallery
from bdfrx.site_downloaders.gfycat import Gfycat
from bdfrx.site_downloaders.imgchest import Imgchest
from bdfrx.site_downloaders.imgur import Imgur
from bdfrx.site_downloaders.pornhub import PornHub
from bdfrx.site_downloaders.redgifs import Redgifs
@@ -41,6 +42,8 @@ def pull_lever(url: str) -> type[BaseDownloader]: # noqa: PLR0911,PLR0912
return DelayForReddit
if re.match(r"reddit\.com/gallery/.*", sanitised_url) or re.match(r"patreon\.com.*", sanitised_url):
return Gallery
if re.match(r"imgchest\.com/p/", sanitised_url):
return Imgchest
if re.match(r"reddit\.com/r/", sanitised_url):
return SelfPost
if re.match(r"(m\.)?youtu\.?be", sanitised_url):
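
For context, the dispatch branch added above can be exercised directly. A minimal sketch, not part of this commit, assuming the factory class is named DownloadFactory, that pull_lever is callable as a static method, and that the URL sanitisation (not shown in this hunk) strips the scheme and "www." prefix before the regex checks:

# Sketch only: verify that an imgchest album URL resolves to the new downloader.
from bdfrx.site_downloaders.download_factory import DownloadFactory  # assumed class name
from bdfrx.site_downloaders.imgchest import Imgchest

downloader_class = DownloadFactory.pull_lever("https://www.imgchest.com/p/ro24aogylj5")
assert downloader_class is Imgchest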
35 changes: 35 additions & 0 deletions bdfrx/site_downloaders/imgchest.py
@@ -0,0 +1,35 @@
import logging
from typing import Optional

import bs4
from praw.models import Submission

from bdfrx.exceptions import SiteDownloaderError
from bdfrx.resource import Resource
from bdfrx.site_authenticator import SiteAuthenticator
from bdfrx.site_downloaders.base_downloader import BaseDownloader

logger = logging.getLogger(__name__)


class Imgchest(BaseDownloader):
def __init__(self, post: Submission) -> None:
super().__init__(post)

def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
links = self._get_links(self.post.url)
if not links:
raise SiteDownloaderError("Imgchest parser could not find any links")
links = [Resource(self.post, link, Resource.retry_download(link)) for link in links]
return links

@staticmethod
def _get_links(url: str) -> set[str]:
page = Imgchest.retrieve_url(url)
soup = bs4.BeautifulSoup(page.text, "html.parser")
album_div = soup.find("div", attrs={"id": "post-images"})
images = album_div.find_all("img")
out = [im.get("src") for im in images]
videos = album_div.find_all("source")
out.extend([vid.get("src") for vid in videos])
return set(out)
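
The scraping logic above collects every img and source src attribute inside the "post-images" container. A minimal sketch, not from the commit, showing the same extraction against an inline HTML fragment so the parsing behaviour can be checked without a network call; the markup below is a hypothetical stand-in for an imgchest album page:

import bs4

# Hypothetical album markup mirroring the structure _get_links expects:
# a div with id="post-images" containing <img> and <source> tags.
sample_html = """
<div id="post-images">
  <img src="https://cdn.imgchest.com/files/example1.jpg">
  <video><source src="https://cdn.imgchest.com/files/example2.mp4"></video>
</div>
"""

soup = bs4.BeautifulSoup(sample_html, "html.parser")
album_div = soup.find("div", attrs={"id": "post-images"})
out = [im.get("src") for im in album_div.find_all("img")]
out.extend([vid.get("src") for vid in album_div.find_all("source")])
print(set(out))
# {'https://cdn.imgchest.com/files/example1.jpg', 'https://cdn.imgchest.com/files/example2.mp4'}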
2 changes: 1 addition & 1 deletion tests/site_downloaders/test_catbox.py
@@ -25,7 +25,7 @@
),
)
def test_get_links(test_url: str, expected: set[str]):
results = Catbox.get_links(test_url)
results = Catbox._get_links(test_url)
assert results == expected


83 changes: 83 additions & 0 deletions tests/site_downloaders/test_imgchest.py
@@ -0,0 +1,83 @@
from unittest.mock import Mock

import pytest

from bdfrx.resource import Resource
from bdfrx.site_downloaders.imgchest import Imgchest


@pytest.mark.online
@pytest.mark.parametrize(
("test_url", "expected"),
(
(
"https://www.imgchest.com/p/ro24aogylj5",
{
"https://cdn.imgchest.com/files/jd7ogcgl5y9.jpg",
"https://cdn.imgchest.com/files/rj7kzcdv27m.jpg",
"https://cdn.imgchest.com/files/vmy2pc2pr7j.jpg",
"https://cdn.imgchest.com/files/xl7lxce967o.jpg",
},
),
(
"https://www.imgchest.com/p/o24ap5wd4lj",
{
"https://cdn.imgchest.com/files/k46ac86kq7z.jpeg",
"https://cdn.imgchest.com/files/pyvdczlvayk.jpeg",
"https://cdn.imgchest.com/files/6yxkcvlrn7w.jpeg",
"https://cdn.imgchest.com/files/b49zce5wkyw.jpeg",
"https://cdn.imgchest.com/files/l4necb3kw4m.jpeg",
"https://cdn.imgchest.com/files/p7bwc3rx37n.mp4",
"https://cdn.imgchest.com/files/w7pjcbe587p.mp4",
"https://cdn.imgchest.com/files/d7ogcr95jy9.mp4",
"https://cdn.imgchest.com/files/j7kzc9r557m.mp4",
"https://cdn.imgchest.com/files/my2pc3wzl7j.mp4",
},
),
),
)
def test_get_links(test_url: str, expected: set[str]):
results = Imgchest._get_links(test_url)
assert results == expected


@pytest.mark.online
@pytest.mark.slow
@pytest.mark.parametrize(
("test_url", "expected_hashes"),
(
(
"https://www.imgchest.com/p/ro24aogylj5",
{
"91f1a5919b32af6cbf5c24528e83871c",
"c4969ac347fdcefbb6b2ec01c0be02ae",
"a9db23217974d8b78c84b463224f130a",
"6a0d0e28f02c2cdccff80f9973efbad3",
},
),
(
"https://www.imgchest.com/p/o24ap5wd4lj",
{
"a4ea3f676c8a1cbca8e2faf70a031e1e",
"59db5f35f5969d638c4036a3a249b1e1",
"73ee75fe341022cd643431a4fb78be3d",
"6fe6f1239dd39f948b3abb583c310c7d",
"8e9b652c62b906ba54607c7fd8ce6d63",
"108b167b04830ce0a59c27415bb5ef86",
"05a063fe87fb010ca782c268d0bf90c5",
"5ef705919760684d54e082430f32551a",
"7ff437036cac57e04aaabcfd604ad2c8",
"d2e3eb303f3a605b2a8587f914b78c34",
},
),
),
)
def test_download_resources(test_url: str, expected_hashes: set[str]):
mock_download = Mock()
mock_download.url = test_url
downloader = Imgchest(mock_download)
results = downloader.find_resources()
assert all(isinstance(res, Resource) for res in results)
[res.download() for res in results]
hashes = {res.hash.hexdigest() for res in results}
assert hashes == set(expected_hashes)
