Add imgchest downloader
Adds downloader for imgchest albums
OMEGARAZER committed May 16, 2023
1 parent 48e065d commit 8b2344d
Showing 5 changed files with 123 additions and 2 deletions.
2 changes: 1 addition & 1 deletion bdfrx/site_downloaders/catbox.py
@@ -25,7 +25,7 @@ def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> l
return links

@staticmethod
def get_links(url: str) -> set[str]:
def _get_links(url: str) -> set[str]:
content = Catbox.retrieve_url(url)
soup = bs4.BeautifulSoup(content.text, "html.parser")
collection_div = soup.find("div", attrs={"class": "imagecontainer"})
3 changes: 3 additions & 0 deletions bdfrx/site_downloaders/download_factory.py
@@ -10,6 +10,7 @@
from bdfrx.site_downloaders.fallback_downloaders.ytdlp_fallback import YtdlpFallback
from bdfrx.site_downloaders.gallery import Gallery
from bdfrx.site_downloaders.gfycat import Gfycat
from bdfrx.site_downloaders.imgchest import Imgchest
from bdfrx.site_downloaders.imgur import Imgur
from bdfrx.site_downloaders.pornhub import PornHub
from bdfrx.site_downloaders.redgifs import Redgifs
@@ -41,6 +42,8 @@ def pull_lever(url: str) -> type[BaseDownloader]: # noqa: PLR0911,PLR0912
return DelayForReddit
if re.match(r"reddit\.com/gallery/.*", sanitised_url) or re.match(r"patreon\.com.*", sanitised_url):
return Gallery
if re.match(r"imgchest\.com/p/", sanitised_url):
return Imgchest
if re.match(r"reddit\.com/r/", sanitised_url):
return SelfPost
if re.match(r"(m\.)?youtu\.?be", sanitised_url):
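
For context, the dispatch branch added above can be exercised directly. A minimal sketch, not part of this commit, assuming the factory class is named DownloadFactory, that pull_lever is callable as a static method, and that the URL sanitisation (not shown in this hunk) strips the scheme and "www." prefix before the regex checks:

# Sketch only: verify that an imgchest album URL resolves to the new downloader.
from bdfrx.site_downloaders.download_factory import DownloadFactory  # assumed class name
from bdfrx.site_downloaders.imgchest import Imgchest

downloader_class = DownloadFactory.pull_lever("https://www.imgchest.com/p/ro24aogylj5")
assert downloader_class is Imgchest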
35 changes: 35 additions & 0 deletions bdfrx/site_downloaders/imgchest.py
@@ -0,0 +1,35 @@
import logging
from typing import Optional

import bs4
from praw.models import Submission

from bdfrx.exceptions import SiteDownloaderError
from bdfrx.resource import Resource
from bdfrx.site_authenticator import SiteAuthenticator
from bdfrx.site_downloaders.base_downloader import BaseDownloader

logger = logging.getLogger(__name__)


class Imgchest(BaseDownloader):
def __init__(self, post: Submission) -> None:
super().__init__(post)

def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
links = self._get_links(self.post.url)
if not links:
raise SiteDownloaderError("Imgchest parser could not find any links")
links = [Resource(self.post, link, Resource.retry_download(link)) for link in links]
return links

@staticmethod
def _get_links(url: str) -> set[str]:
page = Imgchest.retrieve_url(url)
soup = bs4.BeautifulSoup(page.text, "html.parser")
album_div = soup.find("div", attrs={"id": "post-images"})
images = album_div.find_all("img")
out = [im.get("src") for im in images]
videos = album_div.find_all("source")
out.extend([vid.get("src") for vid in videos])
return set(out)
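
The scraping logic above collects every img and source src attribute inside the "post-images" container. A minimal sketch, not from the commit, showing the same extraction against an inline HTML fragment so the parsing behaviour can be checked without a network call; the markup below is a hypothetical stand-in for an imgchest album page:

import bs4

# Hypothetical album markup mirroring the structure _get_links expects:
# a div with id="post-images" containing <img> and <source> tags.
sample_html = """
<div id="post-images">
  <img src="https://cdn.imgchest.com/files/example1.jpg">
  <video><source src="https://cdn.imgchest.com/files/example2.mp4"></video>
</div>
"""

soup = bs4.BeautifulSoup(sample_html, "html.parser")
album_div = soup.find("div", attrs={"id": "post-images"})
out = [im.get("src") for im in album_div.find_all("img")]
out.extend([vid.get("src") for vid in album_div.find_all("source")])
print(set(out))
# {'https://cdn.imgchest.com/files/example1.jpg', 'https://cdn.imgchest.com/files/example2.mp4'}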
2 changes: 1 addition & 1 deletion tests/site_downloaders/test_catbox.py
@@ -25,7 +25,7 @@
),
)
def test_get_links(test_url: str, expected: set[str]):
results = Catbox.get_links(test_url)
results = Catbox._get_links(test_url)
assert results == expected


83 changes: 83 additions & 0 deletions tests/site_downloaders/test_imgchest.py
@@ -0,0 +1,83 @@
from unittest.mock import Mock

import pytest

from bdfrx.resource import Resource
from bdfrx.site_downloaders.imgchest import Imgchest


@pytest.mark.online
@pytest.mark.parametrize(
("test_url", "expected"),
(
(
"https://www.imgchest.com/p/ro24aogylj5",
{
"https://cdn.imgchest.com/files/jd7ogcgl5y9.jpg",
"https://cdn.imgchest.com/files/rj7kzcdv27m.jpg",
"https://cdn.imgchest.com/files/vmy2pc2pr7j.jpg",
"https://cdn.imgchest.com/files/xl7lxce967o.jpg",
},
),
(
"https://www.imgchest.com/p/o24ap5wd4lj",
{
"https://cdn.imgchest.com/files/k46ac86kq7z.jpeg",
"https://cdn.imgchest.com/files/pyvdczlvayk.jpeg",
"https://cdn.imgchest.com/files/6yxkcvlrn7w.jpeg",
"https://cdn.imgchest.com/files/b49zce5wkyw.jpeg",
"https://cdn.imgchest.com/files/l4necb3kw4m.jpeg",
"https://cdn.imgchest.com/files/p7bwc3rx37n.mp4",
"https://cdn.imgchest.com/files/w7pjcbe587p.mp4",
"https://cdn.imgchest.com/files/d7ogcr95jy9.mp4",
"https://cdn.imgchest.com/files/j7kzc9r557m.mp4",
"https://cdn.imgchest.com/files/my2pc3wzl7j.mp4",
},
),
),
)
def test_get_links(test_url: str, expected: set[str]):
results = Imgchest._get_links(test_url)
assert results == expected


@pytest.mark.online
@pytest.mark.slow
@pytest.mark.parametrize(
("test_url", "expected_hashes"),
(
(
"https://www.imgchest.com/p/ro24aogylj5",
{
"91f1a5919b32af6cbf5c24528e83871c",
"c4969ac347fdcefbb6b2ec01c0be02ae",
"a9db23217974d8b78c84b463224f130a",
"6a0d0e28f02c2cdccff80f9973efbad3",
},
),
(
"https://www.imgchest.com/p/o24ap5wd4lj",
{
"a4ea3f676c8a1cbca8e2faf70a031e1e",
"59db5f35f5969d638c4036a3a249b1e1",
"73ee75fe341022cd643431a4fb78be3d",
"6fe6f1239dd39f948b3abb583c310c7d",
"8e9b652c62b906ba54607c7fd8ce6d63",
"108b167b04830ce0a59c27415bb5ef86",
"05a063fe87fb010ca782c268d0bf90c5",
"5ef705919760684d54e082430f32551a",
"7ff437036cac57e04aaabcfd604ad2c8",
"d2e3eb303f3a605b2a8587f914b78c34",
},
),
),
)
def test_download_resources(test_url: str, expected_hashes: set[str]):
mock_download = Mock()
mock_download.url = test_url
downloader = Imgchest(mock_download)
results = downloader.find_resources()
assert all(isinstance(res, Resource) for res in results)
[res.download() for res in results]
hashes = {res.hash.hexdigest() for res in results}
assert hashes == set(expected_hashes)
