-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
a68341f
commit e5a7efb
Showing
3 changed files
with
134 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
import logging | ||
from typing import Optional | ||
|
||
import bs4 | ||
from praw.models import Submission | ||
|
||
from bdfrx.exceptions import SiteDownloaderError | ||
from bdfrx.resource import Resource | ||
from bdfrx.site_authenticator import SiteAuthenticator | ||
from bdfrx.site_downloaders.base_downloader import BaseDownloader | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class NsfwPics(BaseDownloader):
    """Downloader for nsfw.pics single-image pages and albums.

    Album URLs (those containing ``/album/``) are expanded into their
    individual image pages; every image page is then scraped for the direct
    image link stored in its ``embed-code-2`` input element.
    """

    def __init__(self, post: Submission) -> None:
        super().__init__(post)

    def find_resources(self, authenticator: Optional[SiteAuthenticator] = None) -> list[Resource]:
        """Build one ``Resource`` per image reachable from the post URL.

        Args:
            authenticator: Accepted for interface compatibility; not used here.

        Returns:
            A list of ``Resource`` objects, one per distinct image link.

        Raises:
            SiteDownloaderError: If no image links could be extracted, or if
                an expected page element is missing.
        """
        links = self._get_links(self.post.url)
        if not links:
            raise SiteDownloaderError("nsfw.pics parser could not find any links")
        return [Resource(self.post, link, Resource.retry_download(link)) for link in links]

    @staticmethod
    def _get_album_links(url: str) -> list:
        """Return the short image-page URLs listed on an album page.

        Args:
            url: URL of the album page.

        Returns:
            The ``data-url-short`` value of every ``div`` marked
            ``data-type="image"`` inside the album listing, in page order.

        Raises:
            SiteDownloaderError: If the album listing container is missing.
        """
        # NOTE(review): retrieve_url is presumably inherited from BaseDownloader - confirm.
        album = NsfwPics.retrieve_url(url)
        soup = bs4.BeautifulSoup(album.text, "html.parser")
        listing = soup.find("div", attrs={"class": "pad-content-listing"})
        if listing is None:
            # find() returns None when nothing matches; fail with the
            # downloader's own error type rather than an AttributeError.
            raise SiteDownloaderError(f"nsfw.pics parser could not find an album listing at {url}")
        image_pages = []
        for entry in listing.find_all("div", {"data-type": "image"}):
            short_url = entry.get("data-url-short")
            # Skip entries without a short URL so None is never fetched later.
            if short_url:
                image_pages.append(short_url)
        return image_pages

    @staticmethod
    def _get_links(url: str) -> set[str]:
        """Resolve a nsfw.pics URL to the set of direct image links.

        Args:
            url: An album URL (containing ``/album/``) or a single image page URL.

        Returns:
            The distinct ``value`` attributes of each page's ``embed-code-2``
            input element.

        Raises:
            SiteDownloaderError: If a page lacks the ``embed-code-2`` input or
                the input carries no value.
        """
        page_urls = NsfwPics._get_album_links(url) if "/album/" in url else [url]
        image_links = set()
        for page_url in page_urls:
            page = NsfwPics.retrieve_url(page_url)
            soup = bs4.BeautifulSoup(page.text, "html.parser")
            embed_input = soup.find("input", attrs={"id": "embed-code-2"})
            image_link = embed_input.get("value") if embed_input is not None else None
            if not image_link:
                raise SiteDownloaderError(f"nsfw.pics parser could not find an image link at {page_url}")
            image_links.add(image_link)
        return image_links
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
from unittest.mock import Mock | ||
|
||
import pytest | ||
|
||
from bdfrx.resource import Resource | ||
from bdfrx.site_downloaders.nsfw_pics import NsfwPics | ||
|
||
|
||
@pytest.mark.online
@pytest.mark.parametrize(
    ("test_url", "expected"),
    [
        (
            "https://nsfw.pics/album/Test.l2t",  # Album
            {
                "https://nsfw.pics/image/OdfV",
                "https://nsfw.pics/image/ObUF",
                "https://nsfw.pics/image/OOV7",
                "https://nsfw.pics/image/OD71",
                "https://nsfw.pics/image/O6du",
            },
        ),
    ],
)
def test_get_album(test_url: str, expected: set[str]):
    """Check that an album page yields exactly the expected image-page URLs."""
    image_pages = NsfwPics._get_album_links(test_url)
    # The length check catches duplicates that a sorted comparison of
    # differently sized collections would also reject, but reports it sooner.
    assert len(image_pages) == len(expected)
    assert sorted(image_pages) == sorted(expected)
|
||
|
||
@pytest.mark.online
@pytest.mark.parametrize(
    ("test_url", "expected"),
    [
        (
            "https://nsfw.pics/album/Test.l2t",  # Album
            {
                "https://i.nsfw.pics/b8007b506022132fe857eead3dc98a92.gif",
                "https://i.nsfw.pics/aa0541830d5d16743bca9bfb48e16b7b.gif",
                "https://i.nsfw.pics/b4afb5a33e68d3d74a547f62684cddc9.jpeg",
                "https://i.nsfw.pics/131ed0764342b570a338af37cdd75e3e.jpeg",
                "https://i.nsfw.pics/c447389dee315f5960eb29671fb56232.jpeg",
            },
        ),
        (
            "https://nsfw.pics/image/OdfV",  # Single image
            {"https://i.nsfw.pics/b8007b506022132fe857eead3dc98a92.gif"},
        ),
    ],
)
def test_get_links(test_url: str, expected: set[str]):
    """Check that album and single-image URLs resolve to the expected direct image links."""
    image_links = NsfwPics._get_links(test_url)
    assert sorted(image_links) == sorted(expected)
|
||
|
||
@pytest.mark.online
@pytest.mark.slow
@pytest.mark.parametrize(
    ("test_url", "expected_hashes"),
    (
        (
            "https://nsfw.pics/album/Test.l2t",  # Album
            {
                "9ceac1e26c4799b0a6b7d5453a73f53b",
                "8ff9229c39ad5403e9859a21d5aec103",
                "907f92b1c295d5f84f4f64aacc960079",
                "1098edadc345ec948d37e1541ed867eb",
                "fb60e0a42a0f7f0929f5a5ae401a3518",
            },
        ),
        (
            "https://nsfw.pics/image/OdfV",  # Single image
            {"9ceac1e26c4799b0a6b7d5453a73f53b"},
        ),
    ),
)
def test_download_resources(test_url: str, expected_hashes: set[str]):
    """Download every resource found for the URL and compare content hashes.

    A ``Mock`` stands in for the submission; only its ``url`` attribute is set.
    """
    mock_submission = Mock()
    mock_submission.url = test_url
    downloader = NsfwPics(mock_submission)
    results = downloader.find_resources()
    assert all(isinstance(res, Resource) for res in results)
    # Plain loop: download() is called for its side effect, not its result.
    for res in results:
        res.download()
    hashes = {res.hash.hexdigest() for res in results}
    assert hashes == expected_hashes