From 438c61601b465b24be177ddca969c7fbf7b78568 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 18 Jan 2025 12:32:29 +0100 Subject: [PATCH] [xfolio] add initial support (#5514, #6351, #6837) --- docs/configuration.rst | 1 + docs/gallery-dl.conf | 6 +- docs/supportedsites.md | 6 ++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/xfolio.py | 146 +++++++++++++++++++++++++++++++ test/results/xfolio.py | 88 +++++++++++++++++++ 6 files changed, 247 insertions(+), 1 deletion(-) create mode 100644 gallery_dl/extractor/xfolio.py create mode 100644 test/results/xfolio.py diff --git a/docs/configuration.rst b/docs/configuration.rst index 4cf45d4e8b..f76014f55c 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -401,6 +401,7 @@ Default ``urlgalleries``, ``vk``, ``weebcentral``, + ``xfolio``, ``zerochan`` * ``"1.0-2.0"`` ``flickr``, diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 0942183be3..f3c9fdbf35 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -702,6 +702,10 @@ { "sleep-request": "0.5-1.5" }, + "xfolio": + { + "sleep-request": "0.5-1.5" + }, "weibo": { "sleep-request": "1.0-2.0", @@ -923,7 +927,7 @@ "config-file" : null, "enabled" : true, "format" : null, - "forward-cookies": false, + "forward-cookies": true, "logging" : true, "module" : null, "outtmpl" : null, diff --git a/docs/supportedsites.md b/docs/supportedsites.md index e55e868a84..791f3d44c5 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -1081,6 +1081,12 @@ Consider all listed sites to potentially be NSFW. Galleries + + Xfolio + https://xfolio.jp/ + Series, User Profiles, Works + + xHamster https://xhamster.com/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 6385c04b38..7cfc80312a 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -195,6 +195,7 @@ "wikiart", "wikifeet", "wikimedia", + "xfolio", "xhamster", "xvideos", "yiffverse", diff --git a/gallery_dl/extractor/xfolio.py b/gallery_dl/extractor/xfolio.py new file mode 100644 index 0000000000..a1a5be3c66 --- /dev/null +++ b/gallery_dl/extractor/xfolio.py @@ -0,0 +1,146 @@ +# -*- coding: utf-8 -*- + +# Copyright 2025 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://xfolio.jp/""" + +from .common import Extractor, Message +from .. import text, exception + +BASE_PATTERN = r"(?:https?://)?xfolio\.jp(?:/[^/?#]+)?" + + +class XfolioExtractor(Extractor): + """Base class for xfolio extractors""" + category = "xfolio" + root = "https://xfolio.jp" + cookies_domain = ".xfolio.jp" + directory_fmt = ("{category}", "{creator_slug}", "{work_id}") + filename_fmt = "{work_id}_{image_id}.{extension}" + archive_fmt = "{work_id}_{image_id}" + request_interval = (0.5, 1.5) + + def _init(self): + XfolioExtractor._init = Extractor._init + if not self.cookies_check(("xfolio_session",)): + self.log.error("'xfolio_session' cookie required") + + def items(self): + data = {"_extractor": XfolioWorkExtractor} + for work in self.works(): + yield Message.Queue, work, data + + def request(self, url, **kwargs): + response = Extractor.request(self, url, **kwargs) + + if "/system/recaptcha" in response.url: + raise exception.StopExtraction("Bot check / CAPTCHA page") + + return response + + +class XfolioWorkExtractor(XfolioExtractor): + subcategory = "work" + pattern = BASE_PATTERN + r"/portfolio/([^/?#]+)/works/(\d+)" + example = "https://xfolio.jp/portfolio/USER/works/12345" + ref_fmt = ("{}/fullscale_image?image_id={}&work_id={}") + url_fmt = ("{}/user_asset.php?id={}&work_id={}" + "&work_image_id={}&type=work_image") + + def items(self): + creator, work_id = self.groups + url = "{}/portfolio/{}/works/{}".format(self.root, creator, work_id) + html = self.request(url).text + + work = self._extract_data(html) + files = self._extract_files(html, work) + work["count"] = len(files) + + yield Message.Directory, work + for work["num"], file in enumerate(files, 1): + file.update(work) + yield Message.Url, file["url"], file + + def _extract_data(self, html): + creator, work_id = self.groups + extr = text.extract_from(html) + return { + "title" : text.unescape(extr( + 'property="og:title" content="', '"').rpartition(" - ")[0]), + "description" : text.unescape(extr( + 'property="og:description" content="', '"')), + "creator_id" : extr(' data-creator-id="', '"'), + "creator_userid" : extr(' data-creator-user-id="', '"'), + "creator_name" : extr(' data-creator-name="', '"'), + "creator_profile": text.unescape(extr( + ' data-creator-profile="', '"')), + "series_id" : extr("/series/", '"'), + "creator_slug" : creator, + "work_id" : work_id, + } + + def _extract_files(self, html, work): + files = [] + + work_id = work["work_id"] + for img in text.extract_iter( + html, 'class="article__wrap_img', ""): + image_id = text.extr(img, "/fullscale_image?image_id=", "&") + if not image_id: + self.log.warning( + "%s: 'fullscale_image' not available", work_id) + continue + + files.append({ + "image_id" : image_id, + "extension": "jpg", + "url": self.url_fmt.format( + self.root, image_id, work_id, image_id), + "_http_headers": {"Referer": self.ref_fmt.format( + self.root, image_id, work_id)}, + }) + + return files + + +class XfolioUserExtractor(XfolioExtractor): + subcategory = "user" + pattern = BASE_PATTERN + r"/portfolio/([^/?#]+)(?:/works)?/?(?:$|\?|#)" + example = "https://xfolio.jp/portfolio/USER" + + def works(self): + url = "{}/portfolio/{}/works".format(self.root, self.groups[0]) + + while True: + html = self.request(url).text + + for item in text.extract_iter( + html, '
"): + yield text.extr(item, ' href="', '"') + + pager = text.extr(html, ' class="pager__list_next', "") + url = text.extr(pager, ' href="', '"') + if not url: + return + url = text.unescape(url) + + +class XfolioSeriesExtractor(XfolioExtractor): + subcategory = "series" + pattern = BASE_PATTERN + r"/portfolio/([^/?#]+)/series/(\d+)" + example = "https://xfolio.jp/portfolio/USER/series/12345" + + def works(self): + creator, series_id = self.groups + url = "{}/portfolio/{}/series/{}".format(self.root, creator, series_id) + html = self.request(url).text + + return [ + text.extr(item, ' href="', '"') + for item in text.extract_iter( + html, 'class="listWrap--title">', "") + ] diff --git a/test/results/xfolio.py b/test/results/xfolio.py new file mode 100644 index 0000000000..c34b985163 --- /dev/null +++ b/test/results/xfolio.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from gallery_dl.extractor import xfolio +from gallery_dl import exception + + +__tests__ = ( +{ + "#url" : "https://xfolio.jp/portfolio/yutakashii/works/23977", + "#class" : xfolio.XfolioWorkExtractor, + "#urls" : ( + "https://xfolio.jp/user_asset.php?id=113179&work_id=23977&work_image_id=113179&type=work_image", + "https://xfolio.jp/user_asset.php?id=113182&work_id=23977&work_image_id=113182&type=work_image", + "https://xfolio.jp/user_asset.php?id=113185&work_id=23977&work_image_id=113185&type=work_image", + "https://xfolio.jp/user_asset.php?id=113188&work_id=23977&work_image_id=113188&type=work_image", + "https://xfolio.jp/user_asset.php?id=113191&work_id=23977&work_image_id=113191&type=work_image", + "https://xfolio.jp/user_asset.php?id=113194&work_id=23977&work_image_id=113194&type=work_image", + "https://xfolio.jp/user_asset.php?id=113197&work_id=23977&work_image_id=113197&type=work_image", + "https://xfolio.jp/user_asset.php?id=113200&work_id=23977&work_image_id=113200&type=work_image", + "https://xfolio.jp/user_asset.php?id=113203&work_id=23977&work_image_id=113203&type=work_image", + ), + + "count" : 9, + "num" : range(1, 9), + "creator_id" : "1495", + "creator_name" : "香椎ゆたか", + "creator_profile": "連載中:「いつまでも可愛くしてると思うなよ!」 https://booklive.jp/product/index/title_id/10003104/vol_no/001\r\n 過去作:「まじとら!」「男友達ガール」\r\npixiv:http://pixiv.me/yutakashii\r\nskeb:http://skeb.jp/@yutakashii", + "creator_slug" : "yutakashii", + "creator_userid" : "3778", + "description" : "BookLive NINOにて「男友達ガール」連載開始しました。ルームシェア+TSFで、ある日突然同居人が可愛い女の子になったら…という感じのラブ(?)コメディ...", + "extension" : "jpg", + "image_id" : r"re:113\d\d\d", + "series_id" : "", + "title" : "新連載「男友達ガール」冒頭試し読み", + "url" : str, + "work_id" : "23977", +}, + +{ + "#url" : "https://xfolio.jp/portfolio/yutakashii", + "#class" : xfolio.XfolioUserExtractor, + "#pattern" : xfolio.XfolioWorkExtractor.pattern, + "#count" : range(50, 100), +}, + +{ + "#url" : "https://xfolio.jp/portfolio/yutakashii/works", + "#class" : xfolio.XfolioUserExtractor, +}, +{ + "#url" : "https://xfolio.jp/portfolio/yutakashii/works?page=3", + "#class" : xfolio.XfolioUserExtractor, +}, +{ + "#url" : "https://xfolio.jp/en/portfolio/yutakashii", + "#class" : xfolio.XfolioUserExtractor, +}, +{ + "#url" : "https://xfolio.jp/ko/portfolio/yutakashii", + "#class" : xfolio.XfolioUserExtractor, +}, +{ + "#url" : "https://xfolio.jp/zh-CN/portfolio/yutakashii", + "#class" : xfolio.XfolioUserExtractor, +}, + +{ + "#url" : "https://xfolio.jp/portfolio/donguri/series/1391402", + "#class" : xfolio.XfolioSeriesExtractor, + "#auth" : False, + "#exception": exception.StopExtraction, +}, + +{ + "#url" : "https://xfolio.jp/portfolio/donguri/series/1391402", + "#class" : xfolio.XfolioSeriesExtractor, + "#auth" : True, + "#urls" : ( + "https://xfolio.jp/portfolio/donguri/works/2472402", + "https://xfolio.jp/portfolio/donguri/works/2470700", + ), +}, + +)