Skip to content

Commit

Permalink
up pkg
Browse files Browse the repository at this point in the history
  • Loading branch information
wqerwr committed Aug 21, 2021
1 parent 97860ae commit 87ef2f6
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 10 deletions.
8 changes: 6 additions & 2 deletions JavHelper/core/javdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from JavHelper.core.jav_scraper import JavScraper
from JavHelper.core import JAVNotFoundException
from JavHelper.core.requester_proxy import return_html_text, return_post_res, return_get_res
from JavHelper.core.utils import re_parse_html, re_parse_html_list_field, defaultlist
from JavHelper.core.utils import defaultlist, CloudFlareError
from JavHelper.core.ini_file import return_config_string, return_default_config_string
from JavHelper.core.utils import parsed_size_to_int
from JavHelper.core.backend_translation import BackendTranslation
Expand Down Expand Up @@ -84,8 +84,11 @@ def get_single_jav_page(self):
jav_search_content = return_get_res(search_url, behind_cloudflare=True).content
search_root = etree.HTML(jav_search_content)

search_results = search_root.xpath('//a[@class="box"]/@href')
if b'Please turn JavaScript on' in jav_search_content:
import ipdb; ipdb.set_trace()
raise CloudFlareError(f'cloudflare failure on {self.car}')

search_results = search_root.xpath('//a[@class="box"]/@href')

self.total_index = len(search_results)
# need to match car
Expand All @@ -96,6 +99,7 @@ def get_single_jav_page(self):
raise Exception(f'{self.car} does not match javdb search result: {matched_car}')

result_first_url = self.jav_url + search_results[self.pick_index][1:]
print(f'javdb found {self.car} at {result_first_url}')

return return_get_res(result_first_url, behind_cloudflare=True).content.decode('utf-8'), self.total_index

Expand Down
4 changes: 4 additions & 0 deletions JavHelper/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
import re


class CloudFlareError(Exception):
    """Raised when a page behind Cloudflare could not be read.

    Signals that the Cloudflare readout failed (for example, the response
    is a "Please turn JavaScript on" challenge page instead of the real
    content), so callers can tell this apart from other scrape errors.
    """

def byte_to_MB(some_input):
if isinstance(some_input, int) or str(some_input).isdigit():
return int(some_input)/1024/1024
Expand Down
13 changes: 9 additions & 4 deletions JavHelper/views/parse_jav.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from JavHelper.core.jav777 import jav777_download_search
from JavHelper.core.jav321 import Jav321Scraper
from JavHelper.core.file_scanner import EmbyFileStructure
from JavHelper.core.utils import parsed_size_to_int
from JavHelper.core.utils import parsed_size_to_int, CloudFlareError

if return_default_config_string('db_type') == 'sqlite':
from JavHelper.model.jav_manager import SqliteJavManagerDB as JavManagerDB
Expand Down Expand Up @@ -350,9 +350,14 @@ def parse_single_jav(jav_obj: dict, sources):
scraped_info = SOURCES_MAP[scrape]({'car': jav_obj['car']}).scrape_jav()
except Exception as e:
errors = (jav_obj.get('errors') or [])
errors.append(
'{} cannot be found in {}'.format(jav_obj['car'], scrape)
)
if e.__class__ == CloudFlareError:
errors.append(
'{} scrape failed in {} due to cloudflare issue'.format(jav_obj['car'], scrape)
)
else:
errors.append(
'{} scrape encountered an error in {}'.format(jav_obj['car'], scrape)
)
scraped_info = {'errors': errors}
print(scraped_info, e)
jav_obj.update(scraped_info)
Expand Down
8 changes: 4 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
certifi==2019.11.28
chardet==3.0.4
idna==2.8
requests==2.22.0
urllib3==1.25.7
requests==2.22.0 # do not upgrade for now
urllib3==1.25.7 # keep pinned (do not upgrade for now): issue with cloudscraper
ipdb==0.12.3
pillow==7.1.0
pillow==8.2.0
PyInstaller==3.6
Flask==1.1.1
pylint==2.4.4
lxml==4.4.2
lxml==4.6.3
smart-open==1.9.0
blitzdb==0.4.4
aria2p==0.7.1
Expand Down

0 comments on commit 87ef2f6

Please sign in to comment.