Skip to content

Commit

Permalink
Merge pull request #28 from ddd354/back-fixes
Browse files Browse the repository at this point in the history
1-year-update
  • Loading branch information
ddd354 authored Aug 21, 2021
2 parents 0ff7ff1 + 7abb1ae commit ba41c25
Show file tree
Hide file tree
Showing 28 changed files with 496 additions and 106 deletions.
20 changes: 20 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,28 @@ JavOneStop which is a small tool that helps users rename, parse, generate nfo, or
communicate with Emby to add actresses images.

## [Unreleased]


## [0.9.0] - 2021-08-21
### 新增
- 增加deluge磁链下载支持
- 增加tushyraw站点
- 增加javdb的磁链搜索
- 增加zhongziso磁链搜索
### 改进 / 修复
- 更新nyaa磁链搜索路径
- 默认JAV浏览切换至JavLibrary
- 修复新的115下载失败的逻辑
- 修复下载图片失败
- 改进文件写入逻辑
- 修正部分节点不使用代理
- 修复后缀命名逻辑
- 修复各站点的xpath数据路径
- 修复部分节点不支持cloudflare
- 修复一些刮削失败情况下的处理
### 移除
- 移除torrentkitty搜索
- 移除添加115任务至Aria2下载器

## [0.8.0] - 2020-09-17
### 新增
Expand Down
46 changes: 36 additions & 10 deletions JavHelper/core/OOF_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@


LOCAL_OOF_COOKIES = '115_cookies.json'
STANDARD_UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.87 Safari/537.36'
STANDARD_UA = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36 115Browser/24.0.2.2'
STANDARD_HEADERS = {"Content-Type": "application/x-www-form-urlencoded", 'User-Agent': STANDARD_UA}


Expand Down Expand Up @@ -106,6 +106,25 @@ def get_task_detail_from_hash(self, hash_str: str):
oof_file_id = task.get('file_id') # this is actually cid
break

if not oof_file_id and task.get('err', 0)==10016:
# download is failing but good file could be there still
_task_name = task.get('name')
print('magnet task {} is failing, trying directory search'.format(_task_name))
# standard file list for "云下载", cid is static
_cloud_down_url_template = """https://webapi.115.com/files?aid=1&cid=2099631682968616080&o=user_ptime&asc=0&offset=0
&show_dir=1&limit=10&code=&scid=&snap=0&natsort=1&record_open_time=1&source=&format=json"""
req = requests.get(_cloud_down_url_template, headers=STANDARD_HEADERS, cookies=self.cookies)
try:
for _file in req.json().get('data', []):
if _file.get('n') == _task_name:
oof_file_id = _file.get('cid')
break
#else:
# print(_file.get('n'))
except Exception as other_e:
print(req.text)
raise other_e

if not oof_file_id:
raise NoTaskException(f'cannot find {hash_str} task from task list')

Expand All @@ -127,6 +146,7 @@ def filter_task_details(task_detail: dict):
in_shas = []
for file_obj in task_detail.get('data', []):
processed_file_obj = {
'name': file_obj.get('n'), # name for the file
'cid': file_obj.get('cid'),
'sha': file_obj.get('sha'),
'pickup_code': file_obj.get('pc'), # IMPORTANT, used for download
Expand All @@ -142,6 +162,7 @@ def filter_task_details(task_detail: dict):
return rt

def download_aria_on_pcode(self, cid: str, pickup_code: str):
""" no longer support"""
referer_url = f'https://115.com/?ct=file&ac=userfile&is_wl_tpl=1&aid=1&cid={cid}'
download_header = ''
url = 'http://webapi.115.com/files/download?pickcode={}'.format(pickup_code)
Expand Down Expand Up @@ -184,33 +205,38 @@ def handle_jav_download(self, car: str, magnet: str):
except Exception as create_magnet_e:
return {'error': self.translate_map['oof_fail_magnet'].format(car=car, create_magnet_e=create_magnet_e)}

download_files = None # declare var to check later
while retry_num < 3:
try:
# get task detail from list page
search_hash = created_task['info_hash']
task_detail = self.get_task_detail_from_hash(search_hash)
# filter out unwanted files
download_files = self.filter_task_details(task_detail)
if not download_files:
return {'error': self.translate_map['oof_no_file'] + f' {car}'}
break
except NoTaskException as _e:
return {'error': self.translate_map['oof_no_task_found'].format(car)}
#except NoTaskException as _e:
# return {'error': self.translate_map['oof_no_task_found'].format(car)}
except Exception as _e:
retry_num += 1
sleep(15)
print(f'current error: {_e}, retrying')
e = _e

if not download_files:
return {'error': self.translate_map['oof_no_file'] + f' {car}'}

# send download info to aria2
try:
down_file_list = []
for download_file in download_files:
self.download_aria_on_pcode(download_file['cid'],
download_file['pickup_code'])
_single_file = {'list': [{
'n': download_file.get('name'),
'pc': download_file.get('pickup_code'),
'is_dir': False
}], 'count': 1}
down_file_list.append(_single_file)

# if everything went well, update stat
jav_obj['stat'] = 4
db_conn.upcreate_jav(jav_obj)
jav_obj['down_list'] = down_file_list
return jav_obj
except Exception as _e:
print_exc()
Expand Down
15 changes: 10 additions & 5 deletions JavHelper/core/file_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from JavHelper.core.backend_translation import BackendTranslation
from JavHelper.core.nfo_parser import EmbyNfo
from JavHelper.core.requester_proxy import return_get_res

from JavHelper.core.ini_file import return_default_config_string

Expand Down Expand Up @@ -70,7 +71,7 @@ def write_images(self, jav_obj, fail_on_error=False):
fanart_path = os.path.join(directory, fanart_name+image_ext)

try:
r = requests.get(url_obj.geturl(), stream=True)
r = return_get_res(url_obj.geturl(), stream=True)
except Exception as e:
print('Image download failed for {} due to {}'.format(url_obj.geturl(), e))
return
Expand All @@ -82,9 +83,13 @@ def write_images(self, jav_obj, fail_on_error=False):
print('Image download failed for {}'.format(url_obj.geturl()))
return

with open(fanart_path, 'wb') as pic:
for chunk in r:
pic.write(chunk)
try:
with open(fanart_path, 'wb') as pic:
for chunk in r:
pic.write(chunk)
except PermissionError as e:
print('Fanart write error {} due to {}'.format(fanart_path, e))
return

# 裁剪生成 poster
img = Image.open(fanart_path)
Expand Down Expand Up @@ -199,7 +204,7 @@ def extract_CDs_postfix_filename(self, file_name: str):
"""
allowed_postfixes = {
r'^(.+?)([ABab])$': {'a': 'cd1', 'b': 'cd2'},
r'^(.+?)(CD\d|cd\d)$': None,
r'^(.+?)(CD\d+|cd\d+)$': None,
}
cd_postfix = ''
if not self.handle_multi_cds:
Expand Down
2 changes: 2 additions & 0 deletions JavHelper/core/ini_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
'emby_address': ['emby专用', '网址'],
'emby_api': ['emby专用', 'API ID'],
'javlibrary_url': ['其他设置', 'javlibrary网址'],
'javbus_url': ['其他设置', 'javbus网址'],
'jav_obj_priority': ['其他设置', '刮削信息优先度'],
'folder_structure': ['本地设置', '保存路径模板'],
'display_language': ["其他设置", "界面语言(cn/en)"],
Expand All @@ -43,6 +44,7 @@
'emby_address': "",
'emby_api': "",
'javlibrary_url': "http://www.p42u.com/cn/",
'javbus_url': "https://www.dmmbus.bar/",
'jav_obj_priority': "javlibrary,javbus,javdb,arzon",
'folder_structure': "{year}/{car}",
'display_language': 'cn',
Expand Down
10 changes: 9 additions & 1 deletion JavHelper/core/javbus.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ def postprocess(self):
if self.jav_obj.get('image'):
# get rid of https to have consistent format with other sources
self.jav_obj['image'] = self.jav_obj['image'].lstrip('https:').lstrip('http:')
# new local image logic
self.jav_obj['image'] = self.jav_url.lstrip('http').lstrip('s://').rstrip('/') + self.jav_obj['image']
if self.jav_obj.get('length'):
self.jav_obj['length'] = self.jav_obj['length'].lstrip(' ')[:-2]
if self.jav_obj.get('title'):
Expand Down Expand Up @@ -112,7 +114,7 @@ def javbus_magnet_search(car: str):
magnets[_i].update({k: _value.strip('\t').strip('\r').strip('\n').strip()})
if k == 'size':
magnets[_i].update({'size_sort': parsed_size_to_int(_value.strip('\t').strip('\r').strip('\n').strip())})

return magnets


Expand All @@ -137,6 +139,12 @@ def javbus_set_page(page_template: str, page_num=1, url_parameter=None, config=N
jav_objs_raw = defaultlist(dict)
for k, v in xpath_dict.items():
_values = root.xpath(v)

# new logic for local images
javbus_img_url = javbus_url.lstrip('http').lstrip('s://').rstrip('/')
if k == 'img':
_values = [javbus_img_url +_ind for _ind in _values if 'dmm.co.jp' not in _ind]

for _i, _value in enumerate(_values):
jav_objs_raw[_i].update({k: _value})

Expand Down
80 changes: 42 additions & 38 deletions JavHelper/core/javdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from JavHelper.core.jav_scraper import JavScraper
from JavHelper.core import JAVNotFoundException
from JavHelper.core.requester_proxy import return_html_text, return_post_res, return_get_res
from JavHelper.core.utils import re_parse_html, re_parse_html_list_field, defaultlist
from JavHelper.core.utils import defaultlist, CloudFlareError
from JavHelper.core.ini_file import return_config_string, return_default_config_string
from JavHelper.core.utils import parsed_size_to_int
from JavHelper.core.backend_translation import BackendTranslation
Expand All @@ -26,7 +26,7 @@ def __init__(self, *args, **kwargs):
'search_field': {
'title': '//h2[@class="title is-4"]/strong/text()',
'studio': '//div[strong="片商:"]/a/text()',
'premiered': '//div[strong="時間:"]/span/text()',
'premiered': '//div[strong="日期:"]/span/text()',
#'year': processed from release date
'length': '//div[strong="時長:"]/span/text()',
# 'director': no good source
Expand Down Expand Up @@ -60,29 +60,35 @@ def postprocess(self):

def get_single_jav_page(self):
# new autocomplete search, no rate limit
#
# autocomplete endpoint no longer there
#
# https://javdb.com/videos/search_autocomplete.json?q=luxu-1298
search_url = self.jav_url + 'videos/search_autocomplete.json?q={}'.format(self.car)
"""search_url = self.jav_url + 'videos/search_autocomplete.json?q={}'.format(self.car)
jav_search_result = return_html_text(search_url, behind_cloudflare=True)
try:
jav_search_result = json.loads(jav_search_result)
self.total_index = len(jav_search_result)
for i, _rst in enumerate(jav_search_result):
if _rst['number'] == self.car.upper():
result_first_url = self.jav_url + 'v/{}'.format(_rst['uid'])
return return_get_res(result_first_url).content.decode('utf-8'), self.total_index
return return_get_res(result_first_url, behind_cloudflare=True).content.decode('utf-8'), self.total_index
except Exception as e:
print(f'issue encounter when autocomplete search javdb {self.car} - {e}')
pass
pass"""

# perform search first, not reliable at all, often multiple results
# https://javdb4.com/search?q=MILK-08&f=all
search_url = self.jav_url + 'search?q={}&f=all'.format(self.car)

jav_search_content = return_get_res(search_url).content
jav_search_content = return_get_res(search_url, behind_cloudflare=True).content
search_root = etree.HTML(jav_search_content)

search_results = search_root.xpath('//a[@class="box"]/@href')
if b'Please turn JavaScript on' in jav_search_content:
import ipdb; ipdb.set_trace()
raise CloudFlareError(f'cloudflare failure on {self.car}')

search_results = search_root.xpath('//a[@class="box"]/@href')

self.total_index = len(search_results)
# need to match car
Expand All @@ -93,36 +99,9 @@ def get_single_jav_page(self):
raise Exception(f'{self.car} does not match javdb search result: {matched_car}')

result_first_url = self.jav_url + search_results[self.pick_index][1:]
print(f'javdb found {self.car} at {result_first_url}')

return return_get_res(result_first_url).content.decode('utf-8'), self.total_index


def javbus_magnet_search(car: str):
    """Search javbus for magnet links of *car*.

    Fetches the javbus detail page, extracts the ajax ``gid`` token embedded
    in the page, then requests the ajax magnet table and parses it.

    Returns a defaultlist of dicts with keys ``magnet``, ``title`` and
    ``size``, plus a numeric ``size_sort`` companion for ordering by size.
    """
    base_url = return_config_string(['其他设置', 'javbus网址'])
    detail_url = (base_url + '{car}').format(car=car)
    ajax_url_template = base_url + 'ajax/uncledatoolsbyajax.php?gid={gid}&uc=0'

    # the magnet table is loaded via ajax keyed on a gid embedded in the page js
    page_text = return_get_res(detail_url).text
    gid = re.search(r'.*?var gid = (\d*);.*?', page_text).groups()[0]

    # the ajax endpoint requires the detail page as referer
    ajax_content = return_get_res(
        ajax_url_template.format(gid=gid),
        headers={'referer': detail_url}
    ).content
    root = etree.HTML(ajax_content)

    xpath_map = {
        'magnet': '//tr/td[position()=1]/a[1]/@href',
        'title': '//tr/td[position()=1]/a[1]/text()',
        'size': '//tr/td[position()=2]/a[1]/text()'
    }

    magnets = defaultlist(dict)
    for field, xpath_expr in xpath_map.items():
        for idx, raw_value in enumerate(root.xpath(xpath_expr)):
            cleaned = raw_value.strip('\t').strip('\r').strip('\n').strip()
            magnets[idx].update({field: cleaned})
            if field == 'size':
                magnets[idx].update({'size_sort': parsed_size_to_int(cleaned)})

    return magnets
return return_get_res(result_first_url, behind_cloudflare=True).content.decode('utf-8'), self.total_index


def javdb_set_page(page_template: str, page_num=1, url_parameter=None, config=None) -> dict:
Expand All @@ -138,7 +117,7 @@ def javdb_set_page(page_template: str, page_num=1, url_parameter=None, config=No
xpath_max_page = '//ul[@class="pagination-list"]/li/a[@class="pagination-link"][last()]/text()'

# force to get url from ini file each time
javdb_url = 'https://javdb4.com/'
javdb_url = 'https://javdb.com/'
set_url = javdb_url + page_template.format(page_num=page_num, url_parameter=url_parameter)
print(f'accessing {set_url}')

Expand Down Expand Up @@ -225,4 +204,29 @@ def search_for_actress(javlib_actress_code: str, page_num=1):
raise Exception(BackendTranslation()['no_support_set_search'].format(set_type))

jav_objs, max_page = search_map[set_type]['function'](**search_map[set_type]['params'])
return jav_objs, max_page
return jav_objs, max_page


def javdb_magnet_search(car: str):
    """Scrape the javdb detail page of *car* and return its magnet links.

    Returns a defaultlist of dicts with keys ``magnet``, ``title`` and
    ``size``, plus a numeric ``size_sort`` used for ordering by file size.
    """
    # size strings look like "(1.23GB)" / "(512MB)" inside the meta span
    size_match = r'(\d*.*\d*((GB)|(MB)))'
    magnet_xpath = {
        'magnet': '//tr/td[@class="magnet-name"]/a[1]/@href',
        'title': '//tr/td[@class="magnet-name"]/a[1]/span[1]/text()',
        'size': '//tr/td[@class="magnet-name"]/a[1]/span[@class="meta"]/text()'
    }

    car = car.upper()
    # total result count from the scraper is not needed here
    res, _page = JavDBScraper({'car': car}).get_single_jav_page()
    root = etree.HTML(res)

    magnets = defaultlist(dict)
    for k, v in magnet_xpath.items():
        for _i, _value in enumerate(root.xpath(v)):
            cleaned = _value.strip('\t').strip('\r').strip('\n').strip()
            if k == 'size':
                # fix: re.search can return None on unparseable size text,
                # which previously raised AttributeError and aborted the
                # whole search; fall back to the raw text / sort key 0
                matched = re.search(size_match, cleaned.lstrip('(').strip())
                if matched:
                    _size = matched.groups()[0]
                    magnets[_i].update({'size_sort': parsed_size_to_int(_size)})
                    magnets[_i].update({'size': _size})
                else:
                    magnets[_i].update({'size_sort': 0})
                    magnets[_i].update({'size': cleaned})
            else:
                magnets[_i].update({k: cleaned})
    return magnets
5 changes: 3 additions & 2 deletions JavHelper/core/javlibrary.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,12 @@ def get_single_jav_page(self):
return return_get_res(lib_search_url, behind_cloudflare=True).content, 1
# 第二种情况:搜索结果可能是两个以上,所以这种匹配找不到标题,None!
else: # 继续找标题,但匹配形式不同,这是找“可能是多个结果的网页”上的第一个标题
search_results = re.findall(r'v=javli(.+?)" title=".+?-\d+?[a-z]? ', jav_html)
#import ipdb; ipdb.set_trace()
search_results = re.findall(r'v=jav(.+?)" title=".+?-\d+?[a-z]? ', jav_html)
# 搜索有几个结果,用第一个AV的网页,打开它
if search_results:
self.total_index = len(search_results)
result_first_url = self.jav_url + '?v=javli' + search_results[self.pick_index]
result_first_url = self.jav_url + '?v=jav' + search_results[self.pick_index]
return return_get_res(result_first_url, behind_cloudflare=True).content, self.total_index
# 第三种情况:搜索不到这部影片,搜索结果页面什么都没有
else:
Expand Down
Loading

0 comments on commit ba41c25

Please sign in to comment.