Skip to content

Commit

Permalink
- implemented searching
Browse files Browse the repository at this point in the history
- unit tests for searching
- implemented searching filters
  • Loading branch information
EchterAlsFake committed Feb 5, 2024
1 parent e4572fb commit da42885
Show file tree
Hide file tree
Showing 4 changed files with 117 additions and 18 deletions.
4 changes: 2 additions & 2 deletions xvideos_api/modules/consts.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import re

# Matches the start of a video page URL. The dots of the host name are escaped
# so they only match a literal "."; the trailing lazy group is kept so that
# group(1) stays available to callers (under `match` it captures "").
REGEX_VIDEO_CHECK_URL = re.compile(r'https://www\.xvideos\.com/video(.*?)')
# Captures the single-quoted argument of html5player.setVideoHLS('...');
REGEX_VIDEO_M3U8 = re.compile(r"html5player\.setVideoHLS\('([^']+)'\);")
# Captures the path segment after /tags/ in links carrying the "is-keyword" class.
REGEX_VIDEO_TAGS = re.compile(r'href="/tags/(.*?)" class="is-keyword', re.DOTALL)
# Captures the text inside <strong class="mobile-hide">...</strong>
# (presumably the view counter, going by the constant's name).
REGEX_VIDEO_VIEWS = re.compile(r'<strong class="mobile-hide">(.*?)</strong>')
Expand All @@ -12,4 +12,4 @@
# Captures the text inside <span class="duration">...</span>.
REGEX_VIDEO_LENGTH = re.compile(r'<span class="duration">(.*?)</span>')
# Captures the path segment after /models/ in anchor tags.
REGEX_VIDEO_PORNSTARS = re.compile(r'a href="/models/(.*?)" class=')

# Captures the href of an anchor that directly follows `none;">` in the markup.
# DOTALL lets the captured value span line breaks.
REGEX_SEARCH_SCRAPE_VIDEOS = re.compile(r'none;"><a href="(.*?)">', re.DOTALL)
1 change: 1 addition & 0 deletions xvideos_api/modules/sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class SortDate:


class SortVideoTime:
Sort_all = "allduration"
Sort_short = "1-3min"
Sort_middle = "3-10min"
Sort_long = "10min_more"
Expand Down
86 changes: 86 additions & 0 deletions xvideos_api/tests/test_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
from ..xvideos_api import Client, Sort, SortVideoTime, SortQuality, SortDate

# This is a deep test for the searching functionalities...

# Single Client instance shared by every test in this module.
client = Client()
# Search term passed to each client.search(...) call in the tests below.
query = "Mia Khalifa"


def video_object_test(object):
    """Check that the leading videos of *object* expose a usable title.

    At most four items are pulled from the iterable; each one must have a
    ``title`` attribute that is a non-empty ``str``. Anything after the
    fourth item is left untouched.
    """
    inspected = 0
    for video in object:
        assert isinstance(video.title, str)
        assert len(video.title) > 0

        inspected += 1
        if inspected == 4:
            # Four samples are enough; stop before pulling another item.
            break


def test_base_search():
    """Run a one-page search with default filters and check every result's title."""
    for result in client.search(query, pages=1):
        assert isinstance(result.title, str)
        assert len(result.title) > 0


def test_Sort_search():
    """Search once per ``Sort`` ordering and validate the leading results.

    No ``pages`` argument is passed, so each search uses the default page count.
    """
    orderings = (
        Sort.Sort_rating,
        Sort.Sort_relevance,
        Sort.Sort_views,
        Sort.Sort_length,
        Sort.Sort_random,
        Sort.Sort_upload_date,
    )

    # All searches are created first and only then validated, one after another.
    searches = [client.search(query, sorting_Sort=ordering) for ordering in orderings]
    for results in searches:
        video_object_test(results)


def test_SortVideoTime_search():
    """Search one page per ``SortVideoTime`` duration filter and validate the leading results."""
    durations = (
        SortVideoTime.Sort_long,
        SortVideoTime.Sort_all,
        SortVideoTime.Sort_short,
        SortVideoTime.Sort_middle,
        SortVideoTime.Sort_really_long,
        SortVideoTime.Sort_long_10_20min,
    )

    # All searches are created first and only then validated, one after another.
    searches = [client.search(query, pages=1, sorting_Time=duration) for duration in durations]
    for results in searches:
        video_object_test(results)


def test_SortQuality_search():
    """Search one page per ``SortQuality`` filter and validate the leading results."""
    qualities = (
        SortQuality.Sort_720p,
        SortQuality.Sort_all,
        SortQuality.Sort_1080_plus,
    )

    # All searches are created first and only then validated, one after another.
    searches = [client.search(query, pages=1, sort_Quality=quality) for quality in qualities]
    for results in searches:
        video_object_test(results)


def test_SortDate_search():
    """Search one page per ``SortDate`` upload-date filter and validate the leading results."""
    periods = (
        SortDate.Sort_all,
        SortDate.Sort_week,
        SortDate.Sort_month,
        SortDate.Sort_last_3_days,
        SortDate.Sort_last_3_months,
        SortDate.Sort_last_6_months,
    )

    # All searches are created first and only then validated, one after another.
    searches = [client.search(query, pages=1, sorting_Date=period) for period in periods]
    for results in searches:
        video_object_test(results)






44 changes: 28 additions & 16 deletions xvideos_api/xvideos_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ def __init__(self, url):
self.json_data = self.flatten_json(nested_json=self.extract_json_from_html())
self.script_content = self.get_script_content()


@classmethod
def check_url(cls, url):
match = REGEX_VIDEO_CHECK_URL.match(url)
Expand Down Expand Up @@ -272,27 +271,40 @@ def get_video(cls, url):
return Video(url)

@classmethod
def extract_video_urls(cls, html_content):
    """Collect the link target of every thumbnail in a page of markup.

    Args:
        html_content: HTML markup of the page, as text.

    Returns:
        A list holding, for each ``<div class="thumb">`` in document order,
        the ``href`` value of the first ``<a>`` inside it that has an
        ``href`` attribute. Divs without such a link, or with an empty
        ``href``, contribute nothing.
    """
    soup = BeautifulSoup(html_content, 'lxml')
    video_urls = []

    for thumb in soup.find_all('div', class_='thumb'):
        anchor = thumb.find('a', href=True)  # first link that carries an href
        if anchor and anchor['href']:
            video_urls.append(anchor['href'])

    return video_urls

@classmethod
def search(cls, query, sorting_Sort: Sort = Sort.Sort_relevance, sorting_Date: SortDate = SortDate.Sort_all,
           sorting_Time: SortVideoTime = SortVideoTime.Sort_all, sort_Quality: SortQuality = SortQuality.Sort_all,
           pages=2):
    """Search for videos and yield a ``Video`` for each matching result.

    Args:
        query: Search term; spaces are replaced with ``+`` before it is
            placed into the URL.
        sorting_Sort: Value for the ``sort`` URL parameter.
        sorting_Date: Value for the ``datef`` URL parameter.
        sorting_Time: Value for the ``durf`` URL parameter.
        sort_Quality: Value for the ``quality`` URL parameter.
        pages: Number of result pages to request, starting at page 0.

    Yields:
        ``Video`` objects, in page order, for every thumbnail link that
        passes ``REGEX_VIDEO_CHECK_URL``. All pages are fetched before the
        first object is yielded.
    """
    query = query.replace(" ", "+")

    # No stray "%" after the sort value: "%&" is not a valid percent-escape.
    search_url = (f"https://www.xvideos.com/?k={query}&sort={sorting_Sort}&datef={sorting_Date}"
                  f"&durf={sorting_Time}&quality={sort_Quality}")

    video_urls = []
    for page in range(pages):
        response = requests.get(f"{search_url}&p={page}").content.decode("utf-8")

        # The per-video URL gets its own name. Reusing the search URL's name
        # here would make every page after the first request a video URL.
        for path in cls.extract_video_urls(response):
            video_url = f"https://www.xvideos.com{path}"

            if REGEX_VIDEO_CHECK_URL.match(video_url):
                video_urls.append(video_url)

    for video_url in video_urls:
        yield Video(video_url)

0 comments on commit da42885

Please sign in to comment.