- implemented searching

- unit tests for searching
- implemented searching filters
EchterAlsFake · Feb 5, 2024 · da42885 · da42885
1 parent e4572fb
commit da42885
Show file tree

Hide file tree

Showing 4 changed files with 117 additions and 18 deletions.
diff --git a/xvideos_api/modules/consts.py b/xvideos_api/modules/consts.py
@@ -1,6 +1,6 @@
 import re
 
-REGEX_VIDEO_CHECK_URL = re.compile(r'https://www.xvideos.com/(.*?)')
+REGEX_VIDEO_CHECK_URL = re.compile(r'https://www.xvideos.com/video(.*?)')
 REGEX_VIDEO_M3U8 = re.compile(r"html5player\.setVideoHLS\('([^']+)'\);")
 REGEX_VIDEO_TAGS = re.compile(r'href="/tags/(.*?)" class="is-keyword', re.DOTALL)
 REGEX_VIDEO_VIEWS = re.compile(r'<strong class="mobile-hide">(.*?)</strong>')
@@ -12,4 +12,4 @@
 REGEX_VIDEO_LENGTH = re.compile(r'<span class="duration">(.*?)</span>')
 REGEX_VIDEO_PORNSTARS = re.compile(r'a href="/models/(.*?)" class=')
 
-REGEX_SEARCH_SCRAPE_VIDEOS = re.compile(r'<div id="video_(.*?)" data-id="')
+REGEX_SEARCH_SCRAPE_VIDEOS = re.compile(r'none;"><a href="(.*?)">', re.DOTALL)
diff --git a/xvideos_api/modules/sorting.py b/xvideos_api/modules/sorting.py
@@ -17,6 +17,7 @@ class SortDate:
 
 
 class SortVideoTime:
+    Sort_all = "allduration"
     Sort_short = "1-3min"
     Sort_middle = "3-10min"
     Sort_long = "10min_more"

diff --git a/xvideos_api/tests/test_search.py b/xvideos_api/tests/test_search.py
@@ -0,0 +1,86 @@
+from ..xvideos_api import Client, Sort, SortVideoTime, SortQuality, SortDate
+
+# In-depth tests for the search functionality, covering its sorting and filter options.
+
+client = Client()
+query = "Mia Khalifa"
+
+
+def video_object_test(object):
+    for idx, video in enumerate(object):
+        assert isinstance(video.title, str) and len(video.title) > 0
+
+        if idx == 3:
+            break
+
+
+def test_base_search():
+    videos = client.search(query, pages=1)
+    for video in videos:
+        assert isinstance(video.title, str) and len(video.title) > 0
+
+
+def test_Sort_search():
+    videos = client.search(query, sorting_Sort=Sort.Sort_rating)
+    videos_2 = client.search(query, sorting_Sort=Sort.Sort_relevance)
+    videos_3 = client.search(query, sorting_Sort=Sort.Sort_views)
+    videos_4 = client.search(query, sorting_Sort=Sort.Sort_length)
+    videos_5 = client.search(query, sorting_Sort=Sort.Sort_random)
+    videos_6 = client.search(query, sorting_Sort=Sort.Sort_upload_date)
+
+    video_object_test(videos)
+    video_object_test(videos_2)
+    video_object_test(videos_3)
+    video_object_test(videos_4)
+    video_object_test(videos_5)
+    video_object_test(videos_6)
+
+
+def test_SortVideoTime_search():
+    videos = client.search(query, pages=1, sorting_Time=SortVideoTime.Sort_long)
+    videos_2 = client.search(query, pages=1, sorting_Time=SortVideoTime.Sort_all)
+    videos_3 = client.search(query, pages=1, sorting_Time=SortVideoTime.Sort_short)
+    videos_4 = client.search(query, pages=1, sorting_Time=SortVideoTime.Sort_middle)
+    videos_5 = client.search(query, pages=1, sorting_Time=SortVideoTime.Sort_really_long)
+    videos_6 = client.search(query, pages=1, sorting_Time=SortVideoTime.Sort_long_10_20min)
+
+
+
+    video_object_test(videos)
+    video_object_test(videos_2)
+    video_object_test(videos_3)
+    video_object_test(videos_4)
+    video_object_test(videos_5)
+    video_object_test(videos_6)
+
+
+def test_SortQuality_search():
+    videos = client.search(query, pages=1, sort_Quality=SortQuality.Sort_720p)
+    videos_2 = client.search(query, pages=1, sort_Quality=SortQuality.Sort_all)
+    videos_3 = client.search(query, pages=1, sort_Quality=SortQuality.Sort_1080_plus)
+
+    video_object_test(videos)
+    video_object_test(videos_2)
+    video_object_test(videos_3)
+
+
+def test_SortDate_search():
+    videos = client.search(query, pages=1, sorting_Date=SortDate.Sort_all)
+    videos_2 = client.search(query, pages=1, sorting_Date=SortDate.Sort_week)
+    videos_3 = client.search(query, pages=1, sorting_Date=SortDate.Sort_month)
+    videos_4 = client.search(query, pages=1, sorting_Date=SortDate.Sort_last_3_days)
+    videos_5 = client.search(query, pages=1, sorting_Date=SortDate.Sort_last_3_months)
+    videos_6 = client.search(query, pages=1, sorting_Date=SortDate.Sort_last_6_months)
+
+    video_object_test(videos)
+    video_object_test(videos_2)
+    video_object_test(videos_3)
+    video_object_test(videos_4)
+    video_object_test(videos_5)
+    video_object_test(videos_6)
+
+
+
+
+
+
diff --git a/xvideos_api/xvideos_api.py b/xvideos_api/xvideos_api.py
@@ -51,7 +51,6 @@ def __init__(self, url):
         self.json_data = self.flatten_json(nested_json=self.extract_json_from_html())
         self.script_content = self.get_script_content()
 
-
     @classmethod
     def check_url(cls, url):
         match = REGEX_VIDEO_CHECK_URL.match(url)
@@ -272,27 +271,40 @@ def get_video(cls, url):
         return Video(url)
 
     @classmethod
-    def search(cls, query, sorting_Sort: Sort, sorting_Date: SortDate, sorting_Time: SortVideoTime,
-               sort_Quality: SortQuality, pages=2):
-
-        url = f"https://www.xvideos.com/?k={query}&sort={sorting_Sort}%&datef={sorting_Date}&durf={sorting_Time}&quality={sort_Quality}"
-        videos_ids = []
-
-        for page in range(pages):
-            response = requests.get(f"{url}&p={page}").content.decode("utf-8")
-            list_ids = REGEX_SEARCH_SCRAPE_VIDEOS.findall(response)
+    def extract_video_urls(cls, html_content):
+        # Parse the HTML content with BeautifulSoup
+        soup = BeautifulSoup(html_content, 'lxml')
+        video_urls = []
 
-            for video_id in list_ids:
-                videos_ids.append(video_id)
-
-        for id in videos_ids:
-            yield Video(f"https://xvideos.com/video{id}")
+        # Find all 'div' elements with the class 'thumb'
+        thumb_divs = soup.find_all('div', class_='thumb')
 
+        # Iterate over each 'thumb' div and extract the 'href' attribute from the 'a' tag within it
+        for div in thumb_divs:
+            a_tag = div.find('a', href=True)  # Find the first 'a' tag with an 'href' attribute
+            if a_tag and a_tag['href']:  # Ensure the 'a' tag and its 'href' attribute exist
+                video_urls.append(a_tag['href'])
 
+        return video_urls
 
+    @classmethod
+    def search(cls, query, sorting_Sort: Sort = Sort.Sort_relevance, sorting_Date: SortDate = SortDate.Sort_all,
+               sorting_Time: SortVideoTime = SortVideoTime.Sort_all, sort_Quality: SortQuality = SortQuality.Sort_all,
+               pages=2):
 
+        query = query.replace(" ", "+")
 
-Client().search(query="mia", sort_Quality=SortQuality.Sort_720p, sorting_Sort=Sort.Sort_rating, sorting_Date=SortDate.Sort_all, sorting_Time=SortVideoTime.Sort_long)
+        url = f"https://www.xvideos.com/?k={query}&sort={sorting_Sort}%&datef={sorting_Date}&durf={sorting_Time}&quality={sort_Quality}"
+        urls = []
+        for page in range(pages):
+            response = requests.get(f"{url}&p={page}").content.decode("utf-8")
+            urls_ = Client.extract_video_urls(response)
 
+            for url in urls_:
+                url = f"https://www.xvideos.com{url}"
 
+                if REGEX_VIDEO_CHECK_URL.match(url):
+                    urls.append(url)
 
+        for id in urls:
+            yield Video(id)