fix input schema field type, fix logging
MQ37 committed Jan 13, 2025
1 parent 0b00e65 commit d3729ef
Showing 3 changed files with 11 additions and 9 deletions.
2 changes: 1 addition & 1 deletion .actor/input_schema.json
@@ -18,5 +18,5 @@
       "default": 1
     }
   },
-  "required": ["url"]
+  "required": ["startUrl"]
 }
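
The rename matters on the consuming side: the Actor's input reader must ask for the same key the schema now requires. A minimal sketch of that read, assuming the usual Apify Python SDK entry point (the function and variable names here are illustrative, not taken from this commit):

from apify import Actor

async def read_input() -> tuple[str, int]:
    # Fetch the Actor input; on the platform the schema above guarantees
    # "startUrl" is present, but a local run may omit it, hence the check.
    actor_input = await Actor.get_input() or {}
    url = actor_input.get('startUrl')
    if not url:
        raise ValueError('Missing the required "startUrl" input field!')
    max_crawl_depth = int(actor_input.get('maxCrawlDepth', 1))
    return url, max_crawl_depth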
5 changes: 3 additions & 2 deletions src/helpers.py
@@ -13,6 +13,7 @@
 if TYPE_CHECKING:
     from apify_client.clients import KeyValueStoreClientAsync
 
+logger = logging.getLogger('apify')
 
 def get_hostname_path_string_from_url(url: str) -> str:
     """Extracts the hostname and path from the URL."""
@@ -38,10 +39,10 @@ async def get_description_from_kvstore(kvstore: KeyValueStoreClientAsync, html_url
     """Extracts the description from the HTML content stored in the KV store."""
     store_id = html_url.split('records/')[-1]
     if not (record := await kvstore.get_record(store_id)):
-        logging.warning(f'Failed to get record with id "{store_id}"!')
+        logger.warning(f'Failed to get record with id "{store_id}"!')
         return None
     if not (html := record.get('value')) or not isinstance(html, str):
-        logging.warning(f'Invalid HTML content for record with id "{store_id}"!')
+        logger.warning(f'Invalid HTML content for record with id "{store_id}"!')
         return None
 
     return get_description_from_html(html)
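The switch from module-level `logging.*` calls to a named logger is the substance of the "fix logging" half of this commit: the Apify SDK attaches its handler and formatter to the 'apify' logger, so messages sent through `logging.getLogger('apify')` appear properly in the Actor run log, while calls on the root `logging` module bypass that configuration. A small sketch of the difference (illustrative only; the `demo` function is hypothetical):

import logging

# Module-level named logger; the Apify SDK configures its handler and
# formatter on the 'apify' logger, so output from this logger shows up
# formatted in the Actor run log.
logger = logging.getLogger('apify')

def demo() -> None:
    logger.info('Routed through the configured "apify" handler.')
    # Root-logger calls, like the ones this commit replaces, depend on
    # whatever handlers happen to be set on the root logger.
    logging.info('May be formatted differently, or dropped entirely.')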
13 changes: 7 additions & 6 deletions src/main.py
@@ -17,6 +17,7 @@
 if TYPE_CHECKING:
     from src.types import SectionDict
 
+logger = logging.getLogger('apify')
 
 async def main() -> None:
     """Main entry point for the Apify Actor.
@@ -34,7 +35,7 @@ async def main() -> None:
     max_crawl_depth = int(actor_input.get('maxCrawlDepth', 1))
 
     # call apify/website-content-crawler actor to get the html content
-    logging.info(f'Starting the "apify/website-content-crawler" actor for URL: {url}')
+    logger.info(f'Starting the "apify/website-content-crawler" actor for URL: {url}')
     actor_run_details = await Actor.call(
         'apify/website-content-crawler',
         get_crawler_actor_config(url, max_crawl_depth=max_crawl_depth),
@@ -57,13 +58,13 @@ async def main() -> None:
 
     async for item in run_dataset.iterate_items():
         item_url = item.get('url')
-        logging.info(f'Processing page: {item_url}')
+        logger.info(f'Processing page: {item_url}')
         if item_url is None:
-            logging.warning('Missing "url" attribute in dataset item!')
+            logger.warning('Missing "url" attribute in dataset item!')
             continue
         html_url = item.get('htmlUrl')
         if html_url is None:
-            logging.warning('Missing "htmlUrl" attribute in dataset item!')
+            logger.warning('Missing "htmlUrl" attribute in dataset item!')
             continue
 
         is_root = item_url == url
@@ -93,7 +94,7 @@ async def main() -> None:
     # save into kv-store as a file to be able to download it
     store = await Actor.open_key_value_store()
     await store.set_value('llms.txt', output)
-    logging.info('Saved the "llms.txt" file into the key-value store!')
+    logger.info('Saved the "llms.txt" file into the key-value store!')
 
     await Actor.push_data({'llms.txt': output})
-    logging.info('Pushed the "llms.txt" file to the dataset!')
+    logger.info('Pushed the "llms.txt" file to the dataset!')
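
For context, the `run_dataset` the loop iterates over has to come from the crawler run started by `Actor.call`; the diff doesn't show that step. A plausible bridge, assuming the SDK's run object exposes `default_dataset_id` (an assumption about the Apify Python SDK's run model, not confirmed by this commit):

# Hypothetical glue between Actor.call and the dataset iteration above.
actor_run_details = await Actor.call(
    'apify/website-content-crawler',
    get_crawler_actor_config(url, max_crawl_depth=max_crawl_depth),
)
if actor_run_details is None:
    raise RuntimeError('The "apify/website-content-crawler" actor run failed to start!')

# default_dataset_id is assumed here; check the SDK's run model.
run_dataset = await Actor.open_dataset(id=actor_run_details.default_dataset_id)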
