From d3729ef7c729b0c51a2b0fbaaf5c61c1afd3e3eb Mon Sep 17 00:00:00 2001
From: MQ
Date: Mon, 13 Jan 2025 20:51:03 +0100
Subject: [PATCH] fix input schema field type, fix logging

---
 .actor/input_schema.json |  2 +-
 src/helpers.py           |  5 +++--
 src/main.py              | 13 +++++++------
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/.actor/input_schema.json b/.actor/input_schema.json
index 9023c0a..d064471 100644
--- a/.actor/input_schema.json
+++ b/.actor/input_schema.json
@@ -18,5 +18,5 @@
             "default": 1
         }
     },
-    "required": ["url"]
+    "required": ["startUrl"]
 }
diff --git a/src/helpers.py b/src/helpers.py
index ba7d141..52ba963 100644
--- a/src/helpers.py
+++ b/src/helpers.py
@@ -13,6 +13,7 @@
 if TYPE_CHECKING:
     from apify_client.clients import KeyValueStoreClientAsync
 
+logger = logging.getLogger('apify')
 
 def get_hostname_path_string_from_url(url: str) -> str:
     """Extracts the hostname and path from the URL."""
@@ -38,10 +39,10 @@ async def get_description_from_kvstore(kvstore: KeyValueStoreClientAsync, html_u
     """Extracts the description from the HTML content stored in the KV store."""
     store_id = html_url.split('records/')[-1]
     if not (record := await kvstore.get_record(store_id)):
-        logging.warning(f'Failed to get record with id "{store_id}"!')
+        logger.warning(f'Failed to get record with id "{store_id}"!')
         return None
     if not (html := record.get('value')) or not isinstance(html, str):
-        logging.warning(f'Invalid HTML content for record with id "{store_id}"!')
+        logger.warning(f'Invalid HTML content for record with id "{store_id}"!')
         return None
 
     return get_description_from_html(html)
diff --git a/src/main.py b/src/main.py
index 8f74df3..082d649 100644
--- a/src/main.py
+++ b/src/main.py
@@ -17,6 +17,7 @@
 if TYPE_CHECKING:
     from src.types import SectionDict
 
+logger = logging.getLogger('apify')
 
 async def main() -> None:
     """Main entry point for the Apify Actor.
@@ -34,7 +35,7 @@ async def main() -> None:
     max_crawl_depth = int(actor_input.get('maxCrawlDepth', 1))
 
     # call apify/website-content-crawler actor to get the html content
-    logging.info(f'Starting the "apify/website-content-crawler" actor for URL: {url}')
+    logger.info(f'Starting the "apify/website-content-crawler" actor for URL: {url}')
     actor_run_details = await Actor.call(
         'apify/website-content-crawler',
         get_crawler_actor_config(url, max_crawl_depth=max_crawl_depth),
@@ -57,13 +58,13 @@
     async for item in run_dataset.iterate_items():
         item_url = item.get('url')
-        logging.info(f'Processing page: {item_url}')
+        logger.info(f'Processing page: {item_url}')
         if item_url is None:
-            logging.warning('Missing "url" attribute in dataset item!')
+            logger.warning('Missing "url" attribute in dataset item!')
             continue
 
         html_url = item.get('htmlUrl')
         if html_url is None:
-            logging.warning('Missing "htmlUrl" attribute in dataset item!')
+            logger.warning('Missing "htmlUrl" attribute in dataset item!')
             continue
 
         is_root = item_url == url
@@ -93,7 +94,7 @@
     # save into kv-store as a file to be able to download it
     store = await Actor.open_key_value_store()
    await store.set_value('llms.txt', output)
-    logging.info('Saved the "llms.txt" file into the key-value store!')
+    logger.info('Saved the "llms.txt" file into the key-value store!')
 
     await Actor.push_data({'llms.txt': output})
-    logging.info('Pushed the "llms.txt" file to the dataset!')
+    logger.info('Pushed the "llms.txt" file to the dataset!')
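
Note on the logging change: below is a minimal, self-contained sketch of why the patch swaps module-level logging.* calls for a named logger, assuming (as the patch implies) that the Apify SDK attaches its log handler to the 'apify' logger rather than to the root logger. The setup_demo_handler helper is a hypothetical stand-in for the SDK's own setup, not an Apify API.

    import logging

    # Assumption (implied by the patch): the Apify SDK attaches its handler and
    # formatter to the 'apify' logger. Module-level calls such as
    # logging.warning(...) dispatch to the root logger instead, bypassing that
    # handler, so those records miss the SDK's formatting and log collection.
    logger = logging.getLogger('apify')


    def setup_demo_handler() -> None:
        """Hypothetical stand-in for the handler the Apify SDK installs."""
        handler = logging.StreamHandler()
        handler.setFormatter(logging.Formatter('[%(name)s] %(levelname)s %(message)s'))
        logger.addHandler(handler)
        logger.setLevel(logging.INFO)
        # Demo-only: stop records from also propagating to the root logger,
        # which would print them a second time once root gains a handler.
        logger.propagate = False


    if __name__ == '__main__':
        setup_demo_handler()
        logging.warning('root logger: bypasses the SDK handler')        # pre-patch pattern
        logger.warning('apify logger: routed through the SDK handler')  # post-patch pattern

Run directly, the first message prints in the stdlib's default 'WARNING:root:...' form while the second comes out as '[apify] WARNING ...': the named logger is the only one the configured handler sees, which is the behavioral difference this patch relies on.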