fix input schema field type, fix logging
MQ37 committed Jan 13, 2025
1 parent 0b00e65 commit d3729ef
Showing 3 changed files with 11 additions and 9 deletions.
2 changes: 1 addition & 1 deletion .actor/input_schema.json
@@ -18,5 +18,5 @@
       "default": 1
     }
   },
-  "required": ["url"]
+  "required": ["startUrl"]
 }
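
The rename matters on the consuming side: the Actor's input reader must ask for the same key the schema now requires. A minimal sketch of that read, assuming the usual Apify Python SDK entry point (the function and variable names here are illustrative, not taken from this commit):

from apify import Actor

async def read_input() -> tuple[str, int]:
    # Fetch the Actor input; on the platform the schema above guarantees
    # "startUrl" is present, but a local run may omit it, hence the check.
    actor_input = await Actor.get_input() or {}
    url = actor_input.get('startUrl')
    if not url:
        raise ValueError('Missing the required "startUrl" input field!')
    max_crawl_depth = int(actor_input.get('maxCrawlDepth', 1))
    return url, max_crawl_depth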
5 changes: 3 additions & 2 deletions src/helpers.py
@@ -13,6 +13,7 @@
 if TYPE_CHECKING:
     from apify_client.clients import KeyValueStoreClientAsync
 
+logger = logging.getLogger('apify')
 
 def get_hostname_path_string_from_url(url: str) -> str:
     """Extracts the hostname and path from the URL."""
@@ -38,10 +39,10 @@ async def get_description_from_kvstore(kvstore: KeyValueStoreClientAsync, html_url
     """Extracts the description from the HTML content stored in the KV store."""
     store_id = html_url.split('records/')[-1]
     if not (record := await kvstore.get_record(store_id)):
-        logging.warning(f'Failed to get record with id "{store_id}"!')
+        logger.warning(f'Failed to get record with id "{store_id}"!')
         return None
     if not (html := record.get('value')) or not isinstance(html, str):
-        logging.warning(f'Invalid HTML content for record with id "{store_id}"!')
+        logger.warning(f'Invalid HTML content for record with id "{store_id}"!')
         return None
 
     return get_description_from_html(html)
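The switch from module-level `logging.*` calls to a named logger is the substance of the "fix logging" half of this commit: the Apify SDK attaches its handler and formatter to the 'apify' logger, so messages sent through `logging.getLogger('apify')` appear properly in the Actor run log, while calls on the root `logging` module bypass that configuration. A small sketch of the difference (illustrative only; the `demo` function is hypothetical):

import logging

# Module-level named logger; the Apify SDK configures its handler and
# formatter on the 'apify' logger, so output from this logger shows up
# formatted in the Actor run log.
logger = logging.getLogger('apify')

def demo() -> None:
    logger.info('Routed through the configured "apify" handler.')
    # Root-logger calls, like the ones this commit replaces, depend on
    # whatever handlers happen to be set on the root logger.
    logging.info('May be formatted differently, or dropped entirely.')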
13 changes: 7 additions & 6 deletions src/main.py
@@ -17,6 +17,7 @@
 if TYPE_CHECKING:
     from src.types import SectionDict
 
+logger = logging.getLogger('apify')
 
 async def main() -> None:
     """Main entry point for the Apify Actor.
@@ -34,7 +35,7 @@ async def main() -> None:
     max_crawl_depth = int(actor_input.get('maxCrawlDepth', 1))
 
     # call apify/website-content-crawler actor to get the html content
-    logging.info(f'Starting the "apify/website-content-crawler" actor for URL: {url}')
+    logger.info(f'Starting the "apify/website-content-crawler" actor for URL: {url}')
     actor_run_details = await Actor.call(
         'apify/website-content-crawler',
         get_crawler_actor_config(url, max_crawl_depth=max_crawl_depth),
@@ -57,13 +58,13 @@ async def main() -> None:
 
     async for item in run_dataset.iterate_items():
         item_url = item.get('url')
-        logging.info(f'Processing page: {item_url}')
+        logger.info(f'Processing page: {item_url}')
         if item_url is None:
-            logging.warning('Missing "url" attribute in dataset item!')
+            logger.warning('Missing "url" attribute in dataset item!')
             continue
         html_url = item.get('htmlUrl')
         if html_url is None:
-            logging.warning('Missing "htmlUrl" attribute in dataset item!')
+            logger.warning('Missing "htmlUrl" attribute in dataset item!')
             continue
 
         is_root = item_url == url
@@ -93,7 +94,7 @@ async def main() -> None:
     # save into kv-store as a file to be able to download it
     store = await Actor.open_key_value_store()
     await store.set_value('llms.txt', output)
-    logging.info('Saved the "llms.txt" file into the key-value store!')
+    logger.info('Saved the "llms.txt" file into the key-value store!')
 
     await Actor.push_data({'llms.txt': output})
-    logging.info('Pushed the "llms.txt" file to the dataset!')
+    logger.info('Pushed the "llms.txt" file to the dataset!')
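
For context, the `run_dataset` the loop iterates over has to come from the crawler run started by `Actor.call`; the diff doesn't show that step. A plausible bridge, assuming the SDK's run object exposes `default_dataset_id` (an assumption about the Apify Python SDK's run model, not confirmed by this commit):

# Hypothetical glue between Actor.call and the dataset iteration above.
actor_run_details = await Actor.call(
    'apify/website-content-crawler',
    get_crawler_actor_config(url, max_crawl_depth=max_crawl_depth),
)
if actor_run_details is None:
    raise RuntimeError('The "apify/website-content-crawler" actor run failed to start!')

# default_dataset_id is assumed here; check the SDK's run model.
run_dataset = await Actor.open_dataset(id=actor_run_details.default_dataset_id)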
