Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Write-test-suite-#11 #39

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 135 additions & 3 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
import pytest
import subprocess
import warnings
from datetime import datetime
from unittest.mock import patch
from edgar_tool.cli import SecEdgarScraperCli


def test_cli_should_return_help_string_when_passed_no_args():
"""Tests that running edgar-tool without any arguments returns the CLI's help string and 0 exit code."""
"""
Tests that running `edgar-tool` without any arguments returns the CLI's help string
and an exit code of 0.
"""
# GIVEN
expected = """
expected_help = """
NAME
edgar-tool

Expand All @@ -26,4 +34,128 @@ def test_cli_should_return_help_string_when_passed_no_args():

# THEN
assert result.returncode == 0
assert result.stdout.strip() == expected.strip()
assert result.stdout.strip() == expected_help.strip()


@patch('edgar_tool.text_search.EdgarTextSearcher.text_search')
def test_text_search_capture_arguments(mock_text_search):
    """
    Checks the keyword arguments that `SecEdgarScraperCli.text_search` hands to the
    patched `EdgarTextSearcher.text_search` for a fully specified CLI invocation.
    """
    # ARRANGE: mock_text_search is injected by the patch decorator; spell out the
    # call the CLI is expected to make on it.
    expected_call_kwargs = dict(
        keywords=["Tsunami", "Hazards"],
        entity_id="0001030717",
        # Mapped with TEXT_SEARCH_FILING_VS_MAPPING_CATEGORIES_MAPPING
        filing_form="All annual, quarterly, and current reports",
        single_forms=None,
        start_date=datetime(2021, 1, 1),
        end_date=datetime(2021, 12, 31),
        min_wait_seconds=5.0,
        max_wait_seconds=7.0,
        retries=3,
        destination="results.csv",
        # Whitespace stripped from the "NY, OH" passed to the CLI
        peo_in="NY,OH",
        inc_in=None,
    )

    # ACT
    SecEdgarScraperCli.text_search(
        "Tsunami",
        "Hazards",
        output="results.csv",
        entity_id="0001030717",
        filing_form="all_annual_quarterly_and_current_reports",
        start_date="2021-01-01",
        end_date="2021-12-31",
        min_wait=5.0,
        max_wait=7.0,
        retries=3,
        peo_in="NY, OH",
        inc_in=None,
    )

    # ASSERT: exactly one call, with exactly these keyword arguments.
    mock_text_search.assert_called_once_with(**expected_call_kwargs)

@patch("edgar_tool.text_search.write_results_to_file")
def test_text_search_end_to_end(mock_write_results_to_file):
    """
    Tests the end-to-end functionality of `SecEdgarScraperCli.text_search` by
    inspecting what it passes to `text_search.write_results_to_file`.
    Uses patch to avoid file creation during testing.

    Running the search and pulling the first result out of the patched call both
    happen inside a `try` block: any exception raised there (the original author
    cites internet connection or server issues) is reported as a `UserWarning`
    instead of a test failure.

    The assertion itself runs in the `else` clause, outside the `try`, so that a
    failed check surfaces as a real test failure. `AssertionError` is a subclass
    of `Exception`, so asserting inside the `try` would silently turn every
    failed check into a warning.
    """
    # ARRANGE: mock_write_results_to_file arg is provided by the patch decorator.
    try:
        # ACT
        SecEdgarScraperCli.text_search(
            "John Doe",
            output="results.csv",
            start_date="2021-01-01",
            end_date="2021-01-31",
        )

        # Extract the first positional argument of the patched call and
        # materialise it; kept inside the try so that a missing call or an
        # empty/unexpected payload is still only reported as a warning.
        call_args = mock_write_results_to_file.call_args
        results = list(call_args[0][0])
        first_entry = results[0][0]
    except Exception as e:
        # Because net connection or server issues can cause the above to fail.
        warnings.warn(
            f"An exception occurred: {str(e)}\n"
            "There might be an issue with accessing the SEC website or the SEC's return payload.",
            UserWarning
        )
    else:
        # ASSERT: Check if 'root_form' is present in the first result.
        # Open review question from the PR thread (collaborator): what is
        # results[0][0], and what is the significance of root_form? Suggested
        # alternative: an end-to-end test that searches over a given time
        # period and asserts the exact value written to the patched file.
        assert 'root_form' in first_entry


@patch('edgar_tool.text_search.EdgarTextSearcher.text_search')
def test_text_search_with_both_peo_in_and_inc_in(mock_text_search):
    """
    Tests that calling `SecEdgarScraperCli.text_search` with both `peo_in` and
    `inc_in` set ends in an exception carrying the "use only one" message.

    NOTE(review): the exception asserted here is the one this test configures as
    the mock's `side_effect`, so as written the test passes whenever the patched
    method is reached. Confirm whether the real validation should be exercised
    instead.
    """
    # ARRANGE: mock_text_search arg is provided by the patch decorator; make the
    # patched method raise the conflict error when it is invoked.
    conflict_message = "Use only one of peo_in or inc_in, not both."
    mock_text_search.side_effect = Exception(conflict_message)
    conflicting_options = dict(
        start_date="2019-06-01",
        end_date="2024-01-01",
        inc_in="NY,OH",
        peo_in="NY,OH",
    )

    # ACT & ASSERT
    with pytest.raises(Exception, match=conflict_message):
        SecEdgarScraperCli.text_search(["Tsunami", "Hazards"], **conflicting_options)

@patch('edgar_tool.rss.write_results_to_file')
def test_rss_end_to_end(mock_rss):
    """
    Tests that `SecEdgarScraperCli.rss` runs and reaches `rss.write_results_to_file`.
    Does not assert anything about the contents because they are liable to change.
    Uses patch to suppress file creation during testing.

    Any exception raised while running the command (the original author cites
    internet connection or server issues) is reported as a `UserWarning` instead
    of a test failure.

    The assertion runs in the `else` clause, outside the `try`, so that a failed
    check surfaces as a real test failure. `AssertionError` is a subclass of
    `Exception`, so asserting inside the `try` would silently turn a failed
    check into a warning.
    """
    # ARRANGE: mock_rss arg is provided by the patch decorator.
    try:
        # ACT: simulates `edgar-tool rss "GOOG" --output "rss_feed.csv"`
        SecEdgarScraperCli.rss(
            "GOOG",
            output="rss_feed.csv",
        )
    except Exception as e:
        # Because net connection or server issues can cause the above to fail.
        warnings.warn(
            f"An exception occurred: {str(e)}\n"
            "There might be an issue with accessing the RSS feed or the return payload.",
            UserWarning
        )
    else:
        # ASSERT: Checks that rss.write_results_to_file would have been called,
        # but does not call it to avoid file creation during testing.
        assert mock_rss.call_args
57 changes: 57 additions & 0 deletions tests/test_text_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import urllib.parse
from datetime import date
from edgar_tool.cli import EdgarTextSearcher

def decode_url(query_string: str) -> str:
    """
    Parses the query string, sorts the comma-separated values of its 'forms'
    parameter, and re-encodes the whole query string.

    This is necessary because raw forms can be order agnostic and
    URL strings cannot be compared directly due to encoding differences.

    Only the first 'forms' value is considered if the parameter is repeated.
    A query string without a 'forms' parameter is re-encoded as-is; no empty
    'forms' parameter is added.

    Args:
        query_string (str): The query string to parse and modify.

    Returns:
        str: The re-encoded query string with the 'forms' parameter sorted and
        the other parameters kept in their original order.
    """
    parsed_query = urllib.parse.parse_qs(query_string)

    # Only touch 'forms' when it is actually present, so that the set of
    # parameters in the output matches the input.
    if 'forms' in parsed_query:
        forms = parsed_query['forms'][0]
        parsed_query['forms'] = ','.join(sorted(forms.split(',')))

    # doseq=True encodes the single-element lists produced by parse_qs as
    # plain key=value pairs instead of the repr of a list.
    return urllib.parse.urlencode(parsed_query, doseq=True)

def test_generate_request_args():
    """
    Verifies the query string built by `EdgarTextSearcher._generate_request_args`
    against a known-good one, comparing through `decode_url` so that the order of
    the 'forms' values does not matter.
    """
    # ARRANGE
    expected_query = (
        'q=Tsunami+Hazards&dateRange=custom&startdt=2019-06-01&enddt=2024-01-01'
        '&locationCodes=NY,OH&locationType=incorporated&entityName=0001030717'
        '&forms=15-12B,1-K,40-F,24F-2NT,N-30B-2,NT+10-D,ABS-15G,20-F,1-Z,15-15D'
        ',6-K,13F-NT,N-MFP1,10-QT,QRTLYRPT,11-KT,15-12G,DSTRBRPT,NSAR-B,25-NSE'
        ',ABS-EE,N-30D,N-MFP2,ANNLRPT,N-PX,25,NPORT-EX,SP+15D2,NT+20-F,1-SA'
        ',NSAR-A,1-U,13F-HR,8-K12G3,N-CSR,SD,NT+11-K,N-Q,40-17F2,8-K15D5'
        ',NT+10-K,10-KT,NSAR-U,NT+10-Q,10-D,15F-15D,10-K,N-CSRS,10-Q,18-K'
        ',IRANNOTICE,1-Z-W,15F-12G,11-K,N-CEN,15F-12B,N-MFP,8-K,40-17G'
    )
    search_parameters = dict(
        keywords=['Tsunami', 'Hazards'],
        entity_id='0001030717',
        filing_form="All annual, quarterly, and current reports",
        single_forms=['8-K'],
        start_date=date(2019, 6, 1),
        end_date=date(2024, 1, 1),
        peo_in=None,
        inc_in="NY,OH",
    )

    # ACT
    actual_query = EdgarTextSearcher._generate_request_args(**search_parameters)

    # ASSERT: compare the normalised forms of both query strings.
    assert decode_url(actual_query) == decode_url(expected_query)
Loading