Skip to content

Commit

Permalink
Fix result duplication (#30)
Browse files Browse the repository at this point in the history
* Don't encode page=1 in url for reuse
* Add 'from' field to paginated url
* Bump version
  • Loading branch information
GalenReich authored Jun 18, 2024
1 parent 921d92f commit 3e4ca09
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 6 deletions.
6 changes: 1 addition & 5 deletions edgar_tool/text_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,6 @@ def _generate_request_args(
single_forms: Optional[List[str]],
start_date: date,
end_date: date,
page_number: int,
peo_in: Optional[str],
inc_in: Optional[str],
) -> str:
Expand All @@ -204,7 +203,6 @@ def _generate_request_args(
:param single_forms: List of single forms to search for (e.g. ['10-K', '10-Q']), defaults to None
:param start_date: Start date for the custom date range, defaults to 5 years ago to replicate the default behavior of the SEC website
:param end_date: End date for the custom date range, defaults to current date in order to replicate the default behavior of the SEC website
:param page_number: Page number to request, defaults to 1
:param peo_in: Search principal executive offices in a location (e.g. "NY,OH")
:param inc_in: Search incorporated in a location (e.g. "NY,OH")
Expand All @@ -224,7 +222,6 @@ def _generate_request_args(
"dateRange": "custom",
"startdt": start_date.strftime("%Y-%m-%d"),
"enddt": end_date.strftime("%Y-%m-%d"),
"page": page_number,
}

# Add optional parameters
Expand Down Expand Up @@ -286,7 +283,7 @@ def _fetch_search_request_results(
num_pages = self._compute_number_of_pages()

for i in range(1, num_pages + 1):
- paginated_url = f"{TEXT_SEARCH_BASE_URL}{search_request_url_args}&page={i}"
+ paginated_url = f"{TEXT_SEARCH_BASE_URL}{search_request_url_args}&page={i}&from={100*(i-1)}"
try:
self.json_response = fetch_page(
paginated_url,
Expand Down Expand Up @@ -354,7 +351,6 @@ def _generate_search_requests(
single_forms=single_forms,
start_date=start_date,
end_date=end_date,
page_number=1,
peo_in=peo_in,
inc_in=inc_in,
)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "edgar-tool"
- version = "1.3.0"
+ version = "1.3.1"
description = "Search and retrieve corporate and financial data from the United States Securities and Exchange Commission (SEC)."
authors = ["Bellingcat"]
license = "GNU General Public License v3 (GPLv3)"
Expand Down

0 comments on commit 3e4ca09

Please sign in to comment.