test(pacer): introduce tests for command to fetch docs from PACER
Showing 1 changed file with 192 additions and 0 deletions.
@@ -0,0 +1,192 @@
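"""Tests for the pacer_bulk_fetch management command.

do_pacer_fetch and time.sleep are mocked throughout, so no real PACER
requests are made. The tests cover document filtering, rate limiting,
error handling, and the per-court round-robin behavior of the command.
"""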
import random
from unittest.mock import patch

from cl.recap.models import PacerFetchQueue
from cl.search.factories import (
    CourtFactory,
    DocketEntryFactory,
    DocketFactory,
    RECAPDocumentFactory,
)
from cl.search.management.commands.pacer_bulk_fetch import Command
from cl.search.models import Docket, RECAPDocument
from cl.tests.cases import TestCase
from cl.users.factories import UserFactory


class BulkFetchPacerDocsTest(TestCase):
    @classmethod
    def setUpTestData(cls):
        cls.user = UserFactory()

        cls.courts = [CourtFactory() for _ in range(6)]

        dockets_per_court = 15
        entries_per_docket = 8

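        # Four page-count buckets; only documents drawn from the first
        # bucket meet the 1,000-page threshold used by these tests.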
        page_count_ranges = [
            (1000, 2000),
            (500, 999),
            (100, 499),
            (1, 99),
        ]
        cls.big_page_count = 1000
        cls.big_docs_count = 0

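        # 6 courts x 15 dockets x 8 entries = 720 RECAP documents in
        # total, spread across the page-count buckets above.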
        for court in cls.courts:
            [DocketFactory(court=court) for _ in range(dockets_per_court)]

        for docket in Docket.objects.all():
            docket_entries = [
                DocketEntryFactory(docket=docket)
                for _ in range(entries_per_docket)
            ]

            for de in docket_entries:
                min_pages, max_pages = random.choice(page_count_ranges)
                page_count = random.randint(min_pages, max_pages)
                if page_count >= cls.big_page_count:
                    cls.big_docs_count += 1
                RECAPDocumentFactory(
                    docket_entry=de,
                    page_count=page_count,
                    is_available=False,
                )

    def setUp(self):
        self.command = Command()
        # Sanity-check the fixture data against the same filters the
        # command is expected to apply (this assumes RECAPDocumentFactory
        # populates pacer_doc_id).
        self.big_docs_created = RECAPDocument.objects.filter(
            page_count__gte=self.big_page_count,
            is_available=False,
            pacer_doc_id__isnull=False,
        )
        self.assertEqual(self.big_docs_count, self.big_docs_created.count())

@patch("time.sleep") | ||
@patch("cl.search.management.commands.pacer_bulk_fetch.do_pacer_fetch") | ||
def test_document_filtering( | ||
self, | ||
mock_fetch, | ||
mock_sleep, | ||
): | ||
"""Test document filtering according to command arguments passed.""" | ||
self.command.handle( | ||
min_page_count=self.big_page_count, | ||
request_interval=1.0, | ||
username=self.user.username, | ||
testing=True, | ||
) | ||
|
||
self.assertEqual( | ||
mock_fetch.call_count, | ||
self.big_docs_count, | ||
f"Expected {self.big_docs_count} documents to be processed", | ||
) | ||
|
||
fetch_queues = PacerFetchQueue.objects.all() | ||
self.assertEqual( | ||
fetch_queues.count(), | ||
self.big_docs_count, | ||
f"Expected {self.big_docs_count} fetch queues", | ||
) | ||
|
||
enqueued_doc_ids = [fq.recap_document_id for fq in fetch_queues] | ||
big_doc_ids = self.big_docs_created.values_list("id", flat=True) | ||
self.assertSetEqual(set(enqueued_doc_ids), set(big_doc_ids)) | ||
|
||
@patch("time.sleep") | ||
@patch("cl.search.management.commands.pacer_bulk_fetch.do_pacer_fetch") | ||
def test_rate_limiting(self, mock_fetch, mock_sleep): | ||
"""Test rate limiting.""" | ||
interval = 2.0 | ||
self.command.handle( | ||
min_page_count=1000, | ||
request_interval=interval, | ||
username=self.user.username, | ||
testing=True, | ||
) | ||
|
||
self.assertEqual( | ||
mock_sleep.call_count, | ||
mock_fetch.call_count - 1, | ||
"Sleep should be called between each fetch", | ||
) | ||
|
||
for call in mock_sleep.call_args_list: | ||
self.assertEqual( | ||
call.args[0], | ||
interval, | ||
f"Expected sleep interval of {interval} seconds", | ||
) | ||
|
||
@patch("time.sleep") | ||
@patch("cl.search.management.commands.pacer_bulk_fetch.do_pacer_fetch") | ||
def test_error_handling(self, mock_fetch, mock_sleep): | ||
"""Test that errors are handled gracefully""" | ||
mock_fetch.side_effect = Exception("PACER API error") | ||
|
||
self.command.handle( | ||
min_page_count=1000, | ||
request_interval=1.0, | ||
username=self.user.username, | ||
testing=True, | ||
) | ||
|
||
self.assertEqual( | ||
PacerFetchQueue.objects.count(), | ||
self.big_docs_count, | ||
) | ||
|
||
@patch("time.sleep") | ||
@patch("cl.search.management.commands.pacer_bulk_fetch.do_pacer_fetch") | ||
def test_round_robin(self, mock_fetch, mock_sleep): | ||
""" | ||
Verify that each call to 'execute_round' never processes the same court | ||
more than once. | ||
""" | ||
calls_per_round = [] | ||
original_execute_round = self.command.execute_round | ||
|
||
def track_rounds_side_effect(remaining_courts, options, is_last_round): | ||
""" | ||
Compares the mock_fetch calls before and after calling execute_round, | ||
then saves new calls that occurred during this round. | ||
""" | ||
start_index = len(mock_fetch.call_args_list) | ||
updated_remaining = original_execute_round( | ||
remaining_courts, options, is_last_round | ||
) | ||
end_index = len(mock_fetch.call_args_list) | ||
current_round_calls = mock_fetch.call_args_list[ | ||
start_index:end_index | ||
] | ||
calls_per_round.append(current_round_calls) | ||
|
||
return updated_remaining | ||
|
||
with patch.object( | ||
Command, "execute_round", side_effect=track_rounds_side_effect | ||
): | ||
# Run command with patched execute_round to save do_pacer_fetch | ||
# calls in each round | ||
self.command.handle( | ||
min_page_count=1000, | ||
request_interval=1.0, | ||
username=self.user.username, | ||
testing=True, | ||
) | ||
|
||
for round_index, round_calls in enumerate(calls_per_round, start=1): | ||
court_ids_this_round = [] | ||
|
||
for call in round_calls: | ||
fetch_queue_obj = call.args[0] | ||
court_id = ( | ||
fetch_queue_obj.recap_document.docket_entry.docket.court_id | ||
) | ||
court_ids_this_round.append(court_id) | ||
|
||
self.assertEqual( | ||
len(court_ids_this_round), | ||
len(set(court_ids_this_round)), | ||
f"Round {round_index} had duplicate courts: {court_ids_this_round}", | ||
) |
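
For reference, these tests only touch the command's public surface: handle(**options), execute_round(remaining_courts, options, is_last_round), and a module-level do_pacer_fetch that is patched out. The sketch below shows one shape of pacer_bulk_fetch that would satisfy the tests. The names and call signatures come from the test file; everything inside the bodies (including the queue-building helper logic and the REQUEST_TYPE value) is an assumption, not the actual implementation in cl/search/management/commands/pacer_bulk_fetch.py.

    # Sketch only: an assumed implementation consistent with the tests above.
    import time
    from collections import defaultdict, deque

    from django.contrib.auth.models import User
    from django.core.management.base import BaseCommand

    from cl.recap.models import REQUEST_TYPE, PacerFetchQueue
    from cl.search.models import RECAPDocument


    def do_pacer_fetch(fq):
        """Stand-in for the real fetch helper; mocked out in the tests."""


    class Command(BaseCommand):
        def execute_round(self, remaining_courts, options, is_last_round):
            # One document per court per round: the round-robin property
            # that test_round_robin asserts.
            for court_id in list(remaining_courts):
                doc = remaining_courts[court_id].popleft()
                fq = PacerFetchQueue.objects.create(
                    user_id=options["user_id"],
                    request_type=REQUEST_TYPE.PDF,  # assumed request type
                    recap_document=doc,
                )
                try:
                    do_pacer_fetch(fq)
                except Exception:
                    # Fetch errors must not abort the run
                    # (see test_error_handling).
                    pass
                if not remaining_courts[court_id]:
                    del remaining_courts[court_id]
                # Sleep between requests, but not after the very last one,
                # matching the sleep_count == fetch_count - 1 assertion.
                if remaining_courts or not is_last_round:
                    time.sleep(options["request_interval"])
            return remaining_courts

        def handle(self, *args, **options):
            options["user_id"] = User.objects.get(
                username=options["username"]
            ).pk
            docs = RECAPDocument.objects.filter(
                page_count__gte=options["min_page_count"],
                is_available=False,
                pacer_doc_id__isnull=False,
            )
            # Group documents by court so rounds can rotate across courts.
            remaining_courts = defaultdict(deque)
            for doc in docs.select_related("docket_entry__docket"):
                remaining_courts[doc.docket_entry.docket.court_id].append(doc)
            while remaining_courts:
                is_last_round = all(
                    len(queue) == 1 for queue in remaining_courts.values()
                )
                remaining_courts = self.execute_round(
                    remaining_courts, options, is_last_round
                )

If the command's argparse flags mirror these option names (again, an assumption), a real run would look something like: python manage.py pacer_bulk_fetch --min-page-count 1000 --request-interval 2.0 --username some_user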