diff --git a/README.md b/README.md
index a571813..76476c5 100644
--- a/README.md
+++ b/README.md
@@ -401,6 +401,7 @@ Since the `EDTFField` and the `_earliest` and `_latest` field values are set aut
 ### Running tests
 
 - From `python-edtf`, run the unit tests: `pytest`
+- From `python-edtf`, run `pytest -m benchmark` to run the benchmarks
 - From `python-edtf/edtf_django_tests`, run the integration tests: `python manage.py test edtf_integration`
 - To run CI locally, use `act`, e.g. `act pull_request` or `act --pull=false --container-architecture linux/amd64`. Some steps may require a Github PAT: `act pull_request --container-architecture linux/amd64 --pull=false -s GITHUB_TOKEN=`
 
diff --git a/edtf/natlang/tests.py b/edtf/natlang/tests.py
index 78ecbc9..d2c43a5 100644
--- a/edtf/natlang/tests.py
+++ b/edtf/natlang/tests.py
@@ -185,3 +185,30 @@ def test_natlang(input_text, expected_output):
     assert (
         result == expected_output
     ), f"Failed for input: {input_text} - expected {expected_output}, got {result}"
+
+
+@pytest.mark.benchmark
+@pytest.mark.parametrize(
+    "input_text,expected_output",
+    [
+        ("23rd Dynasty", None),
+        ("January 2008", "2008-01"),
+        ("ca1860", "1860~"),
+        ("uncertain: approx 1862", "1862%"),
+        ("January", "XXXX-01"),
+        ("Winter 1872", "1872-24"),
+        ("before approx January 18 1928", "/1928-01-18~"),
+        ("birthday in 1872", "1872"),
+        ("1270 CE", "1270"),
+        ("2nd century bce", "-01XX"),
+        ("1858/1860", "[1858, 1860]"),
+    ],
+)
+def test_benchmark_natlang(benchmark, input_text, expected_output):
+    """
+    Benchmark selected natural language conversions and verify their output
+    """
+    # The benchmark fixture returns the value of the benchmarked call, so the
+    # parametrized expectation is checked without running a second conversion.
+    result = benchmark(text_to_edtf, input_text)
+    assert result == expected_output
diff --git a/edtf/parser/grammar.py b/edtf/parser/grammar.py
index e6232c4..1747341 100644
--- a/edtf/parser/grammar.py
+++ b/edtf/parser/grammar.py
@@ -1,3 +1,11 @@
+# ruff: noqa: E402 I001
+
+# It's recommended to `enablePackrat()` immediately after importing pyparsing
+# https://github.com/pyparsing/pyparsing/wiki/Performance-Tips
+import pyparsing
+
+pyparsing.ParserElement.enablePackrat()
+
 from pyparsing import (
     Combine,
     NotAny,
@@ -13,6 +21,7 @@
 )
 from pyparsing import Literal as L
 
+
 from edtf.parser.edtf_exceptions import EDTFParseException
 
 # (* ************************** Level 0 *************************** *)
diff --git a/edtf/parser/tests.py b/edtf/parser/tests.py
index 1ec7452..4932e95 100644
--- a/edtf/parser/tests.py
+++ b/edtf/parser/tests.py
@@ -216,6 +216,20 @@
     ("2001-34", ("2001-04-01", "2001-06-30")),
 )
 
+BENCHMARK_EXAMPLES = (
+    "2001-02-03",
+    "2008-12",
+    "2008",
+    "-0999",
+    "2004-01-01T10:10:10+05:00",
+    "-2005/-1999-02",
+    "/2006",
+    "?2004-%06",
+    "[1667, 1760-12]",
+    "Y3388E2S3",
+    "2001-29",
+)
+
 BAD_EXAMPLES = (
     # parentheses are not used for group qualification in the 2018 spec
     None,
@@ -340,3 +354,10 @@ def test_comparisons():
     assert d4 == d5
     assert d1 < d5
     assert d1 > d6
+
+
+@pytest.mark.benchmark
+@pytest.mark.parametrize("test_input", BENCHMARK_EXAMPLES)
+def test_benchmark_parser(benchmark, test_input):
+    """Benchmark parsing of selected EDTF strings."""
+    benchmark(parse, test_input)
diff --git a/pyproject.toml b/pyproject.toml
index 64579ae..56978fb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,6 +39,7 @@ test = [
     "django>=4.2,<5.0",
     "pytest",
     "pytest-django",
+    "pytest-benchmark",
     "ruff",
     "pre-commit",
     "coverage",
@@ -81,8 +82,11 @@ legacy_tox_ini = """
 python_files = ["tests.py", "test_*.py", "*_test.py", "*_tests.py"]
 python_classes = ["Test*", "*Tests"]
 python_functions = ["test_*"]
-addopts = "--ignore=edtf_django_tests/ --cov=edtf"
-plugins = ["pytest_cov"]
+markers = [
+    "benchmark: mark a test as a benchmark",
+]
+addopts = "--ignore=edtf_django_tests/ --cov=edtf -m 'not benchmark'"
+plugins = ["pytest_cov", "pytest_benchmark"]
 
 [tool.coverage.run]
 # we run the edtf_integration tests but only care about them testing fields.py in the main package