From 8d9cb238e2331f4509c2534aa25cc0fbe36f0faf Mon Sep 17 00:00:00 2001 From: Eduardo Rodrigues Date: Tue, 25 Jul 2023 15:26:01 +0200 Subject: [PATCH] Enhance Lark parsing grammar to parse all LHCb dec files (#356) * Enhance Lark parsing grammar to parse all LHCb dec files * Add a test and fix a little issue * style: pre-commit fixes --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- src/decaylanguage/data/decfile.lark | 24 +++++++++++--------- src/decaylanguage/dec/dec.py | 3 +++ tests/data/test_multiline_model.dec | 35 +++++++++++++++++++++++++++++ tests/dec/test_dec.py | 9 ++++++++ 4 files changed, 60 insertions(+), 11 deletions(-) create mode 100644 tests/data/test_multiline_model.dec diff --git a/src/decaylanguage/data/decfile.lark b/src/decaylanguage/data/decfile.lark index beeb467b..2dc14577 100644 --- a/src/decaylanguage/data/decfile.lark +++ b/src/decaylanguage/data/decfile.lark @@ -28,8 +28,6 @@ define : "Define" LABEL SIGNED_NUMBER particle_def: "Particle" LABEL SIGNED_NUMBER SIGNED_NUMBER // Set the mass and width of a particle (in GeV) -model_alias : "ModelAlias" model_label model - alias : "Alias" LABEL LABEL chargeconj : "ChargeConj" LABEL LABEL @@ -55,9 +53,20 @@ label : LABEL particle : LABEL // Add full particle parsing here model_label : LABEL -model : (model_label _SEMICOLON+ | MODEL_NAME_AND_SC | MODEL_NAME_AND_WS model_options _SEMICOLON+) +model_alias : "ModelAlias" model_label model + +model : (model_label _SEMICOLON | MODEL_NAME_AND_SC | MODEL_NAME_AND_WS model_options _SEMICOLON+ | MODEL_NAME_MULTILINE model_options _SEMICOLON+) model_options : (value | LABEL | _NEWLINE | _COMMA)+ +// Model names must either be followed by at least one whitespace (when they have model parameters) or a semicolon (without model parameters) +// We must set priorities here to use lalr - match model name above label, and label above something else + +MODEL_NAME_AND_WS.2: MODEL_NAME _WS+ +MODEL_NAME_AND_SC.2: MODEL_NAME _WS* _SEMICOLON+ +MODEL_NAME_MULTILINE.2: MODEL_NAME NEWLINE _WS* + +MODEL_NAME : "BaryonPCR"|"BC_SMN"|"BC_TMN"|"BC_VHAD"|"BC_VMN"|"BCL"|"BGL"|"BLLNUL"|"BNOCB0TO4PICP"|"BNOCBPTO3HPI0"|"BNOCBPTOKSHHH"|"BS_MUMUKK"|"BSTOGLLISRFSR"|"BSTOGLLMNT"|"BT02PI_CP_ISO"|"BTO3PI_CP"|"BTODDALITZCPK"|"BToDiBaryonlnupQCD"|"BTOSLLALI"|"BTOSLLBALL"|"BTOSLLMS"|"BTOSLLMSEXT"|"BTOVLNUBALL"|"BTOXSGAMMA"|"BTOXELNU"|"BTOXSLL"|"BQTOLLLLHYPERCP"|"BQTOLLLL"|"CB3PI-MPP"|"CB3PI-P00"|"D_DALITZ"|"D_hhhh"|"D0GAMMADALITZ"|"doKm"|"DToKpienu"|"ETAPRIME_DALITZ"|"ETA_DALITZ"|"ETA_FULLDALITZ"|"ETA_LLPIPI"|"ETA_PI0DALITZ"|"FLATQ2"|"FLATSQDALITZ"|"FOURBODYPHSP"|"GENERIC_DALITZ"|"GOITY_ROBERTS"|"HELAMP"|"HQET3"|"HQET2"|"HQET"|"imqp"|"ISGW2"|"ISGW"|"KS_PI0MUMU"|"Lb2Baryonlnu"|"Lb2plnuLCSR"|"Lb2plnuLQCD"|"LbAmpGen"|"LLSW"|"LNUGAMMA"|"LQCD"|"MELIKHOV"|"OMEGA_DALITZ"|"PARTWAVE"|"PHI_DALITZ"|"PHSPDECAYTIMECUT"|"PHSPFLATLIFETIME"|"PHSP"|"PI0_DALITZ"|"PROPSLPOLE"|"PTO3P"|"PVV_CPLH"|"PYCONT"|"PYTHIA"|"SLBKPOLE"|"SLL"|"SLN"|"SLPOLE"|"SSD_CP"|"SSD_DirectCP"|"SSS_CP_PNG"|"SSS_CP"|"SSS_CPT"|"STS_CP"|"STS"|"SVP_CP"|"SVP_HELAMP"|"SVP"|"SVS_CP_ISO"|"SVS_CPLH"|"SVS_CP"|"SVS_NONCPEIGEN"|"SVS"|"SVV_CPLH"|"SVV_CP"|"SVV_HELAMP"|"SVV_NONCPEIGEN"|"SVVHELCPMIX"|"TAUHADNU"|"TAULNUNU"|"TAUOLA"|"TAUSCALARNU"|"TAUVECTORNU"|"THREEBODYPHSP"|"TSS"|"TVP"|"TVS_PWAVE"|"VLL"|"VSP_PWAVE"|"VSS_BMIX"|"VSS_MIX"|"VSS"|"VTOSLL"|"VUB"|"VVPIPI"|"VVP"|"VVS_PWAVE"|"XLL"|"YMSTOYNSPIPICLEO"|"YMSTOYNSPIPICLEOBOOST" + // model : model_generic // model_helamp : "HELAMP" (SIGNED_NUMBER SIGNED_NUMBER)+ @@ -65,6 +74,7 @@ model_options : (value | LABEL | _NEWLINE | _COMMA)+ // To use a fast parser, we need to avoid conflicts %import common.WS_INLINE +%import common.NEWLINE %import common.INT %import common.SIGNED_NUMBER @@ -74,14 +84,6 @@ _SEMICOLON: /;/ _COMMA: /,/ _WS: WS_INLINE -// Model names must either be followed by at least one whitespace (when they have model parameters) or a semicolon (without model parameters) -// We must set priorities here to use lalr - match model name above label, and label above something else - -MODEL_NAME_AND_WS.2: MODEL_NAME _WS+ -MODEL_NAME_AND_SC.2: MODEL_NAME _WS* _SEMICOLON+ - -MODEL_NAME : "BaryonPCR"|"BC_SMN"|"BC_TMN"|"BC_VHAD"|"BC_VMN"|"BCL"|"BGL"|"BLLNUL"|"BNOCB0TO4PICP"|"BNOCBPTO3HPI0"|"BNOCBPTOKSHHH"|"BS_MUMUKK"|"BSTOGLLISRFSR"|"BSTOGLLMNT"|"BT02PI_CP_ISO"|"BTO3PI_CP"|"BTODDALITZCPK"|"BToDiBaryonlnupQCD"|"BTOSLLALI"|"BTOSLLBALL"|"BTOSLLMS"|"BTOSLLMSEXT"|"BTOVLNUBALL"|"BTOXSGAMMA"|"BTOXELNU"|"BTOXSLL"|"BQTOLLLLHYPERCP"|"BQTOLLLL"|"CB3PI-MPP"|"CB3PI-P00"|"D_DALITZ"|"D_hhhh"|"D0GAMMADALITZ"|"doKm"|"DToKpienu"|"ETAPRIME_DALITZ"|"ETA_DALITZ"|"ETA_FULLDALITZ"|"ETA_LLPIPI"|"ETA_PI0DALITZ"|"FLATQ2"|"FLATSQDALITZ"|"FOURBODYPHSP"|"GENERIC_DALITZ"|"GOITY_ROBERTS"|"HELAMP"|"HQET3"|"HQET2"|"HQET"|"imqp"|"ISGW2"|"ISGW"|"KS_PI0MUMU"|"Lb2Baryonlnu"|"Lb2plnuLCSR"|"Lb2plnuLQCD"|"LbAmpGen"|"LLSW"|"LNUGAMMA"|"LQCD"|"MELIKHOV"|"OMEGA_DALITZ"|"PARTWAVE"|"PHI_DALITZ"|"PHSPDECAYTIMECUT"|"PHSPFLATLIFETIME"|"PHSP"|"PI0_DALITZ"|"PROPSLPOLE"|"PTO3P"|"PVV_CPLH"|"PYCONT"|"PYTHIA"|"SLBKPOLE"|"SLL"|"SLN"|"SLPOLE"|"SSD_CP"|"SSD_DirectCP"|"SSS_CP_PNG"|"SSS_CP"|"SSS_CPT"|"STS_CP"|"STS"|"SVP_CP"|"SVP_HELAMP"|"SVP"|"SVS_CP_ISO"|"SVS_CPLH"|"SVS_CP"|"SVS_NONCPEIGEN"|"SVS"|"SVV_CPLH"|"SVV_CP"|"SVV_HELAMP"|"SVV_NONCPEIGEN"|"SVVHELCPMIX"|"TAUHADNU"|"TAULNUNU"|"TAUOLA"|"TAUSCALARNU"|"TAUVECTORNU"|"THREEBODYPHSP"|"TSS"|"TVP"|"TVS_PWAVE"|"VLL"|"VSP_PWAVE"|"VSS_BMIX"|"VSS_MIX"|"VSS"|"VTOSLL"|"VUB"|"VVPIPI"|"VVP"|"VVS_PWAVE"|"XLL"|"YMSTOYNSPIPICLEO"|"YMSTOYNSPIPICLEOBOOST" - LABEL : /[a-zA-Z0-9\/\-+*_()'~]+/ COMMENT : /[#][^\n]*/ diff --git a/src/decaylanguage/dec/dec.py b/src/decaylanguage/dec/dec.py index bdd7a726..bf969f8d 100644 --- a/src/decaylanguage/dec/dec.py +++ b/src/decaylanguage/dec/dec.py @@ -1049,6 +1049,9 @@ def MODEL_NAME_AND_WS(self, t: Token) -> Token: def MODEL_NAME_AND_SC(self, t: Token) -> Token: return t.update(value=t.strip(";").strip()) + def MODEL_NAME_MULTILINE(self, t: Token) -> Token: + return t.update(value=t.strip()) + class DecayModelAliasReplacement(Transformer): # type: ignore[misc] """ diff --git a/tests/data/test_multiline_model.dec b/tests/data/test_multiline_model.dec new file mode 100644 index 00000000..1b44eea7 --- /dev/null +++ b/tests/data/test_multiline_model.dec @@ -0,0 +1,35 @@ +# Example decay chain for testing purposes: +# Test for rather complex models defined in several lines + +Decay B+ +1.000 K+ K- pi+ PTO3P + MAXPDF 0.09 + AMPLITUDE RESONANCE BC K*0 + ANGULAR AC + TYPE RBW_ZEMACH + DVFF BLATTWEISSKOPF 4.0 + COEFFICIENT POLAR_RAD 1.0 0.0 + AMPLITUDE LASS BC 1.412 0.294 2.07 3.32 1.8 + COEFFICIENT POLAR_RAD 32.9 -0.38 + AMPLITUDE RESONANCE AB phi + ANGULAR CA + TYPE RBW_ZEMACH + DVFF BLATTWEISSKOPF 4.0 + COEFFICIENT POLAR_RAD 6.04 2.99 + AMPLITUDE RESONANCE AB f_0 0.965 0.695 + ANGULAR CA + TYPE FLATTE 0.165 0.13957 0.13957 + COEFFICIENT POLAR_RAD 5.28 0.48 + AMPLITUDE RESONANCE AB f_0(1500) 1.539 0.257 + ANGULAR CA + TYPE RBW_ZEMACH + COEFFICIENT POLAR_RAD 24.0 1.29 + AMPLITUDE RESONANCE AB chi_c0 + ANGULAR CA + TYPE RBW_ZEMACH + DVFF BLATTWEISSKOPF 4.0 + COEFFICIENT POLAR_RAD 0.437 -1.02 + AMPLITUDE PHASESPACE + COEFFICIENT POLAR_RAD 6.9 -2.29 + ; +Enddecay diff --git a/tests/dec/test_dec.py b/tests/dec/test_dec.py index e7d33103..5b285c91 100644 --- a/tests/dec/test_dec.py +++ b/tests/dec/test_dec.py @@ -485,6 +485,15 @@ def test_decay_model_parsing_with_model_alias(): assert get_model_parameters(dl) == [1.0, -0.303] +def test_multiline_model(): + p = DecFileParser(DIR / "../data/test_multiline_model.dec") + p.parse() + + dl = p._parsed_decays[0].children[1] + assert get_model_name(dl) == "PTO3P" + assert len(get_model_parameters(dl)) == 96 + + def test_duplicate_decay_definitions(): p = DecFileParser(DIR / "../data/duplicate-decays.dec")