Merge branch 'regionNames' into develop
WolfgangWaltenberger committed Jul 31, 2024
2 parents 7739e23 + cb31229 commit 0d9028a
Showing 13 changed files with 516 additions and 686 deletions.
2 changes: 1 addition & 1 deletion docs/manual/source/ConfrontPredictions.rst
@@ -207,7 +207,7 @@ The :ref:`figure below <combinedSRfigV2>` shows a comparison for `TChiHH <http:/
Figure: Comparison of `CMS-SUS-20-004 <http://cms-results.web.cern.ch/cms-results/public-results/publications/SUS-20-004/>`_ using SLv1 (left), and SLv2 (right).


.. pyhfllhd:
.. _pyhfllhd:

Full Likelihoods (pyhf) Approach
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
23 changes: 20 additions & 3 deletions docs/manual/source/DatabaseStructure.rst
@@ -77,10 +77,27 @@ Each |ExpRes| folder contains:
The ``globalInfo.txt`` file contains the meta information about the |ExpRes|.
It defines the center-of-mass energy |sqrts|, the integrated luminosity, the id
used to identify the result and additional information about the source of the
data. Here is the content of CMS-SUS-12-024/globalInfo.txt as an example:
data. In case a statistical model is given (either a :ref:`simplified likelihood <simplifiedllhd>` or a :ref:`full pyhf likelihood <pyhfllhd>`), it is also referenced here. Here is the content of ATLAS-SUSY-2018-04/globalInfo.txt as an example:

.. literalinclude:: /literals/globalInfo.txt
:lines: 1-11
.. literalinclude:: /literals/globalInfo201804.txt
:lines: 1-20

In this case, the connection of SModelS with the pyhf model is specified as
a dictionary with the json file names as keys, and lists of analysis region
entries as values. Each region entry contains the information to connect
the SModelS name (``smodels``) with the pyhf name (``pyhf``), with the region
type specified as ``type``. If the pyhf name is omitted, it is assumed to be
equal to the SModelS name. If the SModelS name is omitted, **None** is assumed
as its value, indicating that the pyhf region is not connected with any SModelS
region; this is typically the case for control or validation regions. If the
``type`` is omitted, **SR** is assumed. For the special case of a signal region
whose pyhf name coincides with the SModelS one, a simple name string can be
used instead of a dictionary, as illustrated by the ATLAS-SUSY-2018-14 example:

.. literalinclude:: /literals/globalInfo201814.txt
:lines: 14
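
The defaulting rules above can be condensed into a short sketch
(``normalizeRegion`` is a hypothetical helper, shown for illustration only;
it is not part of the SModelS API):

.. code-block:: python

   def normalizeRegion(region):
       # a plain string is shorthand for a signal region whose pyhf
       # name coincides with the SModelS name
       if isinstance(region, str):
           region = {"smodels": region}
       region.setdefault("smodels", None)            # no SModelS counterpart (CR/VR)
       region.setdefault("pyhf", region["smodels"])  # pyhf name defaults to the SModelS name
       region.setdefault("type", "SR")               # region type defaults to SR
       return region

   normalizeRegion({'pyhf': 'QCR1cut_cuts', 'type': 'CR'})
   # -> {'pyhf': 'QCR1cut_cuts', 'type': 'CR', 'smodels': None}
   normalizeRegion('SRee')
   # -> {'smodels': 'SRee', 'pyhf': 'SRee', 'type': 'SR'}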

In case of simplified likelihoods, the covariance matrix is supplied in the ``covariance`` field, with the order of the regions specified in the ``datasetOrder`` field,
as shown in the example from ATLAS-SUSY-2018-41:

.. literalinclude:: /literals/globalInfo201841.txt
:lines: 12-14
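
For illustration, here is a minimal sketch of how ``datasetOrder`` ties the
signal yields to the rows and columns of the covariance matrix (the yields
below are made up for the example):

.. code-block:: python

   import numpy as np

   # order and covariance as in the ATLAS-SUSY-2018-41 entry above
   datasetOrder = ["SR-2B2Q-Vh", "SR-2B2Q-VZ", "SR-4Q-VV"]
   covariance = np.array([[0.61362, 0.0, 0.0],
                          [0.0, 0.30989, 0.0],
                          [0.0, 0.0, 0.59242]])
   # hypothetical signal yields, keyed by the SModelS dataset id
   srNsigDict = {"SR-2B2Q-Vh": 1.2, "SR-2B2Q-VZ": 0.4, "SR-4Q-VV": 2.1}
   # covariance rows/columns follow datasetOrder, so the yields must too
   srNsigs = [srNsigDict[dataId] for dataId in datasetOrder]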

* **Experimental Result folder is described by the** `ExpResult Class <experiment.html#experiment.expResultObj.ExpResult>`_
* **globalInfo files are described by the** `Info Class <experiment.html#experiment.infoObj.Info>`_
12 changes: 0 additions & 12 deletions docs/manual/source/literals/globalInfo.txt

This file was deleted.

20 changes: 20 additions & 0 deletions docs/manual/source/literals/globalInfo201804.txt
@@ -0,0 +1,20 @@
id: ATLAS-SUSY-2018-04
sqrts: 13*TeV
lumi: 139.0/fb
prettyName: 2 hadronic taus
url: https://atlas.web.cern.ch/Atlas/GROUPS/PHYSICS/PAPERS/SUSY-2018-04/
arxiv: https://arxiv.org/abs/1911.06660
publication: Phys. Rev. D 101 (2020) 032009
publicationDOI: https://doi.org/10.1103/PhysRevD.101.032009
contact: [email protected]
private: False
implementedBy: Wolfgang Waltenberger
lastUpdate: 2020/1/26
# the line below configures the statistical model
jsonFiles: { 'SRcombined.json': [
{'pyhf': 'QCR1cut_cuts', 'type': 'CR'},
{'pyhf': 'QCR2cut_cuts', 'type': 'CR'},
{'smodels': 'SRlow', 'pyhf': 'SR1cut_cuts'},
{'smodels': 'SRhigh', 'pyhf': 'SR2cut_cuts'},
{'pyhf': 'WCRcut_cuts', 'type': 'CR'}] }
includeCRs: False
15 changes: 15 additions & 0 deletions docs/manual/source/literals/globalInfo201814.txt
@@ -0,0 +1,15 @@
id: ATLAS-SUSY-2018-14
sqrts: 13*TeV
lumi: 139.0/fb
prettyName: displaced vertices
url: https://atlas.web.cern.ch/Atlas/GROUPS/PHYSICS/PAPERS/SUSY-2018-14/
arxiv: https://arxiv.org/abs/2011.07812
publication: Phys. Rev. Lett. 127 (2021) 051802
publicationDOI: https://link.aps.org/doi/10.1103/PhysRevLett.127.051802
contact: [email protected]
comment: Search for displaced leptons plus MET. The data was digitized from the figures in the publication.
private: False
implementedBy: GA
lastUpdate: 2021/5/26
jsonFiles: {'SRee_bkgonly.json': ['SRee'], 'SRmm_bkgonly.json': ['SRmm'], 'Comb_bkgonly.json': ['SRee', 'SRmm', 'SRem']}
type: displaced
13 changes: 13 additions & 0 deletions docs/manual/source/literals/globalInfo201841.txt
@@ -0,0 +1,13 @@
id: ATLAS-SUSY-2018-41
sqrts: 13.0*TeV
lumi: 139./fb
prettyName: hadr. EWK
url: https://atlas.web.cern.ch/Atlas/GROUPS/PHYSICS/PAPERS/SUSY-2018-41/
arxiv: https://arxiv.org/abs/2108.07586
publication: Phys. Rev. D 104 (2021) 112010
publicationDOI: https://doi.org/10.1103/PhysRevD.104.112010
private: False
implementedBy: Sahana Narasimha
lastUpdate: 2023/4/21
datasetOrder: "SR-2B2Q-Vh", "SR-2B2Q-VZ", "SR-4Q-VV"
covariance: [[ .61362, 0., 0. ], [ 0., .30989, 0. ], [ 0., 0., .59242 ] ]
8 changes: 7 additions & 1 deletion smodels/experiment/expAuxiliaryFuncs.py
@@ -581,8 +581,14 @@ def concatenateLines ( oldcontent ):
that end with '\' or ',' """
content=[] ## concatenate lines that end with "," or "\"
tmp=""
for line in oldcontent:
import re
for i,line in enumerate ( oldcontent ):
tmp+=line.strip()
## if next line starts with tab or whitespace or "}",
## merge the lines
if i < len(oldcontent)-1 and re.match("[ \t}]",oldcontent[i+1] ):
# the next line continues this entry: keep accumulating
continue
if tmp != "" and tmp[-1] not in [ ",", '\\' ]:
content.append ( tmp )
tmp=""
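
A minimal usage sketch of the new merging rule (the input lines are made up;
concatenateLines is the function shown in this hunk):

from smodels.experiment.expAuxiliaryFuncs import concatenateLines

lines = ["jsonFiles: { 'SRcombined.json': [",
         "    {'pyhf': 'QCR1cut_cuts', 'type': 'CR'},",
         "    {'smodels': 'SRlow', 'pyhf': 'SR1cut_cuts'}] }",
         "includeCRs: False"]
merged = concatenateLines(lines)
# the first three physical lines collapse into one 'jsonFiles: ...' entry,
# because each successor line starts with whitespace
assert len(merged) == 2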
92 changes: 74 additions & 18 deletions smodels/matching/theoryPrediction.py
@@ -162,29 +162,25 @@ def setStatsComputer(self):

elif self.dataType() == "combined":
# Get dictionary with dataset IDs and signal yields
srNsigDict = {pred.dataset.getID() :
srNsigDict = {ds.getID() : 0.0 for ds in self.dataset.origdatasets}
# Update with theory predictions
srNsigDict.update({pred.dataset.getID() :
(pred.xsection*pred.dataset.getLumi()).asNumber()
for pred in self.datasetPredictions}
for pred in self.datasetPredictions})

# Get ordered list of datasets:
if hasattr(self.dataset.globalInfo, "covariance"):
datasetList = self.dataset.globalInfo.datasetOrder[:]
# Get list of signal yields corresponding to the dataset order:
srNsigs = [srNsigDict[dataID] if dataID in srNsigDict else 0.0
for dataID in datasetList]
srNsigs = [srNsigDict[dataID] for dataID in datasetList]
# Get computer
computer = StatsComputer.forMultiBinSL(dataset=self.dataset,
nsig=srNsigs,
deltas_rel = self.deltas_rel)

elif hasattr(self.dataset.globalInfo, "jsonFiles"):
datasetList = [ds.getID() for ds in self.dataset.origdatasets]
# Get list of signal yields corresponding to the dataset order:
srNsigs = [srNsigDict[dataID] if dataID in srNsigDict else 0.0
for dataID in datasetList]
# Get computer
computer = StatsComputer.forPyhf(dataset=self.dataset,
nsig=srNsigs,
nsig=srNsigDict,
deltas_rel = self.deltas_rel)

self._statsComputer = computer
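
The rewritten initialization guarantees that every original dataset appears in
the signal-yield dictionary, defaulting to zero; a toy sketch with made-up
dataset ids:

origIds = ["SRlow", "SRhigh", "QCR1cut_cuts"]   # hypothetical dataset ids
predicted = {"SRlow": 3.2}                      # only SRlow has a theory prediction
srNsigDict = {dsId: 0.0 for dsId in origIds}
srNsigDict.update(predicted)                    # regions without predictions keep 0.0
# -> {'SRlow': 3.2, 'SRhigh': 0.0, 'QCR1cut_cuts': 0.0}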
@@ -691,17 +687,42 @@ def theoryPredictionsFor(database : Database, smsTopDict : Dict,
expResults = sum(dataSetResults)
else:
expResults = TheoryPredictionList()
bestRes = _getBestResult(dataSetResults)
bestRes = _getBestResult(dataSetResults,expResult.globalInfo)
if not bestRes is None:
expResults.append(bestRes) # Best result = combination if available

for theoPred in expResults:
theoPred.expResult = expResult
theoPred.deltas_rel = deltas_rel
if not isinstance(theoPred.dataset,CombinedDataSet) and not theoPred.dataset.dataInfo.dataId is None and "CR" in theoPred.dataset.dataInfo.dataId: # Individual CRs shouldn't give results
theoPred.upperLimit = None
else:
tpe = None
if isinstance(theoPred.dataset,CombinedDataSet): # combined results always get an upper limit
theoPred.upperLimit = theoPred.getUpperLimit()
continue
else:
if hasattr(theoPred.dataset.globalInfo, "jsonFiles"): # Only signal in CRs for jsonFiles so far
for regionSet in theoPred.dataset.globalInfo.jsonFiles.values():
for region in regionSet:
if type(region)==str:
if region == theoPred.dataset.dataInfo.dataId:
# if given in old format, it is an SR
tpe = "SR"
break
elif region["smodels"] == theoPred.dataset.dataInfo.dataId:
tpe = region["type"]
break
else:
tpe = "SR"

if tpe is None:
logger.error(f"Could not find type of region {theoPred.dataType()} from {theoPred.analysisId()}")
raise SModelSError()

if tpe == "SR":
theoPred.upperLimit = theoPred.getUpperLimit()
else:
theoPred.upperLimit = None

expResults.sortTheoryPredictions()

for theoPred in expResults:
@@ -716,7 +737,7 @@ def theoryPredictionsFor(database : Database, smsTopDict : Dict,
def _getCombinedResultFor(dataSetResults, expResult):
"""
Compute the combined result for all datasets, if covariance
matrices are available. Return a TheoryPrediction object
matrices or jsonFiles are available. Return a TheoryPrediction object
with the signal cross-section summed over all the signal regions
and the respective upper limit.
@@ -725,8 +746,29 @@ def _getCombinedResultFor(dataSetResults, expResult):
:return: TheoryPrediction object
"""
# Don't give combined result if all regions are CRs
isNotSR = []
for predList in dataSetResults:
if hasattr ( expResult.globalInfo, "jsonFiles" ):
for regionSet in expResult.globalInfo.jsonFiles.values():
for region in regionSet:
if type(region)==str:
region = { "smodels": region, "type": "SR" }
if not "smodels" in region:
region["smodels"]=None
if region['smodels'] == predList[0].dataset.dataInfo.dataId:
if not "type" in region:
region["type"]="SR"
if region['type'] == 'SR':
isNotSR.append(False)
else:
isNotSR.append(True)
else:
isNotSR = [ False ]

if all([True if "CR" in predList[0].dataset.dataInfo.dataId else False for predList in dataSetResults]): # Don't give combined result if all regions are CRs
if all(isNotSR):
return None

if len(dataSetResults) == 1:
@@ -777,12 +819,13 @@ def _getCombinedResultFor(dataSetResults, expResult):
return theoryPrediction


def _getBestResult(dataSetResults):
def _getBestResult(dataSetResults, globalInfo):
"""
Returns the best result according to the expected upper limit.
If a combined result is included in the list, always return it.
:param datasetPredictions: list of TheoryPredictionList objects
:param globalInfo: globalInfo of the exp result (used to get the region types)
:return: best result (TheoryPrediction object)
"""

@@ -807,8 +850,21 @@ def _getBestResult(dataSetResults):
logger.error("Multiple clusters should only exist for upper limit results!")
raise SModelSError()
dataset = predList[0].dataset
if "CR" in dataset.dataInfo.dataId: # A CR cannot be the best SR

# Only a SR can be the best SR
stop = False
if hasattr(globalInfo,"jsonFiles"):
for regionSet in globalInfo.jsonFiles.values():
for region in regionSet:
if type(region) == dict and \
region['smodels'] == dataset.dataInfo.dataId:
if region['type'] != 'SR':
stop = True
if stop: break
if stop: break
if stop:
continue

if dataset.getType() != "efficiencyMap":
txt = (
"Multiple data sets should only exist for efficiency map results, but we have them for %s?"