diff --git a/docs/manual/source/ConfrontPredictions.rst b/docs/manual/source/ConfrontPredictions.rst index da20a384ac..289abe90c9 100644 --- a/docs/manual/source/ConfrontPredictions.rst +++ b/docs/manual/source/ConfrontPredictions.rst @@ -207,7 +207,7 @@ The :ref:`figure below ` shows a comparison for `TChiHH `_ using SLv1 (left), and SLv2 (right). -.. pyhfllhd: +.. _pyhfllhd: Full Likelihoods (pyhf) Approach ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/manual/source/DatabaseStructure.rst b/docs/manual/source/DatabaseStructure.rst index 29b4a68ad8..ae83890398 100644 --- a/docs/manual/source/DatabaseStructure.rst +++ b/docs/manual/source/DatabaseStructure.rst @@ -77,10 +77,27 @@ Each |ExpRes| folder contains: The ``globalInfo.txt`` file contains the meta information about the |ExpRes|. It defines the center-of-mass energy |sqrts|, the integrated luminosity, the id used to identify the result and additional information about the source of the -data. Here is the content of CMS-SUS-12-024/globalInfo.txt as an example: +data. In case a statistical model is given (either a :ref:`simplified likelihood ` or a :ref:`full pyhf likelihood `), it is also referenced here. Here is the content of ATLAS-SUSY-2018-04/globalInfo.txt as an example: -.. literalinclude:: /literals/globalInfo.txt - :lines: 1-11 +.. literalinclude:: /literals/globalInfo201804.txt + :lines: 1-20 + +In this case, the connection of SModelS with the pyhf model is specified as +a dictionary with the json file name as the keys, and a list of analysis region +entries as the values. The region entries contain the information to connect +the SModelS names (``smodels``) with the pyhf names (``pyhf``), with the region type +specified as ``type``. If the pyhf name is omitted, it is assumed to be equal to the +SModelS name. If the SModelS name is omitted, we assume **None** as value, indicating +that the pyhf region will not be connected with any SModelS region. This is typically the case for control or validation regions. If the ``type`` is omitted, **SR** is assumed. For the special case of an SR region where the pyhf name conicides with the SModelS one, a simple name string can be used instead of a dictionary, as is illustrated by the ATLAS-SUSY-2018-14 example: + +.. literalinclude:: /literals/globalInfo201814.txt + :lines: 14 + +In case of simplified likelihoods, the covariance matrix is supplied in the ``covariance`` field, with the order of the regions specified in a ``datasetOrder`` field, +shown in the example given by ATLAS-SUSY-2018-41: + +.. literalinclude:: /literals/globalInfo201841.txt + :lines: 12-14 * **Experimental Result folder is described by the** `ExpResult Class `_ * **globalInfo files are descrived by the** `Info Class `_ diff --git a/docs/manual/source/literals/globalInfo.txt b/docs/manual/source/literals/globalInfo.txt deleted file mode 100644 index c3a7b70f98..0000000000 --- a/docs/manual/source/literals/globalInfo.txt +++ /dev/null @@ -1,12 +0,0 @@ -sqrts: 8.0*TeV -lumi: 19.4/fb -id: CMS-SUS-12-024 -prettyName: \slash{E}_{T}+b -url: https://twiki.cern.ch/twiki/bin/view/CMSPublic/PhysicsResultsSUS12024 -arxiv: http://arxiv.org/abs/1305.2390 -publication: http://www.sciencedirect.com/science/article/pii/S0370269313005339 -contact: Keith Ulmer , Josh Thompson , Alessandro Gaz -private: False -implementedBy: Wolfgang Waltenberger -lastUpdate: 2015/5/11 - diff --git a/docs/manual/source/literals/globalInfo201804.txt b/docs/manual/source/literals/globalInfo201804.txt new file mode 100644 index 0000000000..fba64183a1 --- /dev/null +++ b/docs/manual/source/literals/globalInfo201804.txt @@ -0,0 +1,20 @@ +id: ATLAS-SUSY-2018-04 +sqrts: 13*TeV +lumi: 139.0/fb +prettyName: 2 hadronic taus +url: https://atlas.web.cern.ch/Atlas/GROUPS/PHYSICS/PAPERS/SUSY-2018-04/ +arxiv: https://arxiv.org/abs/1911.06660 +publication: Phys. Rev. D 101 (2020) 032009 +publicationDOI: https://doi.org/10.1103/PhysRevD.101.032009 +contact: atlas-phys-susy-conveners@cern.ch +private: False +implementedBy: Wolfgang Waltenberger +lastUpdate: 2020/1/26 +# the line below configures the statistical model +jsonFiles: { 'SRcombined.json': [ + {'pyhf': 'QCR1cut_cuts', 'type': 'CR'}, + {'pyhf': 'QCR2cut_cuts', 'type': 'CR'}, + {'smodels': 'SRlow', 'pyhf': 'SR1cut_cuts'}, + {'smodels': 'SRhigh', 'pyhf': 'SR2cut_cuts'}, + {'pyhf': 'WCRcut_cuts', 'type': 'CR'}] } +includeCRs: False diff --git a/docs/manual/source/literals/globalInfo201814.txt b/docs/manual/source/literals/globalInfo201814.txt new file mode 100644 index 0000000000..ac4a95300d --- /dev/null +++ b/docs/manual/source/literals/globalInfo201814.txt @@ -0,0 +1,15 @@ +id: ATLAS-SUSY-2018-14 +sqrts: 13*TeV +lumi: 139.0/fb +prettyName: displaced vertices +url: https://atlas.web.cern.ch/Atlas/GROUPS/PHYSICS/PAPERS/SUSY-2018-14/ +arxiv: https://arxiv.org/abs/2011.07812 +publication: Phys. Rev. Lett. 127 (2021) 051802 +publicationDOI: https://link.aps.org/doi/10.1103/PhysRevLett.127.051802 +contact: atlas-phys-susy-conveners@cern.ch +comment: Search for displaced leptons plus MET. The data was digitized from the figures in the publication. +private: False +implementedBy: GA +lastUpdate: 2021/5/26 +jsonFiles: {'SRee_bkgonly.json': ['SRee'], 'SRmm_bkgonly.json': ['SRmm'], 'Comb_bkgonly.json': ['SRee', 'SRmm', 'SRem']} +type: displaced diff --git a/docs/manual/source/literals/globalInfo201841.txt b/docs/manual/source/literals/globalInfo201841.txt new file mode 100644 index 0000000000..58e396f2c0 --- /dev/null +++ b/docs/manual/source/literals/globalInfo201841.txt @@ -0,0 +1,13 @@ +id: ATLAS-SUSY-2018-41 +sqrts: 13.0*TeV +lumi: 139./fb +prettyName: hadr. EWK +url: https://atlas.web.cern.ch/Atlas/GROUPS/PHYSICS/PAPERS/SUSY-2018-41/ +arxiv: https://arxiv.org/abs/2108.07586 +publication: Phys. Rev. D 104 (2021) 112010 +publicationDOI: https://doi.org/10.1103/PhysRevD.104.112010 +private: False +implementedBy: Sahana Narasimha +lastUpdate: 2023/4/21 +datasetOrder: "SR-2B2Q-Vh", "SR-2B2Q-VZ", "SR-4Q-VV" +covariance: [[ .61362, 0., 0. ], [ 0., .30989, 0. ], [ 0., 0., .59242 ] ] diff --git a/smodels/experiment/expAuxiliaryFuncs.py b/smodels/experiment/expAuxiliaryFuncs.py index ad03e2701d..d803906314 100644 --- a/smodels/experiment/expAuxiliaryFuncs.py +++ b/smodels/experiment/expAuxiliaryFuncs.py @@ -581,8 +581,14 @@ def concatenateLines ( oldcontent ): that end with '\' or ',' """ content=[] ## concatenate lines that end with "," or "\" tmp="" - for line in oldcontent: + import re + for i,line in enumerate ( oldcontent ): tmp+=line.strip() + ## if next line starts with tab or whitespace or "}", + ## merge the lines + if i < len(oldcontent)-1 and re.match("[ \t}]",oldcontent[i+1] ): + # if next line starts with white space, we add also + continue if tmp != "" and tmp[-1] not in [ ",", '\\' ]: content.append ( tmp ) tmp="" diff --git a/smodels/matching/theoryPrediction.py b/smodels/matching/theoryPrediction.py index 48f4392cf0..a3a6e6e731 100644 --- a/smodels/matching/theoryPrediction.py +++ b/smodels/matching/theoryPrediction.py @@ -162,29 +162,25 @@ def setStatsComputer(self): elif self.dataType() == "combined": # Get dictionary with dataset IDs and signal yields - srNsigDict = {pred.dataset.getID() : + srNsigDict = {ds.getID() : 0.0 for ds in self.dataset.origdatasets} + # Update with theory predictions + srNsigDict.update({pred.dataset.getID() : (pred.xsection*pred.dataset.getLumi()).asNumber() - for pred in self.datasetPredictions} + for pred in self.datasetPredictions}) # Get ordered list of datasets: if hasattr(self.dataset.globalInfo, "covariance"): datasetList = self.dataset.globalInfo.datasetOrder[:] # Get list of signal yields corresponding to the dataset order: - srNsigs = [srNsigDict[dataID] if dataID in srNsigDict else 0.0 - for dataID in datasetList] + srNsigs = [srNsigDict[dataID] for dataID in datasetList] # Get computer computer = StatsComputer.forMultiBinSL(dataset=self.dataset, nsig=srNsigs, deltas_rel = self.deltas_rel) elif hasattr(self.dataset.globalInfo, "jsonFiles"): - datasetList = [ds.getID() for ds in self.dataset.origdatasets] - # Get list of signal yields corresponding to the dataset order: - srNsigs = [srNsigDict[dataID] if dataID in srNsigDict else 0.0 - for dataID in datasetList] - # Get computer computer = StatsComputer.forPyhf(dataset=self.dataset, - nsig=srNsigs, + nsig=srNsigDict, deltas_rel = self.deltas_rel) self._statsComputer = computer @@ -691,17 +687,42 @@ def theoryPredictionsFor(database : Database, smsTopDict : Dict, expResults = sum(dataSetResults) else: expResults = TheoryPredictionList() - bestRes = _getBestResult(dataSetResults) + bestRes = _getBestResult(dataSetResults,expResult.globalInfo) if not bestRes is None: expResults.append(bestRes) # Best result = combination if available for theoPred in expResults: theoPred.expResult = expResult theoPred.deltas_rel = deltas_rel - if not isinstance(theoPred.dataset,CombinedDataSet) and not theoPred.dataset.dataInfo.dataId is None and "CR" in theoPred.dataset.dataInfo.dataId: # Individual CRs shouldn't give results - theoPred.upperLimit = None - else: + tpe = None + if isinstance(theoPred.dataset,CombinedDataSet): # Individual CRs shouldn't give results theoPred.upperLimit = theoPred.getUpperLimit() + continue + else: + if hasattr(theoPred.dataset.globalInfo, "jsonFiles"): # Only signal in CRs for jsonFiles so far + for regionSet in theoPred.dataset.globalInfo.jsonFiles.values(): + for region in regionSet: + if type(region)==str: + if region == theoPred.dataset.dataInfo.dataId: + # if given in old format, it is an SR + tpe = "SR" + break + elif region["smodels"] == theoPred.dataset.dataInfo.dataId: + tpe = region["type"] + break + else: + tpe = "SR" + + if tpe is None: + logger.error(f"Could not find type of region {theoPred.dataType()} from {theoPred.analysisId()}") + sys.exit() + raise SModelSError() + + if tpe == "SR": + theoPred.upperLimit = theoPred.getUpperLimit() + else: + theoPred.upperLimit = None + expResults.sortTheoryPredictions() for theoPred in expResults: @@ -716,7 +737,7 @@ def theoryPredictionsFor(database : Database, smsTopDict : Dict, def _getCombinedResultFor(dataSetResults, expResult): """ Compute the combined result for all datasets, if covariance - matrices are available. Return a TheoryPrediction object + matrices or jsonFiles are available. Return a TheoryPrediction object with the signal cross-section summed over all the signal regions and the respective upper limit. @@ -725,8 +746,29 @@ def _getCombinedResultFor(dataSetResults, expResult): :return: TheoryPrediction object """ + # Don't give combined result if all regions are CRs + isNotSR = [] + for predList in dataSetResults: + if hasattr ( expResult.globalInfo, "jsonFiles" ): + for regionSet in expResult.globalInfo.jsonFiles.values(): + for region in regionSet: + if type(region)==str: + region = { "smodels": region, "type": "SR" } + #logger.error ( f"jsonFile has wrong format at {expResult.globalInfo.id}" ) + # import sys; sys.exit() + if not "smodels" in region: + region["smodels"]=None + if region['smodels'] == predList[0].dataset.dataInfo.dataId: + if not "type" in region: + region["type"]="SR" + if region['type'] == 'SR': + isNotSR.append(False) + else: + isNotSR.append(True) + else: + isNotSR = [ False ] - if all([True if "CR" in predList[0].dataset.dataInfo.dataId else False for predList in dataSetResults]): # Don't give combined result if all regions are CRs + if all(isNotSR): return None if len(dataSetResults) == 1: @@ -777,12 +819,13 @@ def _getCombinedResultFor(dataSetResults, expResult): return theoryPrediction -def _getBestResult(dataSetResults): +def _getBestResult(dataSetResults, globalInfo): """ Returns the best result according to the expected upper limit. If a combined result is included in the list, always return it. :param datasetPredictions: list of TheoryPredictionList objects + :param globalInfo: globalInfo of the exp result (used to get the region types) :return: best result (TheoryPrediction object) """ @@ -807,8 +850,21 @@ def _getBestResult(dataSetResults): logger.error("Multiple clusters should only exist for upper limit results!") raise SModelSError() dataset = predList[0].dataset - if "CR" in dataset.dataInfo.dataId: # A CR cannot be the best SR + + # Only a SR can be the best SR + stop = False + if hasattr(globalInfo,"jsonFiles"): + for regionSet in globalInfo.jsonFiles.values(): + for region in regionSet: + if type(region) == dict and \ + region['smodels'] == dataset.dataInfo.dataId: + if region['type'] != 'SR': + stop = True + if stop: break + if stop: break + if stop: continue + if dataset.getType() != "efficiencyMap": txt = ( "Multiple data sets should only exist for efficiency map results, but we have them for %s?" diff --git a/smodels/statistics/pyhfInterface.py b/smodels/statistics/pyhfInterface.py index 7676917c20..51b5e17b4a 100755 --- a/smodels/statistics/pyhfInterface.py +++ b/smodels/statistics/pyhfInterface.py @@ -19,6 +19,7 @@ import logging logging.getLogger("pyhf").setLevel(logging.CRITICAL) warnings.filterwarnings("ignore") +from typing import Dict, List jsonver = "" try: @@ -83,21 +84,34 @@ class PyhfData: """ Holds data for use in pyhf - :ivar nsignals: signal predictions list divided into sublists, one for each json file + :ivar nsignals: signal predictions dictionary of dictionaries, + one for each json file, one entry per signal region :ivar inputJsons: list of json instances :ivar jsonFiles: optional list of json files :ivar nWS: number of workspaces = number of json files """ - def __init__(self, nsignals, inputJsons, jsonFiles=None, includeCRs=False, signalUncertainty=None): - self.nsignals = nsignals # fb + def __init__( self, nsignals : Dict[str, Dict], inputJsons, jsonFiles=None, + includeCRs=False, signalUncertainty=None): + self.nsignals = nsignals self.getTotalYield() self.inputJsons = inputJsons self.cached_likelihoods = {} ## cache of likelihoods (actually twice_nlls) self.cached_lmaxes = {} # cache of lmaxes (actually twice_nlls) self.cachedULs = {False: {}, True: {}, "posteriori": {}} if jsonFiles is None: # If no name has been provided for the json file(s) and the channels, use fake ones - jsonFiles = dict( zip( [ "dummy%d" % i for i in range(len(inputJsons)) ], [ "" for i in range(len(inputJsons)) ] ) ) + jsonFiles = {} + for jFile,sregions in nsignals.items(): + regions = [] + srname = "SR1" + if len(sregions)==1: + regions.append ( { "smodels": srname, "type": "SR", "pyhf": srname } ) + else: + for j in range( len ( sregions ) ): + srname = f"SR1[{j}]" + regions.append ( { "smodels": srname, "type": "SR", + "pyhf": srname } ) + jsonFiles[ jFile ] = regions self.jsonFiles = jsonFiles self.includeCRs = includeCRs self.signalUncertainty = signalUncertainty @@ -107,14 +121,135 @@ def __init__(self, nsignals, inputJsons, jsonFiles=None, includeCRs=False, signa self.nWS = len(inputJsons) self.errorFlag = False - self.getWSInfo() self.checkConsistency() + self.getWSInfo() def getTotalYield ( self ): """ the total yield in all signal regions """ - S = sum ( [ sum(x) for x in self.nsignals ] ) + S = 0 + for dct in self.nsignals.values(): + for signal in dct.values(): + if isinstance(signal, list): + for sig in signal: + S += sig + else: + S += signal self.totalYield = S + def createPatchForRegion ( self, region, i_ch, ch, jsName ): + if not "pyhf" in region: + region["pyhf"]=region["smodels"] + chname = ch['name'] + chname2 = f'{ch["name"]}[0]' ## binned SRs + if not region["pyhf"] in [ chname, chname2 ]: + return None, None + if (region['type'] == 'SR') or (region['type'] == 'CR' and self.includeCRs and region['smodels'] is not None): + if region['smodels'] not in self.nsignals[jsName]: + logger.error(f"Region {region['smodels']} of {jsName} not in the signal dictionary!") + self.errorFlag = True + return None, None + nBins = len(ch["data"]) + # Find all smodels names if many share the same pyhf name (for multi-bin regions) + smodelsName = [] + ctr = 0 + if region["pyhf"] == chname: # no bins, so easy + smodelsName.append(region['smodels']) + else: ## bins + hasAdded = True + while hasAdded: + hasAdded = False + for rregion in self.jsonFiles[jsName]: + if rregion["pyhf"] == f'{ch["name"]}[{ctr}]': + smodelsName.append(rregion['smodels']) + ctr+=1 + hasAdded = True + if len(smodelsName) != nBins: + logger.error(f"Json region {region['pyhf']} has {nBins} bins, but only {len(smodelsName)} are implemented!") + self.errorFlag = True + return None, None + + signal = [] + for name in smodelsName: # In case of multiple signals for one region, the dict ordering within globalInfo.jsonFiles matters + signal.append(self.nsignals[jsName][name]) + + smodelsName = ";".join( smodelsName ) # Name of the corresponding region(s). Join all names if multiple bins. + + ret = { + "path": f"/channels/{i_ch}/samples/0", + "size": nBins, + "smodelsName": smodelsName, + "signal": signal + }, "signalRegions" + return ret + ret = { 'path': f"/channels/{i_ch}", 'name': chname, + 'type': region['type']}, "otherRegions" + return ( ret ) + + def updatePyhfNames ( self, jsName : str, observations : List ): + """ if no pyhf names are given, get them from the ws, + in the order of the ws """ + if "pyhf" in self.jsonFiles[jsName][0]: + return + + def guessPyhfName ( name : str ) -> str: + if name.startswith ( "CR" ): + return "CR" + if name.startswith ( "VR" ): + return "VR" + if name.startswith ( "SR" ): + return "SR" + if "_CR_" in name: + return "CR" + return "SR" + ## we dont have the mapping smodels<->pyhf + ctr = 0 + #ic ( "---" ) + nJsonFiles = len(self.jsonFiles[jsName]) + #nObs = len(observations) + #ic ( self.includeCRs, nObs, nJsonFiles ) + #ic ( observations ) + #ic ( self.jsonFiles[jsName] ) + nSRs = 0 + for observation in observations: + name = observation["name"] + regionType = guessPyhfName ( name ) + if regionType == "SR": + nSRs += 1 + # ic ( nSRs, nCRs ) + for observation in observations: + name = observation["name"] + regionType = guessPyhfName ( name ) + if regionType in [ "VR" ]: + region = { "pyhf": observation["name"], "smodels": None, + "type": regionType } + self.jsonFiles[jsName].append ( region ) + continue + if not self.includeCRs and regionType in [ "CR" ]: + region = { "pyhf": observation["name"], "smodels": None, + "type": regionType } + self.jsonFiles[jsName].append ( region ) + continue + if self.includeCRs and regionType in [ "CR" ]: + if nSRs == nJsonFiles: # and nSRs+nCRs == nObs: + ## the signal regions alone do it + region = { "pyhf": observation["name"], "smodels": None, + "type": regionType } + self.jsonFiles[jsName].append ( region ) + continue + + if len(observation["data"])==1: + if ctr < len(self.jsonFiles[jsName]): + self.jsonFiles[jsName][ctr]["pyhf"]=f"{name}" + self.jsonFiles[jsName][ctr]["type"]=regionType + ctr += 1 + else: + for i in range(len(observation["data"])): + if ctr < len(self.jsonFiles[jsName]): + self.jsonFiles[jsName][ctr]["pyhf"]=f"{name}[{i}]" + self.jsonFiles[jsName][ctr]["type"]=regionType + ctr += 1 + + def getWSInfo(self): """ Getting informations from the json files @@ -123,14 +258,17 @@ def getWSInfo(self): - :key signalRegions: list of dictonaries with 'json path' and 'size' (number of bins) of the 'signal regions' channels in the json files - :key otherRegions: list of dictionnaries indicating the path and the name of the control and/or validation region channels """ - # Identifying the path to the SR and VR channels in the main workspace files + if self.errorFlag: + return + + # Identifying the path to the channels in the main workspace files self.channelsInfo = [] # workspace specifications if not isinstance(self.inputJsons, list): logger.error("The 'inputJsons' parameter must be of type list") self.errorFlag = True return - for ws, jsName in zip(self.inputJsons, [js for js in self.jsonFiles]): + for ws, jsName in zip(self.inputJsons, self.jsonFiles): wsChannelsInfo = {} wsChannelsInfo["signalRegions"] = [] wsChannelsInfo["otherRegions"] = [] @@ -143,43 +281,31 @@ def getWSInfo(self): self.channelsInfo = None return - nbCRwithEM = 0 - nbCRinWS = 0 - for dataset in self.jsonFiles[jsName]: - if "CR" in dataset: - nbCRwithEM += 1 - for ch in ws["channels"]: - if "CR" in ch["name"]: - nbCRinWS += 1 - if nbCRwithEM and nbCRwithEM != nbCRinWS: - logger.warning(f"Number of CRs in workspace: {nbCRinWS} but number of CRs with EM: {nbCRwithEM}. Signal in CRs will not be patched.") - if nbCRwithEM != 0 and not self.includeCRs: - logger.warning("EM in CRs but includeCRs == False. Signal in CRs will not be patched.") - - smodelsRegions = self.jsonFiles[jsName] # CR and SR names implemented in the database - for i_ch, ch in enumerate(ws["channels"]): - if "SR" in ch["name"] or ("CR" in ch["name"] and self.includeCRs and nbCRwithEM == nbCRinWS): # if channel name starts with 'SR' or 'CR' if includeCRs - nBins = len(ch["samples"][0]["data"]) - smodelsName = ";".join( smodelsRegions[:nBins] ) # Name of the corresponding CR or SR. Join all CR or SR names if multiple bins. - smodelsRegions = smodelsRegions[nBins:] - - wsChannelsInfo["signalRegions"].append( - { - "path": "/channels/" - + str(i_ch) - + "/samples/0", # Path of the new sample to add (signal prediction) - "size": nBins, - "smodelsName": smodelsName - } - ) # Number of bins - - else: - wsChannelsInfo["otherRegions"].append({'path': "/channels/" + str(i_ch), 'name': ch["name"]}) + sigInCRs = False + signalNames = self.nsignals[jsName].keys() + for signalName in signalNames: + for region in self.jsonFiles[jsName]: + if signalName == region['smodels'] and region['type'] == 'CR': + sigInCRs = True + if sigInCRs and not self.includeCRs: + logger.warning("Signal in CRs but includeCRs = False. CRs will still be removed.") + + smodelsRegions = self.nsignals[jsName].values() # CR and SR names implemented in the database + if "observations" in ws: + self.updatePyhfNames ( jsName, ws["observations"] ) + for i_r, region in enumerate ( self.jsonFiles[jsName] ): + for i_ch, ch in enumerate(ws["observations"]): + ## create a patch for the region, but only if channel matches + patch, patchType = self.createPatchForRegion ( region, i_ch, ch, jsName ) + if patch != None: + wsChannelsInfo[patchType].append(patch) + break wsChannelsInfo["otherRegions"].sort( key=lambda path: int(path['path'].split("/")[-1]), reverse=True ) # Need to sort correctly the paths to the channels to be removed self.channelsInfo.append(wsChannelsInfo) + # ic ( wsChannelsInfo ) def checkConsistency(self): """ @@ -187,29 +313,31 @@ def checkConsistency(self): :param zeroSignalsFlag: boolean identifying if all SRs of a single json are empty """ - if not isinstance(self.nsignals, list): + if not isinstance(self.nsignals, dict): logger.error("The 'nsignals' parameter must be of type list") self.errorFlag = True + if self.nWS != len(self.nsignals): logger.error( "The number of subsignals provided is different from the number of json files" ) self.errorFlag = True self.zeroSignalsFlag = list() - if self.channelsInfo == None: - return - for wsInfo, subSig in zip(self.channelsInfo, self.nsignals): - if not isinstance(subSig, list): - logger.error("The 'nsignals' parameter must be a two dimensional list") + + for jsName, subSig in zip(self.jsonFiles, self.nsignals.values()): + if not isinstance(subSig, dict): + logger.error("The 'nsignals' parameter must be a dictionary of dictionary") self.errorFlag = True + return nBinsJson = 0 - for sr in wsInfo["signalRegions"]: - nBinsJson += sr["size"] + for region in self.jsonFiles[jsName]: + if (region['type'] == 'SR') or (region['type'] == 'CR' and self.includeCRs and region['smodels'] is not None): + nBinsJson += 1 if nBinsJson != len(subSig): logger.error( - f"The number of signals ({len(subSig)}) provided is different from the number of bins for json ({nBinsJson}), number {self.channelsInfo.index(wsInfo)} and channel number {self.nsignals.index(subSig)}" ) + f"The number of signals ({len(subSig)}) provided is different from the number of signal bins for json ({nBinsJson}) for {jsName}" ) self.errorFlag = True - allZero = all([s == 0 for s in subSig]) + allZero = all([s == 0 for s in subSig.values()]) # Checking if all signals matching this json are zero self.zeroSignalsFlag.append(allZero) @@ -243,7 +371,9 @@ def __init__(self, data, cl=0.95, lumi=None ): self.nsignals = copy.deepcopy ( self.data.nsignals ) logger.debug("Signals : {}".format(self.nsignals)) self.inputJsons = self.data.inputJsons - self.channelsInfo = self.data.channelsInfo + self.channelsInfo = None + if hasattr ( self.data, "channelsInfo" ): + self.channelsInfo = self.data.channelsInfo self.zeroSignalsFlag = self.data.zeroSignalsFlag self.nWS = self.data.nWS self.includeCRs = data.includeCRs @@ -288,7 +418,9 @@ def rescale(self, factor): :return: updated list of patches and workspaces (self.patches, self.workspaces and self.workspaces_expected) """ - self.nsignals = [[sig * factor for sig in ws] for ws in self.nsignals] + for jsName in self.nsignals.keys(): + for regionName in self.nsignals[jsName].keys(): + self.nsignals[jsName][regionName] = self.nsignals[jsName][regionName]*factor try: self.alreadyBeenThere = self.nsignals == self.nsignals_2 except AttributeError: @@ -338,16 +470,20 @@ def patchMaker(self): return None # Constructing the patches to be applied on the main workspace files patches = [] - for ws, info, subSig in zip(self.inputJsons, self.channelsInfo, self.nsignals): + for ws, info, (jsFileName,jsFile) in zip(self.inputJsons, self.channelsInfo, self.data.jsonFiles.items() ): patch = [] for srInfo in info["signalRegions"]: - nBins = srInfo["size"] operator = {} # Operator for patching the signal operator["op"] = "add" operator["path"] = srInfo["path"] value = {} - value["data"] = subSig[:nBins] - subSig = subSig[nBins:] + #value["data"] = srInfo['signal'] + sr_order = srInfo["smodelsName"].split(";") + nsignals = self.nsignals[jsFileName] + # ic ( nsignals, sr_order, srInfo, jsFileName ) + value["data"] = [ nsignals[x] for x in sr_order ] + #import sys, IPython; IPython.embed( colors = "neutral" ); sys.exit() + # sys.exit() value["modifiers"] = [] value["modifiers"].append({"data": None, "type": "normfactor", "name": "mu_SIG"}) value["modifiers"].append({"data": None, "type": "lumi", "name": "lumi"}) @@ -369,13 +505,13 @@ def patchMaker(self): ## SR names to the pyhf ones. once these dataIdMaps are in place, ## they should be used instead of this hack that rewrites ## the pyhf channel names - if srInfo["smodelsName"]: # If the CRs/SRs have a name in the database (it is always True when running SModelS the usual way) + if False: # srInfo["smodelsName"]: # If the CRs/SRs have a name in the database (it is always True when running SModelS the usual way) operators = self.changeChannelName ( srInfo ) for operator in operators: patch.append(operator) for region in info["otherRegions"]: - if 'CR' in region['name'] and self.includeCRs: + if region['type'] == 'CR' and self.includeCRs: continue else: patch.append({"op": "remove", "path": region['path']}) # operator for removing useless regions @@ -546,9 +682,9 @@ def likelihood( self, mu=1.0, workspace_index=None, return_nll=False, self.backup() try: if abs(mu - 1.0) > 1e-6: - for i, ns in enumerate(self.data.nsignals): - for j, v in enumerate(ns): - self.data.nsignals[i][j] = v * mu + for jsName in self.data.nsignals.keys(): + for regionName in self.data.nsignals[jsName].keys(): + self.data.nsignals[jsName][regionName] = self.data.nsignals[jsName][regionName]*mu self.__init__(self.data, self.cl, self.lumi) ### allow this, for computation of l_SM # if self.zeroSignalsFlag[workspace_index] == True: @@ -929,10 +1065,12 @@ def root_func(mu): if pyhfinfo["backend"] == "numpy": sup.filter(RuntimeWarning, r"invalid value encountered in log") # print ("expected", expected, "return_expected", args["return_expected"], "mu", mu, "\nworkspace.data(model) :", workspace.data(model, include_auxdata = False), "\nworkspace.observations :", workspace.observations, "\nobs[data] :", workspace['observations']) + # ic ( workspace["channels"][0]["samples"][0]["data"] ) + # import sys, IPython; IPython.embed( colors = "neutral" ); sys.exit() try: result = pyhf.infer.hypotest(mu, workspace.data(model), model, **args) except Exception as e: - logger.info(f"when testing hypothesis {mu}, caught exception: {e}") + logger.error(f"when testing hypothesis {mu}, caught exception: {e}") result = float("nan") if expected == "posteriori": result = [float("nan")] * 2 @@ -959,6 +1097,7 @@ def root_func(mu): nattempts = 0 nNan = 0 lo_mu, med_mu, hi_mu = 0.2, 1.0, 5.0 + # ic ( "A", lo_mu, hi_mu, root_func(lo_mu), root_func(hi_mu) ) # print ( "starting with expected", expected ) while "mu is not in [lo_mu,hi_mu]": nattempts += 1 @@ -1033,88 +1172,90 @@ def root_func(mu): return ul # self.scale has been updated within self.rescale() method if __name__ == "__main__": - C = [ - 18774.2, - -2866.97, - -5807.3, - -4460.52, - -2777.25, - -1572.97, - -846.653, - -442.531, - -2866.97, - 496.273, - 900.195, - 667.591, - 403.92, - 222.614, - 116.779, - 59.5958, - -5807.3, - 900.195, - 1799.56, - 1376.77, - 854.448, - 482.435, - 258.92, - 134.975, - -4460.52, - 667.591, - 1376.77, - 1063.03, - 664.527, - 377.714, - 203.967, - 106.926, - -2777.25, - 403.92, - 854.448, - 664.527, - 417.837, - 238.76, - 129.55, - 68.2075, - -1572.97, - 222.614, - 482.435, - 377.714, - 238.76, - 137.151, - 74.7665, - 39.5247, - -846.653, - 116.779, - 258.92, - 203.967, - 129.55, - 74.7665, - 40.9423, - 21.7285, - -442.531, - 59.5958, - 134.975, - 106.926, - 68.2075, - 39.5247, - 21.7285, - 11.5732, - ] - nsignal = [x / 100.0 for x in [47, 29.4, 21.1, 14.3, 9.4, 7.1, 4.7, 4.3]] - m = PyhfData( - observed=[1964, 877, 354, 182, 82, 36, 15, 11], - backgrounds=[2006.4, 836.4, 350.0, 147.1, 62.0, 26.2, 11.1, 4.7], - covariance=C, - # third_moment = [ 0.1, 0.02, 0.1, 0.1, 0.003, 0.0001, 0.0002, 0.0005 ], - third_moment=[0.0] * 8, - nsignal=nsignal, - name="ATLAS-SUSY-2018-31 model", - ) - ulComp = PyhfUpperLimitComputer(cl=0.95) - # uls = ulComp.getUpperLimitOnMu ( Data ( 15,17.5,3.2,0.00454755 ) ) - # print ( "uls=", uls ) - ul_old = 131.828 * sum( - nsignal - ) # With respect to the older refernece value one must normalize the xsec - print("old ul=", ul_old) - ul = ulComp.getUpperLimitOnMu(m) - print("ul", ul) + ### Needs to be updated + print("Needs to be updated") + # C = [ + # 18774.2, + # -2866.97, + # -5807.3, + # -4460.52, + # -2777.25, + # -1572.97, + # -846.653, + # -442.531, + # -2866.97, + # 496.273, + # 900.195, + # 667.591, + # 403.92, + # 222.614, + # 116.779, + # 59.5958, + # -5807.3, + # 900.195, + # 1799.56, + # 1376.77, + # 854.448, + # 482.435, + # 258.92, + # 134.975, + # -4460.52, + # 667.591, + # 1376.77, + # 1063.03, + # 664.527, + # 377.714, + # 203.967, + # 106.926, + # -2777.25, + # 403.92, + # 854.448, + # 664.527, + # 417.837, + # 238.76, + # 129.55, + # 68.2075, + # -1572.97, + # 222.614, + # 482.435, + # 377.714, + # 238.76, + # 137.151, + # 74.7665, + # 39.5247, + # -846.653, + # 116.779, + # 258.92, + # 203.967, + # 129.55, + # 74.7665, + # 40.9423, + # 21.7285, + # -442.531, + # 59.5958, + # 134.975, + # 106.926, + # 68.2075, + # 39.5247, + # 21.7285, + # 11.5732, + # ] + # nsignal = [x / 100.0 for x in [47, 29.4, 21.1, 14.3, 9.4, 7.1, 4.7, 4.3]] + # m = PyhfData( + # observed=[1964, 877, 354, 182, 82, 36, 15, 11], + # backgrounds=[2006.4, 836.4, 350.0, 147.1, 62.0, 26.2, 11.1, 4.7], + # covariance=C, + # # third_moment = [ 0.1, 0.02, 0.1, 0.1, 0.003, 0.0001, 0.0002, 0.0005 ], + # third_moment=[0.0] * 8, + # nsignal=nsignal, + # name="ATLAS-SUSY-2018-31 model", + # ) + # ulComp = PyhfUpperLimitComputer(cl=0.95) + # # uls = ulComp.getUpperLimitOnMu ( Data ( 15,17.5,3.2,0.00454755 ) ) + # # print ( "uls=", uls ) + # ul_old = 131.828 * sum( + # nsignal + # ) # With respect to the older refernece value one must normalize the xsec + # print("old ul=", ul_old) + # ul = ulComp.getUpperLimitOnMu(m) + # print("ul", ul) diff --git a/smodels/statistics/statsTools.py b/smodels/statistics/statsTools.py index 3ada7149cd..2b0c4c2b26 100755 --- a/smodels/statistics/statsTools.py +++ b/smodels/statistics/statsTools.py @@ -28,7 +28,7 @@ class StatsComputer: "upperLimitComputer", "deltas_sys", "allowNegativeSignals" ] def __init__ ( self, dataObject : Union['DataSet','CombinedDataSet', list], dataType : str, - nsig : Union[None,float,List] = None, + nsig : Union[None,float,List,Dict] = None, deltas_rel : Union[None,float] = None, allowNegativeSignals : bool = False): """ @@ -214,7 +214,6 @@ def getComputerPyhf(self ): Create computer for a pyhf result """ - globalInfo = self.dataObject.globalInfo jsonFiles = [js for js in globalInfo.jsonFiles] jsons = globalInfo.jsons.copy() @@ -222,42 +221,58 @@ def getComputerPyhf(self ): datasets = [ds.getID() for ds in self.dataObject.origdatasets] # Filtering the json files by looking at the available datasets for jsName in globalInfo.jsonFiles: - if all([ds not in globalInfo.jsonFiles[jsName] for ds in datasets]): + jsonSRs = [] + for ir,region in enumerate ( globalInfo.jsonFiles[jsName] ): + if isinstance(region,str): + region = { "smodels": region, "type": "SR" } + elif isinstance(region,dict) and not ("type" in region): + region["type"]="SR" + else: + raise SModelSError("The jsonFiles field should contain lists \ + of strings or dictionaries \ + (%s is not allowed)" %type(region)) + + globalInfo.jsonFiles[jsName][ir] = region + if region['type'] == 'SR': + jsonSRs.append(region['smodels']) + if all([ds not in jsonSRs for ds in datasets]): # No datasets found for this json combination jsIndex = jsonFiles.index(jsName) jsonFiles.pop(jsIndex) jsons.pop(jsIndex) continue - if not all([ds in datasets for ds in globalInfo.jsonFiles[jsName]]): + if not all([SR in datasets for SR in jsonSRs]): # Some SRs are missing for this json combination - logger.error( "Wrong json definition in globalInfo.jsonFiles for json : %s" % jsName) + logger.error( f"Wrong json definition in globalInfo.jsonFiles for json : {jsName}" ) + + jsonDictNames = {} + for jsName in jsonFiles: + jsonDictNames.update( { jsName: [ region['smodels'] for region in globalInfo.jsonFiles[jsName] if region is not None ] } ) + # jsonRegions = [ [region['smodels'] for region in globalInfo.jsonFiles[jsName]] for jsName in jsonFiles] + jsonRegions = [ region for regions in jsonDictNames.values() for region in regions ] + for ds in datasets: + if not ds in jsonRegions: + logger.info(f'Region {ds} does not appear in any json file for {globalInfo.id}') logger.debug("list of datasets: {}".format(datasets)) logger.debug("jsonFiles after filtering: {}".format(jsonFiles)) + + # Constructing the list of signals with subsignals matching each json + nsignals = {} + for jsName in jsonFiles: + nsignals.update( { jsName: {} } ) + for name, nsig in self.nsig.items(): + for jsName in nsignals.keys(): + if name in jsonDictNames[jsName]: + nsignals[jsName].update( { name: nsig } ) + includeCRs = False if hasattr(globalInfo,'includeCRs'): includeCRs = globalInfo.includeCRs signalUncertainty = None if hasattr(globalInfo,"signalUncertainty"): signalUncertainty = globalInfo.signalUncertainty - # Constructing the list of signals with subsignals matching each json - nsignals = list() - for jsName in jsonFiles: - subSig = list() - for srName in globalInfo.jsonFiles[jsName]: - try: - if 'CR' in srName and not includeCRs: # Allow signal to leak in CRs only if they are kept - continue - index = datasets.index(srName) - except ValueError: - line = ( - f"{srName} signal region provided in globalInfo is not in the list of datasets, {jsName}:{','.join(datasets)}" - ) - raise ValueError(line) - sig = self.nsig[index] - subSig.append(sig) - nsignals.append(subSig) + # Loading the jsonFiles, unless we already have them (because we pickled) - nsig = self.nsig data = PyhfData(nsignals, jsons, globalInfo.jsonFiles, includeCRs, signalUncertainty) if data.errorFlag: return None diff --git a/unittests/patch_2018-16.json b/unittests/patch_2018-16.json index 9ae2742693..b94cf86687 100644 --- a/unittests/patch_2018-16.json +++ b/unittests/patch_2018-16.json @@ -33,16 +33,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/0/name", - "value": "CR_ewk_VV_high" - }, - { - "op": "replace", - "path": "/observations/0/name", - "value": "CR_ewk_VV_high" - }, { "op": "add", "path": "/channels/1/samples/0", @@ -77,16 +67,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/1/name", - "value": "CR_ewk_VV_low" - }, - { - "op": "replace", - "path": "/observations/1/name", - "value": "CR_ewk_VV_low" - }, { "op": "add", "path": "/channels/2/samples/0", @@ -121,16 +101,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/2/name", - "value": "CR_ewk_tau_high" - }, - { - "op": "replace", - "path": "/observations/2/name", - "value": "CR_ewk_tau_high" - }, { "op": "add", "path": "/channels/3/samples/0", @@ -165,16 +135,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/3/name", - "value": "CR_ewk_tau_low" - }, - { - "op": "replace", - "path": "/observations/3/name", - "value": "CR_ewk_tau_low" - }, { "op": "add", "path": "/channels/4/samples/0", @@ -209,16 +169,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/4/name", - "value": "CR_ewk_top_high" - }, - { - "op": "replace", - "path": "/observations/4/name", - "value": "CR_ewk_top_high" - }, { "op": "add", "path": "/channels/5/samples/0", @@ -253,16 +203,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/5/name", - "value": "CR_ewk_top_low" - }, - { - "op": "replace", - "path": "/observations/5/name", - "value": "CR_ewk_top_low" - }, { "op": "add", "path": "/channels/6/samples/0", @@ -297,16 +237,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/6/name", - "value": "SR_ewk_2l_ee_high_c" - }, - { - "op": "replace", - "path": "/observations/6/name", - "value": "SR_ewk_2l_ee_high_c" - }, { "op": "add", "path": "/channels/7/samples/0", @@ -341,16 +271,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/7/name", - "value": "SR_ewk_2l_ee_low_c" - }, - { - "op": "replace", - "path": "/observations/7/name", - "value": "SR_ewk_2l_ee_low_c" - }, { "op": "add", "path": "/channels/8/samples/0", @@ -385,16 +305,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/8/name", - "value": "SR_ewk_2l_ee_med_c" - }, - { - "op": "replace", - "path": "/observations/8/name", - "value": "SR_ewk_2l_ee_med_c" - }, { "op": "add", "path": "/channels/9/samples/0", @@ -429,16 +339,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/9/name", - "value": "SR_ewk_2l_ee_high_d" - }, - { - "op": "replace", - "path": "/observations/9/name", - "value": "SR_ewk_2l_ee_high_d" - }, { "op": "add", "path": "/channels/10/samples/0", @@ -473,16 +373,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/10/name", - "value": "SR_ewk_2l_ee_low_d" - }, - { - "op": "replace", - "path": "/observations/10/name", - "value": "SR_ewk_2l_ee_low_d" - }, { "op": "add", "path": "/channels/11/samples/0", @@ -517,16 +407,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/11/name", - "value": "SR_ewk_2l_ee_med_d" - }, - { - "op": "replace", - "path": "/observations/11/name", - "value": "SR_ewk_2l_ee_med_d" - }, { "op": "add", "path": "/channels/12/samples/0", @@ -561,16 +441,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/12/name", - "value": "SR_ewk_2l_ee_high_e" - }, - { - "op": "replace", - "path": "/observations/12/name", - "value": "SR_ewk_2l_ee_high_e" - }, { "op": "add", "path": "/channels/13/samples/0", @@ -605,16 +475,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/13/name", - "value": "SR_ewk_2l_ee_low_e" - }, - { - "op": "replace", - "path": "/observations/13/name", - "value": "SR_ewk_2l_ee_low_e" - }, { "op": "add", "path": "/channels/14/samples/0", @@ -649,16 +509,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/14/name", - "value": "SR_ewk_2l_ee_med_e" - }, - { - "op": "replace", - "path": "/observations/14/name", - "value": "SR_ewk_2l_ee_med_e" - }, { "op": "add", "path": "/channels/15/samples/0", @@ -693,16 +543,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/15/name", - "value": "SR_ewk_2l_ee_high_f" - }, - { - "op": "replace", - "path": "/observations/15/name", - "value": "SR_ewk_2l_ee_high_f" - }, { "op": "add", "path": "/channels/16/samples/0", @@ -737,16 +577,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/16/name", - "value": "SR_ewk_2l_ee_low_f" - }, - { - "op": "replace", - "path": "/observations/16/name", - "value": "SR_ewk_2l_ee_low_f" - }, { "op": "add", "path": "/channels/17/samples/0", @@ -781,16 +611,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/17/name", - "value": "SR_ewk_2l_ee_med_f" - }, - { - "op": "replace", - "path": "/observations/17/name", - "value": "SR_ewk_2l_ee_med_f" - }, { "op": "add", "path": "/channels/18/samples/0", @@ -825,16 +645,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/18/name", - "value": "SR_ewk_2l_ee_high_g" - }, - { - "op": "replace", - "path": "/observations/18/name", - "value": "SR_ewk_2l_ee_high_g" - }, { "op": "add", "path": "/channels/19/samples/0", @@ -869,16 +679,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/19/name", - "value": "SR_ewk_2l_ee_low_g" - }, - { - "op": "replace", - "path": "/observations/19/name", - "value": "SR_ewk_2l_ee_low_g" - }, { "op": "add", "path": "/channels/20/samples/0", @@ -913,16 +713,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/20/name", - "value": "SR_ewk_2l_ee_high_h" - }, - { - "op": "replace", - "path": "/observations/20/name", - "value": "SR_ewk_2l_ee_high_h" - }, { "op": "add", "path": "/channels/21/samples/0", @@ -957,16 +747,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/21/name", - "value": "SR_ewk_2l_ee_low_h" - }, - { - "op": "replace", - "path": "/observations/21/name", - "value": "SR_ewk_2l_ee_low_h" - }, { "op": "add", "path": "/channels/22/samples/0", @@ -1001,16 +781,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/22/name", - "value": "SR_ewk_2l_mm_high_a" - }, - { - "op": "replace", - "path": "/observations/22/name", - "value": "SR_ewk_2l_mm_high_a" - }, { "op": "add", "path": "/channels/23/samples/0", @@ -1045,16 +815,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/23/name", - "value": "SR_ewk_2l_mm_low_a" - }, - { - "op": "replace", - "path": "/observations/23/name", - "value": "SR_ewk_2l_mm_low_a" - }, { "op": "add", "path": "/channels/24/samples/0", @@ -1089,16 +849,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/24/name", - "value": "SR_ewk_2l_mm_med_a" - }, - { - "op": "replace", - "path": "/observations/24/name", - "value": "SR_ewk_2l_mm_med_a" - }, { "op": "add", "path": "/channels/25/samples/0", @@ -1133,16 +883,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/25/name", - "value": "SR_ewk_2l_mm_high_b" - }, - { - "op": "replace", - "path": "/observations/25/name", - "value": "SR_ewk_2l_mm_high_b" - }, { "op": "add", "path": "/channels/26/samples/0", @@ -1177,16 +917,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/26/name", - "value": "SR_ewk_2l_mm_low_b" - }, - { - "op": "replace", - "path": "/observations/26/name", - "value": "SR_ewk_2l_mm_low_b" - }, { "op": "add", "path": "/channels/27/samples/0", @@ -1221,16 +951,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/27/name", - "value": "SR_ewk_2l_mm_med_b" - }, - { - "op": "replace", - "path": "/observations/27/name", - "value": "SR_ewk_2l_mm_med_b" - }, { "op": "add", "path": "/channels/28/samples/0", @@ -1265,16 +985,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/28/name", - "value": "SR_ewk_2l_mm_high_c" - }, - { - "op": "replace", - "path": "/observations/28/name", - "value": "SR_ewk_2l_mm_high_c" - }, { "op": "add", "path": "/channels/29/samples/0", @@ -1309,16 +1019,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/29/name", - "value": "SR_ewk_2l_mm_low_c" - }, - { - "op": "replace", - "path": "/observations/29/name", - "value": "SR_ewk_2l_mm_low_c" - }, { "op": "add", "path": "/channels/30/samples/0", @@ -1353,16 +1053,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/30/name", - "value": "SR_ewk_2l_mm_med_c" - }, - { - "op": "replace", - "path": "/observations/30/name", - "value": "SR_ewk_2l_mm_med_c" - }, { "op": "add", "path": "/channels/31/samples/0", @@ -1397,16 +1087,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/31/name", - "value": "SR_ewk_2l_mm_high_d" - }, - { - "op": "replace", - "path": "/observations/31/name", - "value": "SR_ewk_2l_mm_high_d" - }, { "op": "add", "path": "/channels/32/samples/0", @@ -1441,16 +1121,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/32/name", - "value": "SR_ewk_2l_mm_low_d" - }, - { - "op": "replace", - "path": "/observations/32/name", - "value": "SR_ewk_2l_mm_low_d" - }, { "op": "add", "path": "/channels/33/samples/0", @@ -1485,16 +1155,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/33/name", - "value": "SR_ewk_2l_mm_med_d" - }, - { - "op": "replace", - "path": "/observations/33/name", - "value": "SR_ewk_2l_mm_med_d" - }, { "op": "add", "path": "/channels/34/samples/0", @@ -1529,16 +1189,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/34/name", - "value": "SR_ewk_2l_mm_high_e" - }, - { - "op": "replace", - "path": "/observations/34/name", - "value": "SR_ewk_2l_mm_high_e" - }, { "op": "add", "path": "/channels/35/samples/0", @@ -1573,16 +1223,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/35/name", - "value": "SR_ewk_2l_mm_low_e" - }, - { - "op": "replace", - "path": "/observations/35/name", - "value": "SR_ewk_2l_mm_low_e" - }, { "op": "add", "path": "/channels/36/samples/0", @@ -1617,16 +1257,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/36/name", - "value": "SR_ewk_2l_mm_med_e" - }, - { - "op": "replace", - "path": "/observations/36/name", - "value": "SR_ewk_2l_mm_med_e" - }, { "op": "add", "path": "/channels/37/samples/0", @@ -1661,16 +1291,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/37/name", - "value": "SR_ewk_2l_mm_high_f" - }, - { - "op": "replace", - "path": "/observations/37/name", - "value": "SR_ewk_2l_mm_high_f" - }, { "op": "add", "path": "/channels/38/samples/0", @@ -1705,16 +1325,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/38/name", - "value": "SR_ewk_2l_mm_low_f" - }, - { - "op": "replace", - "path": "/observations/38/name", - "value": "SR_ewk_2l_mm_low_f" - }, { "op": "add", "path": "/channels/39/samples/0", @@ -1749,16 +1359,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/39/name", - "value": "SR_ewk_2l_mm_med_f" - }, - { - "op": "replace", - "path": "/observations/39/name", - "value": "SR_ewk_2l_mm_med_f" - }, { "op": "add", "path": "/channels/40/samples/0", @@ -1793,16 +1393,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/40/name", - "value": "SR_ewk_2l_mm_high_g" - }, - { - "op": "replace", - "path": "/observations/40/name", - "value": "SR_ewk_2l_mm_high_g" - }, { "op": "add", "path": "/channels/41/samples/0", @@ -1837,16 +1427,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/41/name", - "value": "SR_ewk_2l_mm_low_g" - }, - { - "op": "replace", - "path": "/observations/41/name", - "value": "SR_ewk_2l_mm_low_g" - }, { "op": "add", "path": "/channels/42/samples/0", @@ -1881,16 +1461,6 @@ "name": "bsm" } }, - { - "op": "replace", - "path": "/channels/42/name", - "value": "SR_ewk_2l_mm_high_h" - }, - { - "op": "replace", - "path": "/observations/42/name", - "value": "SR_ewk_2l_mm_high_h" - }, { "op": "add", "path": "/channels/43/samples/0", @@ -1924,15 +1494,5 @@ ], "name": "bsm" } - }, - { - "op": "replace", - "path": "/channels/43/name", - "value": "SR_ewk_2l_mm_low_h" - }, - { - "op": "replace", - "path": "/observations/43/name", - "value": "SR_ewk_2l_mm_low_h" } ] \ No newline at end of file diff --git a/unittests/testPyhf.py b/unittests/testPyhf.py index 96d57fa7ee..ceb60b9541 100755 --- a/unittests/testPyhf.py +++ b/unittests/testPyhf.py @@ -14,6 +14,8 @@ import json import jsonpatch from smodels.statistics.pyhfInterface import PyhfData, PyhfUpperLimitComputer, pyhf +from smodels.base.smodelsLogging import logger +import warnings class PyhfTest(unittest.TestCase): @@ -53,28 +55,6 @@ def simpleJson(self, bkg, obs): version='1.0.0') return ws - def testJsonNames(self): - """ FIXME this is a test for the correctness of our - pyhf name hack. the hack as well as this test will disappear, - once a proper smodels-name to pyhf-name map is in place in the database - """ - from smodels.experiment.databaseObj import Database - database = Database('./database_extra/') - expRes = database.getExpResults(analysisIDs=['ATLAS-SUSY-2019-09'], - datasetIDs=['all'], - dataTypes=['efficiencyMap'])[0] - from smodels.experiment.datasetObj import CombinedDataSet - deltas_rel = 0. - srNsigs = [0.]*len(expRes.origdatasets) - from smodels.statistics.statsTools import StatsComputer - cdataset = CombinedDataSet ( expRes ) - computer = StatsComputer.forPyhf( cdataset, srNsigs, deltas_rel ) - channelnames = [['SRWZ_1', 'SRWZ_10', 'SRWZ_11', 'SRWZ_12', 'SRWZ_13', 'SRWZ_14', 'SRWZ_15', 'SRWZ_16', 'SRWZ_17', 'SRWZ_18', 'SRWZ_19', 'SRWZ_2', 'SRWZ_20', 'SRWZ_3', 'SRWZ_4', 'SRWZ_5', 'SRWZ_6', 'SRWZ_7', 'SRWZ_8', 'SRWZ_9', 'WZ_CR_0jets_cuts', 'WZ_CR_HighHT_cuts', 'WZ_CR_LowHT_cuts'],['CR_0J_WZ_cuts', 'CR_nJ_WZ_cuts', 'SRhigh_0Jb', 'SRhigh_0Jc', 'SRhigh_0Jd', 'SRhigh_0Je', 'SRhigh_0Jf1', 'SRhigh_0Jf2', 'SRhigh_0Jg1', 'SRhigh_0Jg2', 'SRhigh_nJa', 'SRhigh_nJb', 'SRhigh_nJc', 'SRhigh_nJd', 'SRhigh_nJe', 'SRhigh_nJf', 'SRhigh_nJg', 'SRlow_0Jb', 'SRlow_0Jc', 'SRlow_0Jd', 'SRlow_0Je', 'SRlow_0Jf1', 'SRlow_0Jf2', 'SRlow_0Jg1', 'SRlow_0Jg2', 'SRlow_nJb', 'SRlow_nJc', 'SRlow_nJd', 'SRlow_nJe', 'SRlow_nJf1', 'SRlow_nJf2', 'SRlow_nJg1', 'SRlow_nJg2']] - for i,ws in enumerate ( computer.likelihoodComputer.workspaces ): - model = ws.model() - self.assertTrue ( model.config.channels == channelnames[i] ) - - def testCorruptJson1Signal(self): """ Tests how the module handles corrupted json files @@ -110,7 +90,8 @@ def testCorruptJson1Signal(self): observations=observations, version='1.0.0' ) - data = PyhfData([[0.1]], [ws] ) + signal = { "dummy0": { "SR1": 0.1 } } + data = PyhfData( signal, [ws] ) ulcomputer = PyhfUpperLimitComputer(data) ul = ulcomputer.getUpperLimitOnMu() self.assertEqual(ulcomputer.workspaces, None) @@ -121,7 +102,7 @@ def testCorruptJson1Signal(self): observations=observations, version='1.0.0' ) - data = PyhfData([[0.1]], [ws] ) + data = PyhfData( signal, [ws] ) ulcomputer = PyhfUpperLimitComputer(data) ul = ulcomputer.getUpperLimitOnMu() self.assertEqual(ulcomputer.workspaces, None) @@ -132,7 +113,7 @@ def testCorruptJson1Signal(self): #observations=observations, version='1.0.0' ) - data = PyhfData([[0.1]], [ws]) + data = PyhfData( signal, [ws]) ulcomputer = PyhfUpperLimitComputer(data) ul = ulcomputer.getUpperLimitOnMu() self.assertEqual(ulcomputer.workspaces, None) @@ -143,7 +124,7 @@ def testCorruptJson1Signal(self): observations=observations, #version='1.0.0' ) - data = PyhfData([[0.1]], [ws]) + data = PyhfData(signal, [ws]) ulcomputer = PyhfUpperLimitComputer(data) ul = ulcomputer.getUpperLimitOnMu() self.assertIsNone(ulcomputer.workspaces) @@ -181,8 +162,10 @@ def testCorruptJson2Signal(self): measurements=measurements, observations=observations, version='1.0.0' - ) - data = PyhfData([[0.1, 0.2]], [ws]) + ) + signal = { "dummy0": { "SR1": 0.1, "SR2": 0.2 } } + data = PyhfData(signal, [ws]) + #data = PyhfData([[0.1, 0.2]], [ws]) ulcomputer = PyhfUpperLimitComputer(data) ul = ulcomputer.getUpperLimitOnMu() self.assertEqual(ulcomputer.workspaces, None) @@ -193,7 +176,7 @@ def testCorruptJson2Signal(self): observations=observations, version='1.0.0' ) - data = PyhfData([[0.1, 0.2]], [ws] ) + data = PyhfData(signal, [ws] ) ulcomputer = PyhfUpperLimitComputer(data) ul = ulcomputer.getUpperLimitOnMu() self.assertEqual(ulcomputer.workspaces, None) @@ -204,7 +187,7 @@ def testCorruptJson2Signal(self): #observations=observations, version='1.0.0' ) - data = PyhfData([[0.1, 0.2]], [ws] ) + data = PyhfData(signal, [ws] ) ulcomputer = PyhfUpperLimitComputer(data) ul = ulcomputer.getUpperLimitOnMu() self.assertEqual(ulcomputer.workspaces, None) @@ -215,7 +198,7 @@ def testCorruptJson2Signal(self): observations=observations, #version='1.0.0' ) - data = PyhfData([[0.1, 0.2]], [ws] ) + data = PyhfData(signal, [ws] ) ulcomputer = PyhfUpperLimitComputer(data) ul = ulcomputer.getUpperLimitOnMu() self.assertIsNone(ulcomputer.workspaces) @@ -226,7 +209,8 @@ def testNoSignal(self): Tests the case where all SRs are empty """ ws = self.simpleJson([0.9], [10]) - data = PyhfData([[0]], [ws]) + signal = { "dummy0": { "SR1": 0. } } + data = PyhfData(signal, [ws]) ulcomputer = PyhfUpperLimitComputer(data) ul = ulcomputer.getUpperLimitOnMu() self.assertIsNone(ul) @@ -237,12 +221,15 @@ def testWrongNbOfSignals(self): """ # One single json but too much signals ws = self.simpleJson([0.9], [10]) - data = PyhfData([[0.9, 0.5]], [ws] ) + nsignals = [[0.9, 0.5]] + nsignals = { "dummy0": { "SR1": 0.9, "SR2": 0.5 } } + data = PyhfData( nsignals, [ws] ) ulcomputer = PyhfUpperLimitComputer(data) ul1 = ulcomputer.getUpperLimitOnMu() # Two jsons but only one signal ws = [self.simpleJson([0.9], [10]), self.simpleJson([0.8], [9])] - data = PyhfData([[0.5]], ws ) + signal = { "dummy0": { "SR1": 0.5 } } + data = PyhfData( signal, ws ) ulcomputer = PyhfUpperLimitComputer(data) ul2 = ulcomputer.getUpperLimitOnMu(workspace_index=0) self.assertIsNone(ul1) @@ -255,9 +242,10 @@ def testWSindex(self): """ bg = [ .9, .8 ] obs = [ 10, 9 ] - nsig = [ .1, .2 ] + # nsig = [ .1, .2 ] ws = [ self.simpleJson([x], [y]) for x,y in zip (bg,obs) ] - nsignals = [ [x] for x in nsig ] + # nsignals = [ [x] for x in nsig ] + nsignals = { "dummy0": { "SR1" : .1 }, "dummy1": { "SR1": .2 } } data = PyhfData( nsignals, ws) ulcomputer = PyhfUpperLimitComputer(data) ul = ulcomputer.getUpperLimitOnMu() @@ -274,13 +262,13 @@ def testFullPyhfModule1(self): Computes the UL using the pyhfInterface module and checks if, outside of the module, this UL still gives a 95% CLs """ bkg = self.simpleJson([0.8], [10]) - signals = [0.4] + signals = [ 0.4 ] # Make the patch by hand patch = [dict( op='add', path='/channels/0/samples/0', value=dict( - name='sig', + name='SR1', data=signals, modifiers=[ dict( @@ -296,9 +284,10 @@ def testFullPyhfModule1(self): ] ) )] + signals = { "dummy0": { "SR1": 0.4 } } llhdSpec = jsonpatch.apply_patch(bkg, patch) # Computing the upper limit with the SModelS/pyhf interface - data = PyhfData([signals], [bkg]) + data = PyhfData(signals, [bkg]) ulcomputer = PyhfUpperLimitComputer(data) ul = ulcomputer.getUpperLimitOnMu() # ul = ul * data.totalYield @@ -332,7 +321,7 @@ def testFullPyhfModule2(self): op='add', path='/channels/0/samples/0', value=dict( - name='sig', + name='SR1', data=signals, modifiers=[ dict( @@ -350,7 +339,8 @@ def testFullPyhfModule2(self): )] llhdSpec = jsonpatch.apply_patch(bkg, patch) # Computing the upper limit with the SModelS/pyhf interface - data = PyhfData([signals], [bkg] ) + signals = { "dummy0": { "SR1[0]": 0.4, "SR1[1]": 0.2 } } + data = PyhfData(signals, [bkg] ) ulcomputer = PyhfUpperLimitComputer(data) ul = ulcomputer.getUpperLimitOnMu() # Computing the cls outside of SModelS with POI = ul, should give 0.95 @@ -385,6 +375,7 @@ def testPatchUncertaintyAndCRs(self): import os from smodels.decomposition import decomposer from smodels.matching.theoryPrediction import _getDataSetPredictions, _getCombinedResultFor, TheoryPredictionList + warnings.filterwarnings("ignore", category=DeprecationWarning) database = Database('./database/') runtime.modelFile = "smodels.share.models.mssm" @@ -420,6 +411,12 @@ def testPatchUncertaintyAndCRs(self): with open('patch_2018-16.json','r') as f: patch_ref = json.load(f) + equals = patch == patch_ref + if not equals: + logger.error ( f"patch_2018-16.json != debug.json" ) + with open('debug.json','w') as f: + json.dump(patch,f,indent=4) + self.assertEqual(patch,patch_ref) if __name__ == "__main__": diff --git a/unittests/testRunSModelS.py b/unittests/testRunSModelS.py index 910d35023d..74bfb0d009 100755 --- a/unittests/testRunSModelS.py +++ b/unittests/testRunSModelS.py @@ -117,6 +117,8 @@ def testGoodFileWithModelFromSLHA(self): self.removeOutputs(outputfile) def testPyhfCombination(self): + import warnings + warnings.filterwarnings("ignore", category=DeprecationWarning) filename = "./testFiles/slha/T6bbHH_pyhf.slha" inifile = "./testParameters_pyhf.ini" outputfile = runMain(filename, inifile=inifile, suppressStdout=True)