diff --git a/smodels/matching/modelTester.py b/smodels/matching/modelTester.py index 4a63be510..7588c3760 100644 --- a/smodels/matching/modelTester.py +++ b/smodels/matching/modelTester.py @@ -92,7 +92,7 @@ def testPoint(inputFile, outputDir, parser, database): printParameters = OrderedDict(printParameters) outputStatus = ioObjects.OutputStatus(inputStatus.status, inputFile, - printParameters, + printParameters, database.databaseVersion) masterPrinter.addObj(outputStatus) if outputStatus.status < 0: @@ -121,7 +121,7 @@ def testPoint(inputFile, outputDir, parser, database): ignorePromptQNumbers = ignorePromptQNumbers.replace(" ","") ignorePromptQNumbers = ignorePromptQNumbers.split(",") model.updateParticles(inputFile=inputFile, - promptWidth=promptWidth, + promptWidth=promptWidth, stableWidth=stableWidth, ignorePromptQNumbers=ignorePromptQNumbers) except SModelSError as e: @@ -186,7 +186,7 @@ def testPoint(inputFile, outputDir, parser, database): pass allPredictions = theoryPredictionsFor(database, smstoplist, - useBestDataset=useBest, + useBestDataset=useBest, combinedResults=combineResults) """Compute chi-square and likelihood""" @@ -201,6 +201,7 @@ def testPoint(inputFile, outputDir, parser, database): if len(theoryPredictions) != 0: outputStatus.updateStatus(1) + theoryPredictions._theoryPredictions = [tp for tp in theoryPredictions._theoryPredictions if not "CR" in os.path.basename(tp.dataset.path)] # Do not print CRs "results" masterPrinter.addObj(theoryPredictions) else: outputStatus.updateStatus(0) # no results after enforcing maxcond @@ -319,7 +320,7 @@ def _determineNCPus(cpus_wanted, n_files): return ncpus -def testPoints(fileList, inDir, outputDir, parser, database, +def testPoints(fileList, inDir, outputDir, parser, database, timeout, development, parameterFile): """ Loop over all input files in fileList with testPoint, using ncpus CPUs @@ -504,7 +505,7 @@ def loadDatabaseResults(parser, database): database.selectExpResults(analysisIDs=analyses, txnames=txnames, datasetIDs=datasetIDs, dataTypes=dataTypes, useNonValidated=useNonValidated) - + def getParameters(parameterFile): """ @@ -590,4 +591,3 @@ def getCombiner(inputFile,parameterFile): return combiner - diff --git a/smodels/matching/theoryPrediction.py b/smodels/matching/theoryPrediction.py index c34aa9644..4f3d8c0c5 100644 --- a/smodels/matching/theoryPrediction.py +++ b/smodels/matching/theoryPrediction.py @@ -684,7 +684,7 @@ def theoryPredictionsFor(database : Database, smsTopDict : Dict, if combinedRes is not None: dataSetResults.append(TheoryPredictionList([combinedRes])) if not useBestDataset: # Report all datasets - expResults = sum(dataSetResults) + expResults = sum(dataSetResults) else: expResults = TheoryPredictionList() expResults.append(_getBestResult(dataSetResults)) # Best result = combination if available @@ -692,7 +692,10 @@ def theoryPredictionsFor(database : Database, smsTopDict : Dict, for theoPred in expResults: theoPred.expResult = expResult theoPred.deltas_rel = deltas_rel - theoPred.upperLimit = theoPred.getUpperLimit() + if not isinstance(theoPred.dataset,CombinedDataSet) and not theoPred.dataset.dataInfo.dataId is None and "CR" in theoPred.dataset.dataInfo.dataId: # Individual CRs shouldn't give results + theoPred.upperLimit = None + else: + theoPred.upperLimit = theoPred.getUpperLimit() expResults.sortTheoryPredictions() for theoPred in expResults: @@ -778,7 +781,7 @@ def _getBestResult(dataSetResults): # return the single result: if len(dataSetResults) == 1: return dataSetResults[0] - + # If 
combination is included, always return it for predList in dataSetResults: for tp in predList: @@ -794,6 +797,8 @@ def _getBestResult(dataSetResults): logger.error("Multiple clusters should only exist for upper limit results!") raise SModelSError() dataset = predList[0].dataset + if "CR" in dataset.dataInfo.dataId: # A CR cannot be the best SR + continue if dataset.getType() != "efficiencyMap": txt = ( "Multiple data sets should only exist for efficiency map results, but we have them for %s?" diff --git a/smodels/statistics/pyhfInterface.py b/smodels/statistics/pyhfInterface.py index 1b976589f..395fa45c9 100755 --- a/smodels/statistics/pyhfInterface.py +++ b/smodels/statistics/pyhfInterface.py @@ -17,7 +17,7 @@ import numpy as np from smodels.base.smodelsLogging import logger import logging -import warnings +logging.getLogger("pyhf").setLevel(logging.CRITICAL) warnings.filterwarnings("ignore") jsonver = "" @@ -77,9 +77,7 @@ warnings.filterwarnings("ignore", r"invalid value encountered in log") - countWarning = {"llhdszero": 0} -# logger=getLogger() class PyhfData: @@ -91,14 +89,18 @@ class PyhfData: :ivar nWS: number of workspaces = number of json files """ - def __init__(self, nsignals, inputJsons, jsonFiles=None ): + def __init__(self, nsignals, inputJsons, jsonFiles=None, includeCRs=False, signalUncertainty=None): self.nsignals = nsignals # fb self.getTotalYield() self.inputJsons = inputJsons self.cached_likelihoods = {} ## cache of likelihoods (actually twice_nlls) self.cached_lmaxes = {} # cache of lmaxes (actually twice_nlls) self.cachedULs = {False: {}, True: {}, "posteriori": {}} + if jsonFiles is None: # If no name has been provided for the json file(s) and the channels, use fake ones + jsonFiles = dict( zip( [ "dummy%d" % i for i in range(len(inputJsons)) ], [ "" for i in range(len(inputJsons)) ] ) ) self.jsonFiles = jsonFiles + self.includeCRs = includeCRs + self.signalUncertainty = signalUncertainty self.combinations = None if jsonFiles != None: self.combinations = [os.path.splitext(os.path.basename(js))[0] for js in jsonFiles] @@ -119,7 +121,7 @@ def getWSInfo(self): :ivar channelsInfo: list of dictionaries (one dictionary for each json file) containing useful information about the json files - :key signalRegions: list of dictonaries with 'json path' and 'size' (number of bins) of the 'signal regions' channels in the json files - - :key otherRegions: list of strings indicating the path to the control and validation region channels + - :key otherRegions: list of dictionnaries indicating the path and the name of the control and/or validation region channels """ # Identifying the path to the SR and VR channels in the main workspace files self.channelsInfo = [] # workspace specifications @@ -127,7 +129,7 @@ def getWSInfo(self): logger.error("The 'inputJsons' parameter must be of type list") self.errorFlag = True return - for ws in self.inputJsons: + for ws, jsName in zip(self.inputJsons, [js for js in self.jsonFiles]): wsChannelsInfo = {} wsChannelsInfo["signalRegions"] = [] wsChannelsInfo["otherRegions"] = [] @@ -139,8 +141,20 @@ def getWSInfo(self): ) self.channelsInfo = None return + nbCRwithEM = 0 + nbCRinWS = 0 + for dataset in self.jsonFiles[jsName]: + if "CR" in dataset: + nbCRwithEM += 1 + for ch in ws["channels"]: + if "CR" in ch["name"]: + nbCRinWS += 1 + if nbCRwithEM and nbCRwithEM != nbCRinWS: + logger.warning(f"Number of CRs in workspace: {nbCRinWS} but number of CRs with EM: {nbCRwithEM}. 
Signal in CRs will not be patched.") +            if nbCRwithEM != 0 and not self.includeCRs: +                logger.warning("EM in CRs but includeCRs == False. Signal in CRs will not be patched.") for i_ch, ch in enumerate(ws["channels"]): -                if ch["name"][:2] == "SR":  # if channel name starts with 'SR' +                if "SR" in ch["name"] or ("CR" in ch["name"] and self.includeCRs and nbCRwithEM == nbCRinWS): # if channel name contains 'SR', or 'CR' when includeCRs is set wsChannelsInfo["signalRegions"].append( { "path": "/channels/" @@ -150,9 +164,9 @@ def getWSInfo(self): } )  # Number of bins else: -                    wsChannelsInfo["otherRegions"].append("/channels/" + str(i_ch)) +                    wsChannelsInfo["otherRegions"].append({'path': "/channels/" + str(i_ch), 'name': ch["name"]}) wsChannelsInfo["otherRegions"].sort( -                key=lambda path: path.split("/")[-1], reverse=True +                key=lambda path: int(path['path'].split("/")[-1]), reverse=True )  # Need to sort correctly the paths to the channels to be removed self.channelsInfo.append(wsChannelsInfo) @@ -197,7 +211,7 @@ class PyhfUpperLimitComputer: Class that computes the upper limit using the jsons files and signal informations in the 'data' instance of 'PyhfData' """ -    def __init__(self, data, cl=0.95, includeCRs=False, lumi=None ): +    def __init__(self, data, cl=0.95, lumi=None ): """ :param data: instance of 'PyhfData' holding the signals information @@ -224,7 +238,7 @@ def __init__(self, data, cl=0.95, includeCRs=False, lumi=None ): self.channelsInfo = self.data.channelsInfo self.zeroSignalsFlag = self.data.zeroSignalsFlag self.nWS = self.data.nWS -        self.includeCRs = includeCRs +        self.includeCRs = data.includeCRs self.patches = self.patchMaker() self.workspaces = self.wsMaker() self.workspaces_expected = self.wsMaker(apriori=True) @@ -284,7 +298,7 @@ def rescale(self, factor): def patchMaker(self): """ -        Method that creates the list of patches to be applied to the self.inputJsons workspaces, one for each region given the 'self.nsignals and the informations available in self.channelsInfo and the content of the self.inputJsons +        Method that creates the list of patches to be applied to the self.inputJsons workspaces, one for each region given the self.nsignals and the information available in self.channelsInfo and the content of the self.inputJsons NB: It seems we need to include the change of the "modifiers" in the patches as well :return: the list of patches, one for each workspace @@ -292,7 +306,6 @@ def patchMaker(self): if self.channelsInfo == None: return None -        nsignals = self.nsignals # Constructing the patches to be applied on the main workspace files patches = [] for ws, info, subSig in zip(self.inputJsons, self.channelsInfo, self.nsignals): @@ -308,14 +321,23 @@ def patchMaker(self): value["modifiers"] = [] value["modifiers"].append({"data": None, "type": "normfactor", "name": "mu_SIG"}) value["modifiers"].append({"data": None, "type": "lumi", "name": "lumi"}) +                    if self.data.signalUncertainty is not None and self.data.signalUncertainty != 0: +                        # Uncomment the line below to add a MC statistical uncertainty. 
+                        # value["modifiers"].append({"data": [sigBin*self.data.signalUncertainty for sigBin in value["data"]], "type": "staterror", "name": "MCError"}) +                        value["modifiers"].append({"data": {"hi_data": [sigBin*(1+self.data.signalUncertainty) for sigBin in value["data"]], +                                                            "lo_data": [sigBin*(1-self.data.signalUncertainty) for sigBin in value["data"]] +                                                            }, +                                                   "type": "histosys", +                                                   "name": "signalUncertainty" +                                                   }) value["name"] = "bsm" operator["value"] = value patch.append(operator) -        if self.includeCRs: -            logger.debug("keeping the CRs") -        else: -            for path in info["otherRegions"]: -                patch.append({"op": "remove", "path": path}) +        for region in info["otherRegions"]: +            if 'CR' in region['name'] and self.includeCRs: +                continue +            else: +                patch.append({"op": "remove", "path": region['path']}) patches.append(patch) return patches @@ -484,7 +506,7 @@ def likelihood( self, mu=1.0, workspace_index=None, return_nll=False, for i, ns in enumerate(self.data.nsignals): for j, v in enumerate(ns): self.data.nsignals[i][j] = v * mu -        self.__init__(self.data, self.cl, self.includeCRs, self.lumi) +        self.__init__(self.data, self.cl, self.lumi) ### allow this, for computation of l_SM # if self.zeroSignalsFlag[workspace_index] == True: # logger.warning("Workspace number %d has zero signals" % workspace_index) @@ -659,7 +681,7 @@ def lmax( self, workspace_index=None, return_nll=False, expected=False, "Values in x were outside bounds during a minimize step, clipping to bounds", ) -        self.__init__(self.data, self.cl, self.includeCRs, self.lumi) +        self.__init__(self.data, self.cl, self.lumi) if workspace_index == None: workspace_index = self.getBestCombinationIndex() if workspace_index != None: @@ -697,7 +719,6 @@ def lmax( self, workspace_index=None, return_nll=False, expected=False, _1, _2, o = pyhf.infer.mle.fit(workspace.data(model), model, return_fitted_val=True, return_result_obj = True, init_pars = list(muhat), method="BFGS" ) sigma_mu_temp = float ( np.sqrt ( o.hess_inv[model.config.poi_index][model.config.poi_index] ) ) - except (pyhf.exceptions.FailedMinimization,ValueError) as e: pass if abs ( sigma_mu_temp - 1.0 ) > 1e-5: @@ -705,11 +726,16 @@ def lmax( self, workspace_index=None, return_nll=False, expected=False, else: _, _, o = pyhf.infer.mle.fit(workspace.data(model), model, return_fitted_val=True, return_result_obj = True, method="L-BFGS-B" ) -            sigma_mu_temp = float ( np.sqrt ( o.hess_inv.todense()[model.config.poi_index][model.config.poi_index] ) ) +            if abs ( sigma_mu_temp - 1.0 ) < 1e-8: # Fisher information is nonsense +                #Calculate inv_hess numerically +                inv_hess = self.compute_invhess(o.x, workspace.data(model), model, model.config.poi_index) +                sigma_mu_temp = float ( np.sqrt ( inv_hess)) if abs ( sigma_mu_temp - 1.0 ) > 1e-8: sigma_mu = sigma_mu_temp * self.scale -# sigma_mu = float ( o.unc[model.config.poi_index] ) * self.scale +            else: # sigma_mu is still nonsense +                logger.warning("sigma_mu computation failed, even with inv_hess numerically computed. sigma_mu will be set to 1.") +                sigma_mu = 1. 
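# Illustrative sketch (not part of the patch): the shape of one jsonpatch "add" operation that
# the patchMaker() changes above now produce for a signal-region (or kept control-region)
# channel when signalUncertainty is set. The channel path, the per-bin yields and the 10%
# uncertainty below are made-up numbers; only the modifier structure follows the code.
signal_uncertainty = 0.1
bsm_yields = [1.2, 0.4]                    # hypothetical signal yields per bin of this channel
operator = {
    "op": "add",
    "path": "/channels/0/samples/0",       # hypothetical path to the patched channel
    "value": {
        "name": "bsm",
        "data": bsm_yields,
        "modifiers": [
            {"data": None, "type": "normfactor", "name": "mu_SIG"},
            {"data": None, "type": "lumi", "name": "lumi"},
            {"data": {"hi_data": [y * (1 + signal_uncertainty) for y in bsm_yields],
                      "lo_data": [y * (1 - signal_uncertainty) for y in bsm_yields]},
             "type": "histosys", "name": "signalUncertainty"},
        ],
    },
}
# Validation regions, and control regions when includeCRs is False, are still dropped with
# operations of the form {"op": "remove", "path": "/channels/<index>"}.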
except (pyhf.exceptions.FailedMinimization, ValueError) as e: if pyhfinfo["backend"] == "pytorch": @@ -801,7 +827,7 @@ def getUpperLimitOnMu(self, expected=False, workspace_index=None): - else: choose best combo :return: the upper limit at 'self.cl' level (0.95 by default) """ - self.__init__(self.data, self.cl, self.includeCRs, self.lumi) + self.__init__(self.data, self.cl, self.lumi) if workspace_index in self.data.cachedULs[expected]: ret = self.data.cachedULs[expected][workspace_index] return ret diff --git a/smodels/statistics/statsTools.py b/smodels/statistics/statsTools.py index 48b53d045..3ada7149c 100755 --- a/smodels/statistics/statsTools.py +++ b/smodels/statistics/statsTools.py @@ -26,10 +26,10 @@ class StatsComputer: __slots__ = [ "nsig", "dataObject", "dataType", "likelihoodComputer", "data", "upperLimitComputer", "deltas_sys", "allowNegativeSignals" ] - + def __init__ ( self, dataObject : Union['DataSet','CombinedDataSet', list], dataType : str, nsig : Union[None,float,List] = None, - deltas_rel : Union[None,float] = None, + deltas_rel : Union[None,float] = None, allowNegativeSignals : bool = False): """ Initialise. @@ -63,11 +63,11 @@ def forSingleBin(cls, dataset, nsig, deltas_rel): :deltas_rel: Relative uncertainty for the signal :returns: a StatsComputer - """ - computer = StatsComputer(dataObject=dataset, - dataType="1bin", + """ + computer = StatsComputer(dataObject=dataset, + dataType="1bin", nsig=nsig, deltas_rel=deltas_rel) - + computer.getComputerSingleBin( ) return computer @@ -82,11 +82,11 @@ def forMultiBinSL(cls,dataset, nsig, deltas_rel): :returns: a StatsComputer """ - - computer = StatsComputer(dataObject=dataset, - dataType="SL", + + computer = StatsComputer(dataObject=dataset, + dataType="SL", nsig=nsig, deltas_rel=deltas_rel) - + computer.getComputerMultiBinSL( ) return computer @@ -101,10 +101,10 @@ def forPyhf(cls, dataset, nsig, deltas_rel): :returns: a StatsComputer """ - computer = StatsComputer(dataObject=dataset, - dataType="pyhf", + computer = StatsComputer(dataObject=dataset, + dataType="pyhf", nsig=nsig, deltas_rel=deltas_rel) - + computer.getComputerPyhf( ) return computer @@ -135,10 +135,10 @@ def forTruncatedGaussian(cls,theorypred, corr : float =0.6 ): kwargs = { "upperLimitOnMu": float(ul), "expectedUpperLimitOnMu": float(eul), "corr": corr } computer = StatsComputer(dataObject=theorypred.dataset, - dataType="truncGaussian", + dataType="truncGaussian", nsig=0., allowNegativeSignals=True) - + computer.getComputerTruncGaussian( **kwargs) return computer @@ -150,16 +150,16 @@ def forAnalysesComb(cls,theoryPredictions, deltas_rel): :param deltas_rel: relative error for the signal :returns: a StatsComputer """ - + # Only allow negative signal if all theory predictions allow for it allowNegativeSignals = all([tp.statsComputer.allowNegativeSignals for tp in theoryPredictions]) - computer = StatsComputer(dataObject=theoryPredictions, - dataType="analysesComb", + computer = StatsComputer(dataObject=theoryPredictions, + dataType="analysesComb", nsig=None, deltas_rel=deltas_rel, allowNegativeSignals=allowNegativeSignals) - + computer.getComputerAnalysesComb( ) return computer @@ -167,7 +167,7 @@ def forAnalysesComb(cls,theoryPredictions, deltas_rel): def getComputerSingleBin(self): """ Create computer from a single bin - + """ dataset = self.dataObject @@ -233,12 +233,20 @@ def getComputerPyhf(self ): logger.error( "Wrong json definition in globalInfo.jsonFiles for json : %s" % jsName) logger.debug("list of datasets: {}".format(datasets)) 
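# Illustrative sketch (not part of the patch): compute_invhess(), called in the lmax() fallback
# of pyhfInterface.py above, is defined elsewhere in this changeset and is not shown in this
# hunk. The helper below is only a guess at the idea: estimate the curvature of the
# twice-negative-log-likelihood in the POI direction by a central finite difference, with the
# remaining parameters frozen at their best-fit values, and invert it. The real implementation
# may differ, e.g. by building and inverting the full Hessian.
import numpy as np
import pyhf

def approx_invhess_poi(bestfit_pars, data, model, poi_index, step=1e-3):
    """Rough (poi, poi) inverse-Hessian estimate of twice the negative log-likelihood."""
    def twice_nll(mu):
        pars = np.array(bestfit_pars, dtype=float)
        pars[poi_index] = mu
        return float(np.asarray(pyhf.infer.mle.twice_nll(pars, data, model)).ravel()[0])
    mu0 = float(bestfit_pars[poi_index])
    curvature = (twice_nll(mu0 + step) - 2.0 * twice_nll(mu0) + twice_nll(mu0 - step)) / step**2
    return 1.0 / curvature if curvature > 0 else None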
logger.debug("jsonFiles after filtering: {}".format(jsonFiles)) + includeCRs = False + if hasattr(globalInfo,'includeCRs'): + includeCRs = globalInfo.includeCRs + signalUncertainty = None + if hasattr(globalInfo,"signalUncertainty"): + signalUncertainty = globalInfo.signalUncertainty # Constructing the list of signals with subsignals matching each json nsignals = list() for jsName in jsonFiles: subSig = list() for srName in globalInfo.jsonFiles[jsName]: try: + if 'CR' in srName and not includeCRs: # Allow signal to leak in CRs only if they are kept + continue index = datasets.index(srName) except ValueError: line = ( @@ -250,15 +258,10 @@ def getComputerPyhf(self ): nsignals.append(subSig) # Loading the jsonFiles, unless we already have them (because we pickled) nsig = self.nsig - data = PyhfData(nsignals, jsons, jsonFiles) + data = PyhfData(nsignals, jsons, globalInfo.jsonFiles, includeCRs, signalUncertainty) if data.errorFlag: return None - if hasattr(globalInfo, "includeCRs"): - includeCRs = globalInfo.includeCRs - else: - includeCRs = False - self.upperLimitComputer = PyhfUpperLimitComputer(data, includeCRs=includeCRs, - lumi=self.dataObject.getLumi() ) + self.upperLimitComputer = PyhfUpperLimitComputer(data, lumi=self.dataObject.getLumi() ) self.likelihoodComputer = self.upperLimitComputer # for pyhf its the same def getComputerTruncGaussian ( self, **kwargs ): @@ -276,7 +279,7 @@ def getComputerAnalysesComb(self): :param nsig: signal yields. """ - self.upperLimitComputer = AnaCombLikelihoodComputer(theoryPredictions=self.dataObject, + self.upperLimitComputer = AnaCombLikelihoodComputer(theoryPredictions=self.dataObject, deltas_rel=self.deltas_sys) self.likelihoodComputer = self.upperLimitComputer # for analyses combination its the same @@ -284,8 +287,8 @@ def get_five_values ( self, expected : Union [ bool, Text ], return_nll : bool = False, check_for_maxima : bool = False )-> Dict: """ - Return the Five Values: l(bsm), l(sm), muhat, l(muhat), sigma(mu_hat) - + Return the Five Values: l(bsm), l(sm), muhat, l(muhat), sigma(mu_hat) + :param check_for_maxima: if true, then check lmax against l(sm) and l(bsm) correct, if necessary """ @@ -359,7 +362,7 @@ def transform ( self, expected ): def maximize_likelihood ( self, expected : Union[bool,Text], return_nll : bool = False ) -> dict: """ simple frontend to the individual computers, later spey - :param return_nll: if True, return negative log likelihood + :param return_nll: if True, return negative log likelihood :returns: Dictionary of llhd (llhd at mu_hat), \ muhat, sigma_mu (sigma of mu_hat), \ optionally also theta_hat @@ -378,7 +381,7 @@ def maximize_likelihood ( self, expected : Union[bool,Text], kwargs["expected"]=expected ret = self.likelihoodComputer.lmax ( return_nll = return_nll, - allowNegativeSignals = self.allowNegativeSignals, + allowNegativeSignals = self.allowNegativeSignals, **kwargs ) return ret @@ -386,7 +389,7 @@ def poi_upper_limit ( self, expected : Union [ bool, Text ], limit_on_xsec : bool = False ) -> float: """ Simple frontend to the upperlimit computers, later to spey.poi_upper_limit - + :param limit_on_xsec: if True, then return the limit on the cross section """