From 34ae8e7bc85aec24b4a6dea3a3a7ed723f935886 Mon Sep 17 00:00:00 2001 From: Christophe Delaere Date: Sat, 3 Aug 2013 13:06:53 +0200 Subject: [PATCH] Modification of the MadWeight bookkeeping Event weights are now tracked in association with a LHCO file. These files must be stored as (derived) samples and be directly or indirectly associated with a dataset. Other small improvements are also included, mostly in the behavior of the CLI scripts. --- SAMADhi.py | 40 ++++++++++++++++++++----- SAMADhi.sql | 34 ++++++++++++++++----- add_MadWeightCfg.py | 6 ---- add_sample.py | 4 +-- das_import.py | 37 +++++++++++++++-------- fill_weights.py | 73 ++++++++++++++++++++++++++++----------------- 6 files changed, 131 insertions(+), 63 deletions(-) diff --git a/SAMADhi.py b/SAMADhi.py index affc630..3414594 100644 --- a/SAMADhi.py +++ b/SAMADhi.py @@ -60,7 +60,7 @@ def __str__(self): result += " CMSSW release: %s\n"%str(self.cmssw_release) result += " global tag: %s\n"%str(self.globaltag) result += " type (data or mc): %s\n"%str(self.datatype) - result += " center-of-mass energy: %s\n"%str(self.energy) + result += " center-of-mass energy: %s TeV\n"%str(self.energy) result += " creation time (on DAS): %s\n"%str(self.creation_time) result += " comment: %s"%str(self.user_comment) return result @@ -210,7 +210,6 @@ class MadWeight(Storm): nwa = Int() cm_energy = Float() higgs_width = Float() - systematics = Unicode() ident_mw_card = Unicode() ident_card = Unicode() info_card = Unicode() @@ -236,7 +235,6 @@ def __str__(self): result += " Center of mass energy: %s\n"%str(self.cm_energy) result += " Transfert functions: %s\n"%str(self.transfer_fctVersion) result += " Higgs Width: %s\n"%str(self.higgs_width) - result += " systematics: %s"%str(self.systematics) return result def replaceBy(self, config): @@ -246,7 +244,6 @@ def replaceBy(self, config): self.isr = config.isr self.nwa = config.nwa self.higgs_width = config.higgs_width - self.systematics = config.systematics self.ident_mw_card = config.ident_mw_card self.ident_card = config.ident_card self.info_card = config.info_card @@ -261,16 +258,45 @@ def replaceBy(self, config): self.transfer_fctVersion = config.transfer_fctVersion self.transfer_function = config.transfer_function +class MadWeightRun(Storm): + """One run of MadWeight. It relates a MW setup to a LHCO file + and may contain a systematics flag + comment.""" + __storm_table__ = "madweightrun" + mwrun_id = Int(primary=True) + madweight_process = Int() + lhco_sample_id = Int() + creation_time = DateTime() + systematics = Unicode() + user_comment = Unicode() + version = Int() + process = Reference(madweight_process,"MadWeight.process_id") + lhco_sample = Reference(lhco_sample_id, "Sample.sample_id") + + def __init__(self,madweight_process,lhco_sample_id): + self.madweight_process = madweight_process + self.lhco_sample_id = lhco_sample_id + + def __str__(self): + result = "MadWeight run #%s performed on %s\n"%(str(self.mwrun_id),str(self.creation_time)) + result += " MadWeight process: %s (id %s)\n"%(str(self.process.name),str(self.madweight_process)) + result += " LHCO sample: %s (id %s)\n"%(str(self.lhco_sample.name),str(self.lhco_sample_id)) + result += " Systematics: %s\n"%str(self.systematics) + result += " Comment: %s\n"%str(self.user_comment) + result += " Version: %s\n"%str(self.version) + return result + class Weight(Storm): """One weight. It relates one event and one MadWeight setup to one value + uncertainty""" __storm_table__ = "weight" weight_id = Int(primary=True) event_id = Int() - madweight_process = Int() + madweight_run = Int() value = Float() uncertainty = Float() - version = Int() event = Reference(event_id,"Event.event_id") - process = Reference(madweight_process,"MadWeight.process_id") + mw_run = Reference(madweight_run,"MadWeightRun.mwrun_id") + + def __str__(self): + return "%f +/- %f"%(self.value,self.uncertainty) diff --git a/SAMADhi.sql b/SAMADhi.sql index 03b501f..9272451 100644 --- a/SAMADhi.sql +++ b/SAMADhi.sql @@ -1,4 +1,5 @@ DROP TABLE IF EXISTS weight; +DROP TABLE IF EXISTS madweightrun; DROP TABLE IF EXISTS madweight; DROP TABLE IF EXISTS event; DROP TABLE IF EXISTS sampleresult; @@ -32,7 +33,7 @@ path varchar(255) NOT NULL, sampletype varchar(255) NOT NULL, nevents_processed int, nevents int, -normalization float DEFAULT 1.0, +normalization float NOT NULL DEFAULT 1.0, luminosity float, code_version varchar(255), user_comment text, @@ -41,7 +42,9 @@ creation_time timestamp, source_dataset_id int, source_sample_id int, PRIMARY KEY (sample_id), -KEY idx_name (name) +KEY idx_name (name), +FOREIGN KEY (source_dataset_id) REFERENCES dataset(dataset_id), +FOREIGN KEY (source_sample_id) REFERENCES sample(sample_id) ); CREATE TABLE result @@ -59,7 +62,9 @@ CREATE TABLE sampleresult ( sample_id int NOT NULL, result_id int NOT NULL, -CONSTRAINT SR_ID PRIMARY KEY (sample_id,result_id) +CONSTRAINT SR_ID PRIMARY KEY (sample_id,result_id), +FOREIGN KEY (sample_id) REFERENCES sample(sample_id), +FOREIGN KEY (result_id) REFERENCES result(result_id) ); CREATE TABLE event @@ -81,7 +86,6 @@ isr int NOT NULL, nwa int NOT NULL, cm_energy float NOT NULL, higgs_width float, -systematics varchar(255), ident_mw_card text NOT NULL, ident_card text NOT NULL, info_card text NOT NULL, @@ -98,15 +102,31 @@ PRIMARY KEY (process_id), KEY idx_name (name) ); +CREATE TABLE madweightrun +( +mwrun_id int NOT NULL AUTO_INCREMENT, +madweight_process int NOT NULL, +lhco_sample_id int NOT NULL, +systematics varchar(255), +version tinyint NOT NULL DEFAULT 1, +user_comment text, +creation_time timestamp, +PRIMARY KEY (mwrun_id), +UNIQUE INDEX (madweight_process,lhco_sample_id,systematics,version), +FOREIGN KEY (madweight_process) REFERENCES madweight(process_id), +FOREIGN KEY (lhco_sample_id) REFERENCES sample(sample_id) +); + CREATE TABLE weight ( weight_id BIGINT NOT NULL AUTO_INCREMENT, event_id BIGINT NOT NULL, -madweight_process int NOT NULL, +madweight_run int NOT NULL, value float, uncertainty float, -version tinyint DEFAULT 1, PRIMARY KEY (weight_id), -UNIQUE INDEX (event_id,madweight_process,version) +UNIQUE INDEX (event_id,madweight_run), +FOREIGN KEY (event_id) REFERENCES event(event_id), +FOREIGN KEY (madweight_run) REFERENCES madweightrun(mwrun_id) ); diff --git a/add_MadWeightCfg.py b/add_MadWeightCfg.py index 73fd05d..31fcdca 100755 --- a/add_MadWeightCfg.py +++ b/add_MadWeightCfg.py @@ -27,9 +27,6 @@ def __init__(self): usage += " where name is the configuration name\n" usage += " and where path points to the MadWeight directory" self.parser = OptionParser(usage=usage) - self.parser.add_option("-s", "--syst", action="store", type="string", - default="", dest="syst", - help="string identifying the systematics variation of the weight") def get_opt(self): """ @@ -58,7 +55,6 @@ def main(): madweightCfg = MadWeight(unicode(opts.name)) for card in cards: setattr(madweightCfg, card, unicode(open(opts.path+"/Cards/"+card+".dat","r").read())) - madweightCfg.systematics = unicode(opts.syst) # get the transfert functions madweightCfg.transfer_fctVersion = unicode(open('%s/Source/MadWeight/transfer_function/Transfer_FctVersion.txt'%opts.path,"r").read().strip('\n')) theCfg = madweightCfg.transfer_fctVersion.split(':')[0] @@ -99,8 +95,6 @@ def main(): fields = cfg.split() if fields[1]=="25": madweightCfg.higgs_width = float(fields[2]) -#TODO: temporary - print madweightCfg # connect to the MySQL database using default credentials dbstore = DbStore() # check that there is no existing entry diff --git a/add_sample.py b/add_sample.py index 7edb3ee..ca7f1cf 100755 --- a/add_sample.py +++ b/add_sample.py @@ -50,7 +50,7 @@ def __init__(self): help="author of the result. If not specified, is taken from the path.") self.parser.add_option("-t", "--time", action="store", type="string", default=None, dest="time", - help="result timestamp. If set to \"path\", timestamp will be taken from the path. Otherwise, it must be formated like YYYY-MM-DD HH:MM:SS") + help="result timestamp. If set to \"path\", timestamp will be taken from the path. Otherwise, it must be formated like YYYY-MM-DD HH:MM:SS. Default is current time.") def get_opt(self): """ @@ -63,7 +63,7 @@ def get_opt(self): opts.sampletype = args[0] opts.path = os.path.abspath(os.path.expandvars(os.path.expanduser(args[1]))) # check path - if not os.path.exists(opts.path) or not os.path.isdir(opts.path): + if not os.path.exists(opts.path) or not ( os.path.isdir(opts.path) or os.path.isfile(opts.path)) : self.parser.error("%s is not an existing directory"%opts.path) # set author if opts.author is None: diff --git a/das_import.py b/das_import.py index d5b9d18..b86098a 100755 --- a/das_import.py +++ b/das_import.py @@ -18,7 +18,7 @@ import httplib import string import pprint -from optparse import OptionParser +from optparse import OptionParser, OptionGroup from datetime import datetime from SAMADhi import Dataset, DbStore from userPrompt import confirm @@ -66,36 +66,39 @@ def __init__(self): default=0.0, dest="xsection", help="specify the cross-section.") self.parser.add_option("--energy", action="store", type="float", - default=0.0, dest="energy", + default=None, dest="energy", help="specify the centre of mass energy.") self.parser.add_option("--comment", action="store", type="string", default="", dest="comment", help="comment about the dataset") - # ---- DAQ options + # ---- DAS options + das_group = OptionGroup(self.parser,"DAS options", + "The following options control the communication with the DAS server") msg = "host name of DAS cache server, default is https://cmsweb.cern.ch" - self.parser.add_option("--host", action="store", type="string", + das_group.add_option("--host", action="store", type="string", default='https://cmsweb.cern.ch', dest="host", help=msg) msg = "index for returned result" - self.parser.add_option("--idx", action="store", type="int", + das_group.add_option("--idx", action="store", type="int", default=0, dest="idx", help=msg) msg = 'query waiting threshold in sec, default is 5 minutes' - self.parser.add_option("--threshold", action="store", type="int", + das_group.add_option("--threshold", action="store", type="int", default=300, dest="threshold", help=msg) msg = 'specify private key file name' - self.parser.add_option("--key", action="store", type="string", + das_group.add_option("--key", action="store", type="string", default="", dest="ckey", help=msg) msg = 'specify private certificate file name' - self.parser.add_option("--cert", action="store", type="string", + das_group.add_option("--cert", action="store", type="string", default="", dest="cert", help=msg) msg = 'specify number of retries upon busy DAS server message' - self.parser.add_option("--retry", action="store", type="string", + das_group.add_option("--retry", action="store", type="string", default=0, dest="retry", help=msg) msg = 'drop DAS headers' - self.parser.add_option("--das-headers", action="store_true", + das_group.add_option("--das-headers", action="store_true", default=False, dest="das_headers", help=msg) msg = 'verbose output' - self.parser.add_option("-v", "--verbose", action="store", + das_group.add_option("-v", "--verbose", action="store", type="int", default=0, dest="verbose", help=msg) + self.parser.add_option_group(das_group) def get_opt(self): """ Returns parse list of options @@ -103,10 +106,18 @@ def get_opt(self): opts, args = self.parser.parse_args() # mandatory arguments if len(args) < 1: - self.parser.error("name and process are mandatory") + self.parser.error("Name and process are mandatory.") + if len(args) > 1: + self.parser.error("Too many arguments.") opts.sample = args[0] if opts.process is None: - opts.process = string.split(opts.sample,'/',2)[1] + splitString = string.split(opts.sample,'/',2) + if len(splitString)>1: + opts.process = string.split(opts.sample,'/',2)[1] + if opts.energy is None: + energyRe = re.search(r"([\d.]+)TeV",opts.sample) + if not energyRe is None: + opts.energy = float(energyRe.group(1)) return opts def fullpath(path): diff --git a/fill_weights.py b/fill_weights.py index 6e5cd7a..837fc84 100755 --- a/fill_weights.py +++ b/fill_weights.py @@ -4,7 +4,7 @@ import os from optparse import OptionParser -from SAMADhi import Dataset, Event, MadWeight, Weight, DbStore +from SAMADhi import Dataset, Sample, Event, MadWeight, MadWeightRun, Weight, DbStore from userPrompt import confirm class MyOptionParser: @@ -12,14 +12,21 @@ class MyOptionParser: Client option parser """ def __init__(self): - usage = "Usage: %prog dataset process file [options]\n" - usage += " where dataset is the id of the dataset containing the events,\n" + #TODO: we could allow to guess the process and lhco by name or path as well + usage = "Usage: %prog lhco_id process file [options]\n" + usage += " where lhco_id is the sample id of the LHCO,\n" usage += " process is the id of the MadWeight process,\n" usage += " and file is the output file containing the weights." self.parser = OptionParser(usage=usage) self.parser.add_option("-v", "--version", action="store", type="int", default=None, dest="version", help="version of that particular weight") + self.parser.add_option("-s", "--syst", action="store", type="string", + default="", dest="syst", + help="string identifying the systematics variation of the weight") + self.parser.add_option("-c", "--comment", action="store", type="string", + default="", dest="comment", + help="user comment") def get_opt(self): """ @@ -27,14 +34,22 @@ def get_opt(self): """ opts, args = self.parser.parse_args() if len(args) < 3: - self.parser.error("sample process and file are mandatory") - opts.dataset = int(args[0]) + self.parser.error("lhco, process and file are mandatory") + opts.lhco_id = int(args[0]) opts.process = int(args[1]) opts.filepath = args[2] if not os.path.exists(opts.filepath) or not os.path.isfile(opts.filepath): self.parser.error("%s is not an existing file"%opts.filepath) return opts +def findDataset(sample): + if sample.source_dataset_id is not None: + return sample.source_dataset_id + elif sample.source_sample_id is not None and sample.source_sample_id != sample.sample_id: + return findDataset(sample.source_sample) + else: + return None + def main(): """Main function""" # get the options @@ -42,17 +57,34 @@ def main(): opts = optmgr.get_opt() # connect to the MySQL database using default credentials dbstore = DbStore() - # check that the dataset exists - check = dbstore.find(Dataset,Dataset.dataset_id==opts.dataset) - if check.is_empty(): - raise IndexError("No dataset with such index: %d"%opts.dataset) + # check that the LHCO exists and obtain the dataset id + check = dbstore.find(Sample,Sample.sample_id==opts.lhco_id) + if check.is_empty() or check.one().sampletype != "LHCO": + raise IndexError("No LHCO with such index: %d"%opts.lhco_id) + opts.dataset = findDataset(check.one()) + if opts.dataset is None: + raise RuntimeError("Impossible to get the dataset id.") # check that the process exists check = dbstore.find(MadWeight,MadWeight.process_id==opts.process) if check.is_empty(): raise IndexError("No process with such index: %d"%opts.process) + # create the MW run object + mw_run = MadWeightRun(opts.process,opts.lhco_id) + mw_run.systematics = unicode(opts.syst) + mw_run.user_comment = unicode(opts.comment) + mw_run.version = opts.version + if mw_run.version is None: + check = dbstore.find(MadWeightRun,(MadWeightRun.madweight_process==mw_run.madweight_process) & (MadWeightRun.lhco_sample_id==mw_run.lhco_sample_id)) + if not check.is_empty(): + mw_run.version = check.order_by(MadWeightRun.version).last().version + 1 + else: + mw_run.version = 1 + else: + check = dbstore.find(MadWeightRun,(MadWeightRun.madweight_process==mw_run.madweight_process) & (MadWeightRun.lhco_sample_id==mw_run.lhco_sample_id) & (MadWeightRun.version==mw_run.version)) + if not check.is_empty(): + raise RuntimeError("There is already one such MadWeight run with the same version number:\n%s\n"%str(check.one())) # read the file inputfile = open(opts.filepath) - versions = set() count = 0 for line in inputfile: data = line.rstrip('\n').split('\t') @@ -62,34 +94,19 @@ def main(): event_query = dbstore.find(Event, (Event.event_number==event_number) & (Event.run_number==run_number) & (Event.dataset_id==opts.dataset)) if event_query.is_empty(): event = Event(event_number,run_number,opts.dataset) - if opts.version is None: opts.version = 1 else: event = event_query.one() - # in that case, make sure there is no similar (process + version) weight already - if opts.version is None: - check = event.weights.find(Weight.madweight_process==opts.process).order_by(Weight.version) - if check.is_empty(): - opts.version = 1 - else: - lastver = check.last().version - opts.version = lastver+1 - else: - check = event.weights.find((Weight.madweight_process==opts.process) & (Weight.version==opts.version)) - if not check.is_empty(): - raise ValueError("There is already a weight for process %d with version %d"%(opts.process,opts.version)) # create the weight weight = Weight() weight.event = event - weight.madweight_process = opts.process + weight.mw_run = mw_run weight.value = float(data[1]) weight.uncertainty = float(data[2]) - weight.version = opts.version dbstore.add(weight) - versions.add(opts.version) count += 1 # confirm and commit - print "Adding weights to %d events with the following version(s) (should be unique):"%count - print versions + print mw_run + print "Adding weights to %d events."%count if confirm(prompt="Insert into the database?", resp=True): dbstore.commit()