From 7a1910f5ff32e90854dfb9d475e2b8bc7b863a03 Mon Sep 17 00:00:00 2001 From: Juraj Smiesko <34742917+kjvbrt@users.noreply.github.com> Date: Wed, 18 Sep 2024 15:33:51 +0200 Subject: [PATCH] Synchronization of final and plots stages (#397) * Decoupling latex table printing from rdf running * Further minor adjustments * Providing default values in Hismaker Plot * Storing graph in working directory if path not specified * Before scaling the histograms in plots stage check final stage results * Now possible to have no signal or no background in the plot * Putting scaling determination to separate function --- README.md | 5 + .../higgs/mH-recoil/mumu/analysis_final.py | 126 +++- .../higgs/mH-recoil/mumu/analysis_plots.py | 46 +- man/man7/fccanalysis-final-script.7 | 118 ++++ man/man7/fccanalysis-plots-script.7 | 119 ++++ python/anascript.py | 2 +- python/do_plots.py | 370 +++++++--- python/frame.py | 7 +- python/parsers.py | 7 +- python/run_final_analysis.py | 648 +++++++++++------- 10 files changed, 1023 insertions(+), 425 deletions(-) create mode 100644 man/man7/fccanalysis-final-script.7 create mode 100644 man/man7/fccanalysis-plots-script.7 diff --git a/README.md b/README.md index 2e4488ab7b..fb21b09edd 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,10 @@ # FCCAnalyses +[![DOI](https://zenodo.org/badge/177151745.svg)](https://zenodo.org/doi/10.5281/zenodo.4767810) + +![test](https://github.com/HEP-FCC/FCCAnalyses/actions/workflows/test.yml/badge.svg) +![docs](https://github.com/HEP-FCC/FCCAnalyses/actions/workflows/docs.yml/badge.svg) + Common framework for FCC related analyses. This framework allows one to write full analysis, taking [EDM4hep](https://github.com/key4hep/EDM4hep) input ROOT files and producing the plots. diff --git a/examples/FCCee/higgs/mH-recoil/mumu/analysis_final.py b/examples/FCCee/higgs/mH-recoil/mumu/analysis_final.py index e1075a43c3..90a987a505 100644 --- a/examples/FCCee/higgs/mH-recoil/mumu/analysis_final.py +++ b/examples/FCCee/higgs/mH-recoil/mumu/analysis_final.py @@ -1,50 +1,110 @@ -#Input directory where the files produced at the pre-selection level are -inputDir = "outputs/FCCee/higgs/mH-recoil/mumu/stage2/" +''' +Final stage of the example Z(mumu)H recoil mass analysis. +''' -#Input directory where the files produced at the pre-selection level are -outputDir = "outputs/FCCee/higgs/mH-recoil/mumu/final/" +# Input directory where the files produced in the pre-selection stages are +# stored +inputDir = "outputs/FCCee/higgs/mH-recoil/mumu/stage2/" + +# Output directory where the resulting files will be stored +outputDir = "outputs/FCCee/higgs/mH-recoil/mumu/final/" processList = { - 'p8_ee_ZZ_ecm240':{},#Run over the full statistics from stage2 input file /p8_ee_ZZ_ecm240.root. Keep the same output name as input - 'p8_ee_WW_ecm240':{}, #Run over the statistics from stage2 input files /p8_ee_WW_ecm240_out/*.root. Keep the same output name as input - 'MySample_p8_ee_ZH_ecm240':{} #Run over the full statistics from stage2 input file /p8_ee_ZH_ecm240_out.root. Change the output name to MySample_p8_ee_ZH_ecm240 + # Run over the full statistics from stage2 input file + # /p8_ee_ZZ_ecm240.root. Keep the same output name as input + 'p8_ee_ZZ_ecm240': {}, + # Run over the statistics from stage2 input files + # /p8_ee_WW_ecm240_out/*.root. Keep the same output name as input + 'p8_ee_WW_ecm240': {}, + # Run over the full statistics from stage2 input file + # /p8_ee_ZH_ecm240_out.root. Change the output name to + # MySample_p8_ee_ZH_ecm240 + 'MySample_p8_ee_ZH_ecm240': {} } -#Link to the dictonary that contains all the cross section informations etc... +# Link to the dictionary that contains all the cross section information etc... procDict = "FCCee_procDict_spring2021_IDEA.json" -#Add MySample_p8_ee_ZH_ecm240 as it is not an offical process -procDictAdd={"MySample_p8_ee_ZH_ecm240":{"numberOfEvents": 10000000, "sumOfWeights": 10000000, "crossSection": 0.201868, "kfactor": 1.0, "matchingEfficiency": 1.0}} +# Add MySample_p8_ee_ZH_ecm240 as it is not an offical process +procDictAdd = {"MySample_p8_ee_ZH_ecm240": {"numberOfEvents": 10000000, + "sumOfWeights": 10000000, + "crossSection": 0.201868, + "kfactor": 1.0, + "matchingEfficiency": 1.0}} + +# Expected integrated luminosity +intLumi = 5.0e+06 # pb-1 -#Number of CPUs to use +# Whether to scale to expected integrated luminosity +doScale = True + +# Number of threads to use nCPUS = 2 -#produces ROOT TTrees, default is False -doTree = False +# Whether to produce ROOT TTrees, default is False +doTree = True +# Save cut yields and efficiencies in LaTeX table saveTabular = True -###Dictionnay of the list of cuts. The key is the name of the selection that will be added to the output file -cutList = {"sel0":"Zcand_q == 0", - "sel1":"Zcand_q == -1 || Zcand_q == 1", - "sel2":"Zcand_m > 80 && Zcand_m < 100", - "sel3":"MyFilter==true && (Zcand_m < 80 || Zcand_m > 100)" - } +# Save cut yields and efficiencies in JSON file +saveJSON = True + +# Dictionary with the list of cuts. The key is the name of the selection that +# will be added to the output file +cutList = {"sel0": "Zcand_q == 0", + "sel1": "Zcand_q == -1 || Zcand_q == 1", + "sel2": "Zcand_m > 80 && Zcand_m < 100", + "sel3": "MyFilter==true && (Zcand_m < 80 || Zcand_m > 100)"} -#Dictionary for the ouput variable/hitograms. The key is the name of the variable in the output files. "name" is the name of the variable in the input file, "title" is the x-axis label of the histogram, "bin" the number of bins of the histogram, "xmin" the minimum x-axis value and "xmax" the maximum x-axis value. +# Dictionary for the output variables/histograms. The key is the name of the +# variable in the output files. "name" is the name of the variable in the input +# file, "title" is the x-axis label of the histogram, "bin" the number of bins +# of the histogram, "xmin" the minimum x-axis value and "xmax" the maximum +# x-axis value. histoList = { - "mz":{"name":"Zcand_m","title":"m_{Z} [GeV]","bin":125,"xmin":0,"xmax":250}, - "mz_zoom":{"name":"Zcand_m","title":"m_{Z} [GeV]","bin":40,"xmin":80,"xmax":100}, - "leptonic_recoil_m":{"name":"Zcand_recoil_m","title":"Z leptonic recoil [GeV]","bin":100,"xmin":0,"xmax":200}, - "leptonic_recoil_m_zoom":{"name":"Zcand_recoil_m","title":"Z leptonic recoil [GeV]","bin":200,"xmin":80,"xmax":160}, - "leptonic_recoil_m_zoom1":{"name":"Zcand_recoil_m","title":"Z leptonic recoil [GeV]","bin":100,"xmin":120,"xmax":140}, - "leptonic_recoil_m_zoom2":{"name":"Zcand_recoil_m","title":"Z leptonic recoil [GeV]","bin":200,"xmin":120,"xmax":140}, - "leptonic_recoil_m_zoom3":{"name":"Zcand_recoil_m","title":"Z leptonic recoil [GeV]","bin":400,"xmin":120,"xmax":140}, - "leptonic_recoil_m_zoom4":{"name":"Zcand_recoil_m","title":"Z leptonic recoil [GeV]","bin":800,"xmin":120,"xmax":140}, - "leptonic_recoil_m_zoom5":{"name":"Zcand_recoil_m","title":"Z leptonic recoil [GeV]","bin":2000,"xmin":120,"xmax":140}, - "leptonic_recoil_m_zoom6":{"name":"Zcand_recoil_m","title":"Z leptonic recoil [GeV]","bin":100,"xmin":130.3,"xmax":132.5}, - "mz_1D":{"cols":["Zcand_m"],"title":"m_{Z} [GeV]", "bins": [(40,80,100)]}, # 1D histogram (alternative syntax) - "mz_recoil_2D":{"cols":["Zcand_m", "Zcand_recoil_m"],"title":"m_{Z} - leptonic recoil [GeV]", "bins": [(40,80,100), (100,120,140)]}, # 2D histogram - "mz_recoil_3D":{"cols":["Zcand_m", "Zcand_recoil_m", "Zcand_recoil_m"],"title":"m_{Z} - leptonic recoil - leptonic recoil [GeV]", "bins": [(40,80,100), (100,120,140), (100,120,140)]}, # 3D histogram + "mz": {"name": "Zcand_m", + "title": "m_{Z} [GeV]", + "bin": 125, "xmin": 0, "xmax": 250}, + "mz_zoom": {"name": "Zcand_m", + "title": "m_{Z} [GeV]", + "bin": 40, "xmin": 80, "xmax": 100}, + "leptonic_recoil_m": {"name": "Zcand_recoil_m", + "title": "Z leptonic recoil [GeV]", + "bin": 100, "xmin": 0, "xmax": 200}, + "leptonic_recoil_m_zoom": {"name": "Zcand_recoil_m", + "title": "Z leptonic recoil [GeV]", + "bin": 200, "xmin": 80, "xmax": 160}, + "leptonic_recoil_m_zoom1": {"name": "Zcand_recoil_m", + "title": "Z leptonic recoil [GeV]", + "bin": 100, "xmin": 120, "xmax": 140}, + "leptonic_recoil_m_zoom2": {"name": "Zcand_recoil_m", + "title": "Z leptonic recoil [GeV]", + "bin": 200, "xmin": 120, "xmax": 140}, + "leptonic_recoil_m_zoom3": {"name": "Zcand_recoil_m", + "title": "Z leptonic recoil [GeV]", + "bin": 400, "xmin": 120, "xmax": 140}, + "leptonic_recoil_m_zoom4": {"name": "Zcand_recoil_m", + "title": "Z leptonic recoil [GeV]", + "bin": 800, "xmin": 120, "xmax": 140}, + "leptonic_recoil_m_zoom5": {"name": "Zcand_recoil_m", + "title": "Z leptonic recoil [GeV]", + "bin": 2000, "xmin": 120, "xmax": 140}, + "leptonic_recoil_m_zoom6": {"name": "Zcand_recoil_m", + "title": "Z leptonic recoil [GeV]", + "bin": 100, "xmin": 130.3, "xmax": 132.5}, + # 1D histogram (alternative syntax) + "mz_1D": {"cols": ["Zcand_m"], + "title": "m_{Z} [GeV]", + "bins": [(40, 80, 100)]}, + # 2D histogram + "mz_recoil_2D": {"cols": ["Zcand_m", "Zcand_recoil_m"], + "title": "m_{Z} - leptonic recoil [GeV]", + "bins": [(40, 80, 100), (100, 120, 140)]}, + # 3D histogram + "mz_recoil_3D": { + "cols": ["Zcand_m", "Zcand_recoil_m", "Zcand_recoil_m"], + "title": "m_{Z} - leptonic recoil - leptonic recoil [GeV]", + "bins": [(40, 80, 100), (100, 120, 140), (100, 120, 140)]}, } diff --git a/examples/FCCee/higgs/mH-recoil/mumu/analysis_plots.py b/examples/FCCee/higgs/mH-recoil/mumu/analysis_plots.py index 0f6aca0212..f8673dd22a 100644 --- a/examples/FCCee/higgs/mH-recoil/mumu/analysis_plots.py +++ b/examples/FCCee/higgs/mH-recoil/mumu/analysis_plots.py @@ -1,25 +1,33 @@ import ROOT -# global parameters -intLumi = 5.0e+06 #in pb-1 +# Global parameters +intLumi = 5.0e+06 # in pb-1 ana_tex = 'e^{+}e^{-} #rightarrow ZH #rightarrow #mu^{+}#mu^{-} + X' delphesVersion = '3.4.2' energy = 240.0 collider = 'FCC-ee' inputDir = 'outputs/FCCee/higgs/mH-recoil/mumu/final/' -formats = ['png','pdf'] -yaxis = ['lin','log'] -stacksig = ['stack','nostack'] +formats = ['png', 'pdf'] +yaxis = ['lin', 'log'] +stacksig = ['stack', 'nostack'] outdir = 'outputs/FCCee/higgs/mH-recoil/mumu/plots/' plotStatUnc = True -variables = ['mz','mz_zoom','leptonic_recoil_m','leptonic_recoil_m_zoom','leptonic_recoil_m_zoom2'] -rebin = [1, 1, 1, 1, 2] # uniform rebin per variable (optional) +variables = ['mz', + 'mz_zoom', + 'leptonic_recoil_m', + 'leptonic_recoil_m_zoom', + 'leptonic_recoil_m_zoom2'] +rebin = [1, 1, 1, 1, 2] # uniform rebin per variable (optional) -###Dictonnary with the analysis name as a key, and the list of selections to be plotted for this analysis. The name of the selections should be the same than in the final selection +# Dictionary with the list of selections to be plotted for this analysis. The +# name of the selections should be the same than in the final selection selections = {} -selections['ZH'] = ["sel0","sel1"] -selections['ZH_2'] = ["sel0","sel1"] +selections['ZH'] = ["sel0", "sel1"] +selections['ZH_2'] = ["sel0", "sel1"] +selections['ZH_3'] = ["sel0", "sel1"] +selections['ZH_4'] = ["sel0", "sel1"] +selections['ZH_5'] = ["sel0", "sel1"] extralabel = {} extralabel['sel0'] = "Selection: N_{Z} = 1" @@ -32,15 +40,19 @@ colors['VV'] = ROOT.kGreen+3 plots = {} -plots['ZH'] = {'signal':{'ZH':['MySample_p8_ee_ZH_ecm240']}, - 'backgrounds':{'WW':['p8_ee_WW_ecm240'], - 'ZZ':['p8_ee_ZZ_ecm240']} - } +plots['ZH'] = {'signal': {'ZH': ['MySample_p8_ee_ZH_ecm240']}, + 'backgrounds': {'WW': ['p8_ee_WW_ecm240'], + 'ZZ': ['p8_ee_ZZ_ecm240']}} +plots['ZH_2'] = {'signal': {'ZH': ['MySample_p8_ee_ZH_ecm240']}, + 'backgrounds': {'VV': ['p8_ee_WW_ecm240', 'p8_ee_ZZ_ecm240']}} -plots['ZH_2'] = {'signal':{'ZH':['MySample_p8_ee_ZH_ecm240']}, - 'backgrounds':{'VV':['p8_ee_WW_ecm240','p8_ee_ZZ_ecm240']} - } +plots['ZH_3'] = {'signal': {'ZH': ['MySample_p8_ee_ZH_ecm240']}} + +plots['ZH_4'] = {'backgrounds': {'VV': ['p8_ee_WW_ecm240', 'p8_ee_ZZ_ecm240']}} + +plots['ZH_5'] = {'backgrounds': {'WW': ['p8_ee_WW_ecm240'], + 'ZZ': ['p8_ee_ZZ_ecm240']}} legend = {} legend['ZH'] = 'ZH' diff --git a/man/man7/fccanalysis-final-script.7 b/man/man7/fccanalysis-final-script.7 new file mode 100644 index 0000000000..0147e1627e --- /dev/null +++ b/man/man7/fccanalysis-final-script.7 @@ -0,0 +1,118 @@ +.\" Manpage for fccanalysis-final-script +.\" Contact FCC-PED-SoftwareAndComputing-Analysis@cern.ch to correct errors or typos. +.TH FCCANALYSIS\-FINAL\-SCRIPT 7 "05 Aug 2024" "0.9.0" "fccanalysis-final-script man page" +.SH NAME +\fBfccanalysis\-final\-script\fR \(en analysis script for the final stage of the +analysis +.SH SYNOPSIS +.IP +* +.SH DESCRIPTION +.PP +The analysis script for the final stage of the analysis is expected to be a +valid Python script containing definitions of the cuts and the final +histograms\&. To run the final stage do +.IP +fccanalysis final \fIanalysis_script.py\fR + +.RE +.SH ATTRIBUTES +User can use the following global attributes to control the behavior of the +final analysis stage\&. +.TP +\fBprocDict\fR (mandatory) +This variable controls which process dictionary will be used. It can be either +simple file name, absolute path or url\&. In the case of a simple filename, the +file is being searched for first in the working directory and then at the +locations indicated in the $FCCDICTSDIR environment variable\&. +.TP +\fBprocDictAdd\fR (optional) +User can provide additional processes, which are not included in the main +processes dictionary\&. +.TP +\fBinputDir\fR (mandatory) +User has to specify the directory where the input files coming from the previous +analysis stage are stored\&. +.TP +\fBoutputDir\fR (optional) +User can specify the directory for the output files\&. +.br +Default value: \&. (current working directory) +.TP +\fBnCPUS\fR (optional) +Number of threads the RDataFrame will use\&. If -1 is specified all available +cores will be used\&. +.br +Default value: 4 +.TP +\fBcutList\fR (optional) +Dictionary of cuts to be done of form ": "\&. The +cuts are independent and there is one special cut "all_events"\&. +.br +Default value: Empty dictionary +.TP +\fBcutLabels\fR (optional) +Dictionary of labels for cuts defined in \fBcutList\fR which will be used in +LaTeX table(s)\&. +.br +Default value: Empty dictionary +.TP +\fBdoScale\fR (optional) +Whether to scale the results to the expected integrated luminosity\&. +.br +Default value: True +.TP +\fBintLumi\fR (optional) +Expected integrated luminosity in pb^{-1}\&. +.br +Default value: 1.0 pb^{-1} +.TP +\fBdoTree\fR (optional) +Save events passing final cuts into output TTree\&. +.br +Default value: False +.TP +\fBsaveTabular\fR (optional) +Save results into LaTeX table(s)\&. +.br +Default value: False +.TP +\fBsaveJSON\fR (optional) +Save results into JSON file. +.br +Default value: False +.PP +This section is under construction. You are invited to help :) +.SH SEE ALSO +fccanalysis(1), fccanalysis-final(1) +.SH BUGS +Many +.SH AUTHORS +There are many contributors to the FCCAnalyses framework, but the principal +authors are: +.in +4 +Clement Helsens +.br +Valentin Volkl +.br +Gerardo Ganis +.SH FCCANALYSES +Part of the FCCAnalyses framework\&. +.SH LINKS +.PP +.UR https://hep-fcc\&.github\&.io/FCCAnalyses/ +FCCAnalyses webpage +.UE +.PP +.UR https://github\&.com/HEP\-FCC/FCCAnalyses/ +FCCAnalysises GitHub repository +.UE +.PP +.UR https://fccsw\-forum\&.web\&.cern\&.ch/ +FCCSW Forum +.UE +.SH CONTACT +.pp +.MT FCC-PED-SoftwareAndComputing-Analysis@cern.ch +FCC-PED-SoftwareAndComputing-Analysis +.ME diff --git a/man/man7/fccanalysis-plots-script.7 b/man/man7/fccanalysis-plots-script.7 new file mode 100644 index 0000000000..a332b3435b --- /dev/null +++ b/man/man7/fccanalysis-plots-script.7 @@ -0,0 +1,119 @@ +.\" Manpage for fccanalysis-plots-script +.\" Contact FCC-PED-SoftwareAndComputing-Analysis@cern.ch to correct errors or typos. +.TH FCCANALYSIS\-PLOTS\-SCRIPT 7 "16 Sep 2024" "0.9.0" "fccanalysis-plots-script man page" +.SH NAME +\fBfccanalysis\-plots\-script\fR \(en analysis script for the plots stage of the +analysis +.SH SYNOPSIS +.IP +* +.SH DESCRIPTION +.PP +The analysis script for the plots stage of the analysis is expected to be a +valid Python script containing definitions of the plots out of the +histograms obtained in the final stage of the analysis or from histmaker run\&. +To run the plots stage do +.IP +fccanalysis plots \fIanalysis_script.py\fR + +.RE +.SH ATTRIBUTES +User can use the following global attributes to control the behavior of the +plots stage\&. +.TP +\fBinputDir\fR or \fBindir\fR (mandatory) +User has to specify the directory where the input files coming from the previous +analysis stage are stored\&. +.TP +\fBoutputDir\fR or \fBoutdir\fR (optional) +User can specify the directory for the output files\&. +.br +Default value: \&. (current working directory) +.TP +\fBdoScale\fR (optional) +Whether to scale the histograms to the expected integrated luminosity\&. It is +recommended to scale your histograms already in the \fBfinal\fR stage of the +analysis\&. +.br +Default value: False +.TP +\fBintLumi\fR (mandatory) +Expected integrated luminosity in pb^{-1}\&. The value will be used in the +figures, but also to scale histograms if \fBdoScale\fR is set to True\&. +.br +Default value: Value from the input file +.TP +\fBintLumiLabel\fR (optional) +Custom label for the expected integrated luminosity\&. +.br +Default value: None +.TP +\fBscaleSig\fR (optional) +Additional scale factor to be applied on the signal histograms\&. +.br +Default value: 1.0 +.TP +\fBscaleBkg\fR (optional) +Additional scale factor to be applied on the background histograms\&. +.br +Default value: 1.0 +.TP +\fBplots\fR (mandatory) +Dictionary of plots to be created\&. +.br +Default value: {} (empty dictionary) +.TP +\fBsplitLeg\fR (optional) +Whether to split legend into two columns\&. +.br +Default value: False +.TP +\fBlegendCoord\fR (optional) +Adjusts position of the legend\&. Takes a list of four values\&. +.br +Default value: List of four None +.TP +\fBplotStatUnc\fR (optional) +Whether to plot statistical uncertainty\&. +.br +Default value: False +.TP +\fBlegendTextSize\fR (optional) +Adjusts size of the text in the plot's legend\&. +.br +Default value: 0.035 +.PP +This section is under construction. You are invited to help :) +.SH SEE ALSO +fccanalysis(1), fccanalysis-plots(1) +.SH BUGS +Many +.SH AUTHORS +There are many contributors to the FCCAnalyses framework, but the principal +authors are: +.in +4 +Clement Helsens +.br +Valentin Volkl +.br +Gerardo Ganis +.SH FCCANALYSES +Part of the FCCAnalyses framework\&. +.SH LINKS +.PP +.UR https://hep-fcc\&.github\&.io/FCCAnalyses/ +FCCAnalyses webpage +.UE +.PP +.UR https://github\&.com/HEP\-FCC/FCCAnalyses/ +FCCAnalysises GitHub repository +.UE +.PP +.UR https://fccsw\-forum\&.web\&.cern\&.ch/ +FCCSW Forum +.UE +.SH CONTACT +.pp +.MT FCC-PED-SoftwareAndComputing-Analysis@cern.ch +FCC-PED-SoftwareAndComputing-Analysis +.ME diff --git a/python/anascript.py b/python/anascript.py index 31b8e11023..6248993591 100644 --- a/python/anascript.py +++ b/python/anascript.py @@ -247,7 +247,7 @@ def get_element_dict(_dict, element: str): return None -def get_attribute(obj, attr_name: str, default_val=None): +def get_attribute(obj: object, attr_name: str, default_val=None) -> any: ''' Returns requested attribute value or default value. ''' diff --git a/python/do_plots.py b/python/do_plots.py index 63f5275e04..82ab862ecb 100644 --- a/python/do_plots.py +++ b/python/do_plots.py @@ -50,61 +50,121 @@ def formatStatUncHist(hists, name, hstyle=3254): # _____________________________________________________________________________ -def mapHistos(var, label, sel, param, rebin): - LOGGER.info('Run plots for var:%s label:%s selection:%s', - var, label, sel) - signal = param.plots[label]['signal'] - backgrounds = param.plots[label]['backgrounds'] +def determine_lumi_scaling(config: dict[str, any], + infile: object, + initial_scale: float = 1.0) -> float: + ''' + Determine whether to (re)scale histograms in the file to luminosity. + ''' + scale: float = initial_scale + + # Check if histograms were already scaled to lumi + try: + scaled: bool = infile.scaled.GetVal() + except AttributeError: + LOGGER.error('Input file does not contain scaling ' + 'information!\n %s\nAborting...', infile.GetName()) + sys.exit(3) + + if scaled: + try: + int_lumi_in_file: float = infile.intLumi.GetVal() + except AttributeError: + LOGGER.error('Can not load integrated luminosity ' + 'value from the input file!\n %s\n' + 'Aborting...', infile.GetName()) + + if config['int_lumi'] != int_lumi_in_file: + LOGGER.warning( + 'Histograms are already scaled to different ' + 'luminosity value!\n' + 'Luminosity in the input file is %s pb-1 and ' + 'luminosity requested in plots script is %s pb-1.', + int_lumi_in_file, config['int_lumi']) + if config['do_scale']: + LOGGER.warning( + 'Rescaling from %s pb-1 to %s pb-1...', + int_lumi_in_file, config['int_lumi']) + scale *= config['int_lumi'] / int_lumi_in_file + + else: + if config['do_scale']: + scale = scale * config['int_lumi'] + + return scale + + +# _____________________________________________________________________________ +def load_hists(var: str, + label: str, + sel: str, + config: dict[str, any], + rebin: int) -> tuple[dict[str, any], dict[str: any]]: + ''' + Load all histograms needed for the plot + ''' + + try: + signal = config['plots'][label]['signal'] + except KeyError: + signal = {} + + try: + backgrounds = config['plots'][label]['backgrounds'] + except KeyError: + backgrounds = {} hsignal = {} for s in signal: hsignal[s] = [] - for f in signal[s]: - fin = param.inputDir+f+'_'+sel+'_histo.root' - if not os.path.isfile(fin): - LOGGER.info('File "%s" not found!\nSkipping it...', fin) + for filepathstem in signal[s]: + infilepath = config['input_dir'] + filepathstem + '_' + sel + \ + '_histo.root' + if not os.path.isfile(infilepath): + LOGGER.info('File "%s" not found!\nSkipping it...', infilepath) continue - with ROOT.TFile(fin, 'READ') as tf: - h = tf.Get(var) - hh = copy.deepcopy(h) - hh.SetDirectory(0) - scaleSig = 1. - try: - scaleSig = param.scaleSig - except AttributeError: - LOGGER.debug('No scale signal, using 1.') - param.scaleSig = scaleSig - LOGGER.info('ScaleSig: %g', scaleSig) - hh.Scale(param.intLumi*scaleSig) - hh.Rebin(rebin) + with ROOT.TFile(infilepath, 'READ') as infile: + hist = copy.deepcopy(infile.Get(var)) + hist.SetDirectory(0) + + scale = determine_lumi_scaling(config, + infile, + config['scale_sig']) + hist.Scale(scale) + hist.Rebin(rebin) if len(hsignal[s]) == 0: - hsignal[s].append(hh) + hsignal[s].append(hist) else: - hh.Add(hsignal[s][0]) - hsignal[s][0] = hh + hist.Add(hsignal[s][0]) + hsignal[s][0] = hist hbackgrounds = {} for b in backgrounds: hbackgrounds[b] = [] - for f in backgrounds[b]: - fin = param.inputDir+f+'_'+sel+'_histo.root' - if not os.path.isfile(fin): - LOGGER.info('File "%s" not found!\nSkipping it...', fin) + for filepathstem in backgrounds[b]: + infilepath = config['input_dir'] + filepathstem + '_' + sel + \ + '_histo.root' + if not os.path.isfile(infilepath): + LOGGER.info('File "%s" not found!\nSkipping it...', infilepath) continue - with ROOT.TFile(fin) as tf: - h = tf.Get(var) - hh = copy.deepcopy(h) - hh.SetDirectory(0) - hh.Scale(param.intLumi) - hh.Rebin(rebin) + with ROOT.TFile(infilepath) as infile: + hist = copy.deepcopy(infile.Get(var)) + hist.SetDirectory(0) + + scale = determine_lumi_scaling(config, + infile, + config['scale_bkg']) + hist.Scale(scale) + hist.Rebin(rebin) + if len(hbackgrounds[b]) == 0: - hbackgrounds[b].append(hh) + hbackgrounds[b].append(hist) else: - hh.Add(hbackgrounds[b][0]) - hbackgrounds[b][0] = hh + hist.Add(hbackgrounds[b][0]) + hbackgrounds[b][0] = hist for s in hsignal: if len(hsignal[s]) == 0: @@ -114,10 +174,6 @@ def mapHistos(var, label, sel, param, rebin): if len(hbackgrounds[b]) == 0: hbackgrounds = removekey(hbackgrounds, b) - if not hsignal: - LOGGER.error('No signal input files found!\nAborting...') - sys.exit(3) - return hsignal, hbackgrounds @@ -188,7 +244,7 @@ def mapHistosFromHistmaker(hName, param, plotCfg): # _____________________________________________________________________________ -def runPlots(config: dict, +def runPlots(config: dict[str, any], args, var, sel, @@ -278,17 +334,14 @@ def runPlots(config: dict, histos.append(hbackgrounds[bkg][0]) colors.append(script_module.colors[bkg]) - intLumiab = script_module.intLumi/1e+06 - intLumi = f'L = {intLumiab:.0f} ab^{{-1}}' - if hasattr(script_module, "intLumiLabel"): - intLumi = getattr(script_module, "intLumiLabel") - lt = 'FCCAnalyses: FCC-hh Simulation (Delphes)' - rt = f'#sqrt{{s}} = {script_module.energy:.1f} TeV, L = {intLumi}' + rt = f'#sqrt{{s}} = {script_module.energy:.1f} TeV, ' \ + f'{config["int_lumi_label"]}' if 'ee' in script_module.collider: lt = 'FCCAnalyses: FCC-ee Simulation (Delphes)' - rt = f'#sqrt{{s}} = {script_module.energy:.1f} GeV, {intLumi}' + rt = f'#sqrt{{s}} = {script_module.energy:.1f} GeV, ' \ + f'{config["int_lumi_label"]}' customLabel = "" try: @@ -296,33 +349,26 @@ def runPlots(config: dict, except AttributeError: LOGGER.debug('No custom label, using nothing...') - scaleSig = 1. - try: - scaleSig = script_module.scaleSig - except AttributeError: - LOGGER.debug('No scale signal, using 1.') - script_module.scaleSig = scaleSig - if 'AAAyields' in var: - drawStack(var, 'events', leg, lt, rt, script_module.formats, + drawStack(config, var, 'events', leg, lt, rt, script_module.formats, script_module.outdir + "/" + sel, False, True, histos, - colors, script_module.ana_tex, extralab, scaleSig, + colors, script_module.ana_tex, extralab, customLabel, nsig, nbkg, leg2, yields, config['plot_stat_unc']) return if 'stack' in script_module.stacksig: if 'lin' in script_module.yaxis: - drawStack(var + "_stack_lin", 'events', leg, lt, rt, + drawStack(config, var + "_stack_lin", 'events', leg, lt, rt, script_module.formats, script_module.outdir + "/" + sel, False, True, histos, colors, script_module.ana_tex, - extralab, scaleSig, customLabel, nsig, nbkg, leg2, + extralab, customLabel, nsig, nbkg, leg2, yields, config['plot_stat_unc']) if 'log' in script_module.yaxis: - drawStack(var + "_stack_log", 'events', leg, lt, rt, + drawStack(config, var + "_stack_log", 'events', leg, lt, rt, script_module.formats, script_module.outdir + "/" + sel, True, True, histos, colors, script_module.ana_tex, - extralab, scaleSig, customLabel, nsig, nbkg, leg2, + extralab, customLabel, nsig, nbkg, leg2, yields, config['plot_stat_unc']) if 'lin' not in script_module.yaxis and \ 'log' not in script_module.yaxis: @@ -331,17 +377,17 @@ def runPlots(config: dict, if 'nostack' in script_module.stacksig: if 'lin' in script_module.yaxis: - drawStack(var + "_nostack_lin", 'events', leg, lt, rt, + drawStack(config, var + "_nostack_lin", 'events', leg, lt, rt, script_module.formats, script_module.outdir + "/" + sel, False, False, histos, - colors, script_module.ana_tex, extralab, scaleSig, + colors, script_module.ana_tex, extralab, customLabel, nsig, nbkg, leg2, yields, config['plot_stat_unc']) if 'log' in script_module.yaxis: - drawStack(var + "_nostack_log", 'events', leg, lt, rt, + drawStack(config, var + "_nostack_log", 'events', leg, lt, rt, script_module.formats, script_module.outdir + "/" + sel, True, False, histos, colors, script_module.ana_tex, - extralab, scaleSig, customLabel, nsig, nbkg, leg2, + extralab, customLabel, nsig, nbkg, leg2, yields, config['plot_stat_unc']) if 'lin' not in script_module.yaxis and \ 'log' not in script_module.yaxis: @@ -379,7 +425,9 @@ def runPlotsHistmaker(args, hName, param, plotCfg): leg2.SetFillStyle(0) leg2.SetLineColor(0) leg2.SetShadowColor(10) - leg2.SetTextSize(args.legend_text_size) + leg2.SetTextSize(args.legend_text_size + if args.legend_text_size is not None + else 0.035) leg2.SetTextFont(42) else: legsize = 0.04*(len(hbackgrounds)+len(hsignal)) @@ -392,15 +440,17 @@ def runPlotsHistmaker(args, hName, param, plotCfg): leg2 = None leg = ROOT.TLegend( - args.legend_x_min if args.legend_x_min > 0 else legCoord[0], - args.legend_y_min if args.legend_y_min > 0 else legCoord[1], - args.legend_x_max if args.legend_x_max > 0 else legCoord[2], - args.legend_y_max if args.legend_y_max > 0 else legCoord[3]) + args.legend_x_min if args.legend_x_min is not None else legCoord[0], + args.legend_y_min if args.legend_y_min is not None else legCoord[1], + args.legend_x_max if args.legend_x_max is not None else legCoord[2], + args.legend_y_max if args.legend_y_max is not None else legCoord[3]) leg.SetFillColor(0) leg.SetFillStyle(0) leg.SetLineColor(0) leg.SetShadowColor(10) - leg.SetTextSize(args.legend_text_size) + leg.SetTextSize(args.legend_text_size + if args.legend_text_size is not None + else 0.035) leg.SetTextFont(42) for b in hbackgrounds: @@ -435,6 +485,8 @@ def runPlotsHistmaker(args, hName, param, plotCfg): histos.append(hbackgrounds[b][0]) colors.append(param.colors[b]) + config: dict[str, any] = {} + xtitle = plotCfg['xtitle'] if 'xtitle' in plotCfg else "" ytitle = plotCfg['ytitle'] if 'ytitle' in plotCfg else "Events" xmin = plotCfg['xmin'] if 'xmin' in plotCfg else -1 @@ -444,7 +496,7 @@ def runPlotsHistmaker(args, hName, param, plotCfg): stack = plotCfg['stack'] if 'stack' in plotCfg else False logy = plotCfg['logy'] if 'logy' in plotCfg else False extralab = plotCfg['extralab'] if 'extralab' in plotCfg else "" - scaleSig = plotCfg['scaleSig'] if 'scaleSig' in plotCfg else 1 + config['scale_sig'] = plotCfg['scaleSig'] if 'scaleSig' in plotCfg else 1 intLumiab = param.intLumi/1e+06 intLumi = f'L = {intLumiab:.0f} ab^{{-1}}' @@ -466,36 +518,36 @@ def runPlotsHistmaker(args, hName, param, plotCfg): if stack: if logy: - drawStack(output, ytitle, leg, lt, rt, param.formats, param.outdir, - True, True, histos, colors, param.ana_tex, extralab, - scaleSig, customLabel, nsig, nbkg, leg2, yields, + drawStack(config, output, ytitle, leg, lt, rt, param.formats, + param.outdir, True, True, histos, colors, param.ana_tex, + extralab, customLabel, nsig, nbkg, leg2, yields, plotStatUnc, xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, xtitle=xtitle) else: - drawStack(output, ytitle, leg, lt, rt, param.formats, param.outdir, - False, True, histos, colors, param.ana_tex, extralab, - scaleSig, customLabel, nsig, nbkg, leg2, yields, + drawStack(config, output, ytitle, leg, lt, rt, param.formats, + param.outdir, False, True, histos, colors, param.ana_tex, + extralab, customLabel, nsig, nbkg, leg2, yields, plotStatUnc, xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, xtitle=xtitle) else: if logy: - drawStack(output, ytitle, leg, lt, rt, param.formats, param.outdir, - True, False, histos, colors, param.ana_tex, extralab, - scaleSig, customLabel, nsig, nbkg, leg2, yields, + drawStack(config, output, ytitle, leg, lt, rt, param.formats, + param.outdir, True, False, histos, colors, param.ana_tex, + extralab, customLabel, nsig, nbkg, leg2, yields, plotStatUnc, xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, xtitle=xtitle) else: - drawStack(output, ytitle, leg, lt, rt, param.formats, param.outdir, - False, False, histos, colors, param.ana_tex, extralab, - scaleSig, customLabel, nsig, nbkg, leg2, yields, - plotStatUnc, xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, - xtitle=xtitle) + drawStack(config, output, ytitle, leg, lt, rt, param.formats, + param.outdir, False, False, histos, colors, + param.ana_tex, extralab, customLabel, nsig, nbkg, leg2, + yields, plotStatUnc, xmin=xmin, xmax=xmax, ymin=ymin, + ymax=ymax, xtitle=xtitle) # _____________________________________________________________________________ -def drawStack(name, ylabel, legend, leftText, rightText, formats, directory, - logY, stacksig, histos, colors, ana_tex, extralab, scaleSig, +def drawStack(config, name, ylabel, legend, leftText, rightText, formats, + directory, logY, stacksig, histos, colors, ana_tex, extralab, customLabel, nsig, nbkg, legend2=None, yields=None, plotStatUnc=False, xmin=-1, xmax=-1, ymin=-1, ymax=-1, xtitle=""): @@ -589,8 +641,9 @@ def drawStack(name, ylabel, legend, leftText, rightText, formats, directory, hStackSig.Draw("HIST SAME NOSTACK") if plotStatUnc: # bkg-only uncertainty - hUnc_bkg = formatStatUncHist(hStackBkg.GetHists(), "bkg_only") - hUnc_bkg.Draw("E2 SAME") + if hStackBkg.GetNhists() != 0: + hUnc_bkg = formatStatUncHist(hStackBkg.GetHists(), "bkg_only") + hUnc_bkg.Draw("E2 SAME") for sHist in hStackSig.GetHists(): # sigs uncertainty hUnc_sig = formatStatUncHist([sHist], "sig", 3245) @@ -623,10 +676,17 @@ def get_minmax_range(hists, xmin, xmax): if stacksig: ymin_, ymax_ = get_minmax_range(hStack.GetHists(), xmin, xmax) else: - ymin_sig, ymax_sig = get_minmax_range(hStackSig.GetHists(), xmin, xmax) - ymin_bkg, ymax_bkg = get_minmax_range(hStackBkg.GetHists(), xmin, xmax) - ymin_ = min(ymin_sig, ymin_bkg) - ymax_ = max(ymax_sig, ymax_bkg) + if hStackSig.GetNhists() != 0 and hStackBkg.GetNhists() != 0: + ymin_sig, ymax_sig = get_minmax_range(hStackSig.GetHists(), + xmin, xmax) + ymin_bkg, ymax_bkg = get_minmax_range(hStackBkg.GetHists(), + xmin, xmax) + ymin_ = min(ymin_sig, ymin_bkg) + ymax_ = max(ymax_sig, ymax_bkg) + elif hStackSig.GetNhists() == 0: + ymin_, ymax_ = get_minmax_range(hStackBkg.GetHists(), xmin, xmax) + elif hStackBkg.GetNhists() == 0: + ymin_, ymax_ = get_minmax_range(hStackSig.GetHists(), xmin, xmax) if ymin == -1: ymin = ymin_*0.1 if logY else 0 if ymax == -1: @@ -676,9 +736,16 @@ def get_minmax_range(hists, xmin, xmax): latex.SetTextSize(0.025) latex.DrawLatex(0.18, 0.66, text) - text = '#bf{#it{' + 'Signal scale=' + str(scaleSig)+'}}' - latex.SetTextSize(0.025) - if scaleSig != 1: + if config['scale_sig'] != 1.0: + text = '#bf{#it{Signal Scaling = ' + f'{config["scale_sig"]:.3g}' + \ + '}}' + latex.SetTextSize(0.025) + latex.DrawLatex(0.18, 0.63, text) + + if config['scale_bkg'] != 1.0: + text = '#bf{#it{Background Scaling = ' + \ + f'{config["scale_bkg"]:.3g}' + '}}' + latex.SetTextSize(0.025) latex.DrawLatex(0.18, 0.63, text) canvas.RedrawAxis() @@ -721,9 +788,15 @@ def get_minmax_range(hists, xmin, xmax): latex.SetTextSize(0.025) latex.DrawLatex(0.18, 0.68, text) - text = '#bf{#it{' + 'Signal scale=' + str(scaleSig) + '}}' + text = '#bf{#it{Signal Scaling = ' + f'{config["scale_sig"]:.3g}' + \ + '}}' + latex.SetTextSize(0.04) + latex.DrawLatex(0.18, 0.57, text) + + text = '#bf{#it{Background Scaling = ' + \ + f'{config["scale_bkg"]:.3g}' + '}}' latex.SetTextSize(0.04) - latex.DrawLatex(0.18, 0.55, text) + latex.DrawLatex(0.18, 0.52, text) dy = 0 text = '#bf{#it{' + 'Process' + '}}' @@ -797,8 +870,71 @@ def run(args): script_module = importlib.import_module(base_name) # Merge script and command line arguments into one configuration object - config = {} + # Also check the script attributes + config: dict[str, any] = {} + + # Input directory + config['input_dir'] = os.getcwd() + if hasattr(script_module, 'indir'): + config['input_dir'] = script_module.indir + if hasattr(script_module, 'inputDir'): + config['input_dir'] = script_module.inputDir + if args.input_dir is not None: + config['input_dir'] = args.input_dir + + # Output directory + config['output_dir'] = os.getcwd() + if hasattr(script_module, 'outdir'): + config['output_dir'] = script_module.outdir + if hasattr(script_module, 'outputDir'): + config['output_dir'] = script_module.outputDir + if args.output_dir is not None: + config['output_dir'] = args.output_dir + + # Integrated luminosity + config['int_lumi'] = 1. + if hasattr(script_module, 'intLumi'): + config['int_lumi'] = script_module.intLumi + else: + LOGGER.debug('No integrated luminosity provided, using 1.0 pb-1.') + LOGGER.info('Integrated luminosity: %g pb-1', config['int_lumi']) + + # Whether to scale histograms to luminosity + config['do_scale'] = 1.0 + if hasattr(script_module, 'doScale'): + config['do_scale'] = script_module.doScale + else: + LOGGER.debug('No scaling to luminosity requested, scaling won\'t be ' + 'done.') + config['do_scale'] = False + if config['do_scale']: + LOGGER.info('Histograms will be scaled to luminosity.') + + # Scale factor to apply to all signal histograms + config['scale_sig'] = 1.0 + if hasattr(script_module, 'scaleSig'): + config['scale_sig'] = script_module.scaleSig + else: + LOGGER.debug('No scale factor for signal provided, using 1.0.') + LOGGER.info('Scale factor for signal: %g', config['scale_sig']) + # Scale factor to apply to all background histograms + config['scale_bkg'] = 1.0 + if hasattr(script_module, 'scaleBkg'): + config['scale_bkg'] = script_module.scaleBkg + else: + LOGGER.debug('No scale factor for background provided, using 1.0.') + LOGGER.info('Scale factor for background: %g', config['scale_sig']) + + # Plots list + config['plots']: dict[str, any] = {} + if hasattr(script_module, 'plots'): + config['plots'] = script_module.plots + else: + LOGGER.debug('List of plots not provided!\nAborting...') + sys.exit(3) + + # Splitting legend into two columns config['split_leg'] = False if hasattr(script_module, 'splitLeg'): config['split_leg'] = script_module.splitLeg @@ -825,6 +961,21 @@ def run(args): if args.legend_text_size is not None: config['legend_text_size'] = args.legend_text_size + # Label for the integrated luminosity + config['int_lumi_label'] = None + if hasattr(script_module, "intLumiLabel"): + config['int_lumi_label'] = script_module.intLumiLabel + if config['int_lumi_label'] is None: + if config['int_lumi'] >= 1e6: + int_lumi_label = config['int_lumi'] / 1e6 + config['int_lumi_label'] = f'L = {int_lumi_label:.2g} ab^{{-1}}' + elif config['int_lumi'] >= 1e3: + int_lumi_label = config['int_lumi'] / 1e3 + config['int_lumi_label'] = f'L = {int_lumi_label:.2g} fb^{{-1}}' + else: + config['int_lumi_label'] = \ + f'L = {config["int_lumi"]:.2g} pb^{{-1}}' + # Handle plots for Histmaker analyses and exit if hasattr(script_module, 'hists'): for hist_name, plot_cfg in script_module.hists.items(): @@ -832,6 +983,7 @@ def run(args): sys.exit() counter = 0 + LOGGER.info('Plotting:') for var_index, var in enumerate(script_module.variables): for label, sels in script_module.selections.items(): for sel in sels: @@ -840,11 +992,15 @@ def run(args): if len(script_module.rebin) == \ len(script_module.variables): rebin_tmp = script_module.rebin[var_index] - hsignal, hbackgrounds = mapHistos(var, - label, - sel, - script_module, - rebin=rebin_tmp) + + LOGGER.info(' var: %s label: %s selection: %s', + var, label, sel) + + hsignal, hbackgrounds = load_hists(var, + label, + sel, + config, + rebin=rebin_tmp) runPlots(config, args, var + "_" + label, diff --git a/python/frame.py b/python/frame.py index 2930046ae6..78a58cfa8a 100644 --- a/python/frame.py +++ b/python/frame.py @@ -22,13 +22,13 @@ def generate_graph(dframe, args, suffix: str | None = None) -> None: # Check if output file path is provided graph_path: pathlib.PurePath = pathlib.PurePath(args.graph_path) if args.graph_path == '': - graph_path = pathlib.PurePath(args.anascript_path).with_suffix('.dot') + graph_path = pathlib.PurePath(os.getcwd(), 'fccanalysis_graph.dot') # check if file path ends with "correct" extension if graph_path.suffix not in ('.dot', '.png'): LOGGER.warning('Graph output file extension not recognized!\n' 'Using analysis script name...') - graph_path = pathlib.PurePath(args.anascript_path).with_suffix('.dot') + graph_path = pathlib.PurePath(os.getcwd(), 'fccanalysis_graph.dot') # Add optional suffix to the output file path if suffix is not None: @@ -41,7 +41,8 @@ def generate_graph(dframe, args, suffix: str | None = None) -> None: LOGGER.info('Analysis computational graph will be saved into:\n - %s', graph_path.with_suffix('.dot')) else: - LOGGER.info('Analysis computational graph will be saved into:\n - %s\n - %s', + LOGGER.info('Analysis computational graph will be saved ' + 'into:\n - %s\n - %s', graph_path.with_suffix('.dot'), graph_path.with_suffix('.png')) diff --git a/python/parsers.py b/python/parsers.py index c0915ef777..c2fc092eac 100644 --- a/python/parsers.py +++ b/python/parsers.py @@ -146,7 +146,11 @@ def setup_run_parser_plots(parser): ''' Define command line arguments for the plots sub-command. ''' - parser.add_argument('script_path', help="path to the plots script") + parser.add_argument('script_path', help='path to the plots script') + parser.add_argument('--input-dir', type=str, default=None, + help='input directory location') + parser.add_argument('--output-dir', type=str, default=None, + help='output directory location') parser.add_argument('--legend-text-size', type=float, default=None, help='text size for the legend elements') parser.add_argument('--legend-x-min', type=float, default=None, @@ -159,7 +163,6 @@ def setup_run_parser_plots(parser): help='maximal y position of the legend') - def setup_run_parser_combine(parser): ''' Define command line arguments for the combine sub-command. diff --git a/python/run_final_analysis.py b/python/run_final_analysis.py index 4f0cb48ebb..7eaa7740b0 100644 --- a/python/run_final_analysis.py +++ b/python/run_final_analysis.py @@ -8,9 +8,13 @@ import glob import logging import importlib.util +import pathlib +import json +import math import ROOT # type: ignore -from anascript import get_element, get_element_dict +import cppyy +from anascript import get_element, get_attribute from process import get_process_dict from frame import generate_graph @@ -44,60 +48,179 @@ def get_entries(infilepath: str) -> tuple[int, int]: # _____________________________________________________________________________ -def testfile(f: str) -> bool: +def get_processes(rdf_module: object) -> list[str]: ''' - Test input file from previous stages + Get processes from the analysis script or find them in the input directory. + TODO: filter out files without .root suffix ''' - with ROOT.TFile(f, 'READ') as infile: - tt = None - try: - tt = infile.Get("events") - if tt is None: - LOGGER.warning('File does not contains events, selection was ' - 'too tight, skipping it: %s', f) - return False - except IOError as e: - LOGGER.warning('I/O error(%i): %s', e.errno, e.strerror) - return False - except ValueError: - LOGGER.warning('Could read the file') - return False - except: - LOGGER.warning('Unexpected error: %s\nfile ===%s=== must be ' - 'deleted', - sys.exc_info()[0], f) - return False - return True + process_list: list[str] = get_attribute(rdf_module, 'processList', []) + input_dir: str = get_attribute(rdf_module, 'inputDir', '') + if not process_list: + files_or_dirs = glob.glob(f'{input_dir}/*') + process_list = [pathlib.Path(p).stem for p in files_or_dirs] + info_msg = f'Found {len(process_list)} processes in the input ' \ + 'directory:' + for process_name in process_list: + info_msg += f'\n - {process_name}' + LOGGER.info(info_msg) + + return process_list + + +# _____________________________________________________________________________ +def save_results(results: dict[str, dict[str, any]], + rdf_module: object) -> None: + ''' + Save results into various formats, depending on the analysis script. + ''' + output_dir: str = get_attribute(rdf_module, 'outputDir', '.') + + if get_attribute(rdf_module, 'saveJSON', False): + json_path: str = os.path.join(output_dir, 'results.json') + LOGGER.info('Saving results into JSON file:\n%s', json_path) + save_json(results, json_path) + + if get_attribute(rdf_module, 'saveTabular', False): + cut_labels: dict[str, str] = get_attribute(rdf_module, 'cutLabels', + None) + tables_path: str = os.path.join(output_dir, 'outputTabular.txt') + LOGGER.info('Saving results in LaTeX tables to:\n%s', tables_path) + save_tables(results, tables_path, cut_labels) + + +# _____________________________________________________________________________ +def save_json(results: dict[str, dict[str, any]], + outpath: str) -> None: + ''' + Save results into a JSON file. + ''' + with open(outpath, 'w', encoding='utf-8') as outfile: + json.dump(results, outfile) + + +# _____________________________________________________________________________ +def save_tables(results: dict[str, dict[str, any]], + outpath: str, + cut_labels: dict[str, str] = None) -> None: + ''' + Save results into LaTeX tables. + ''' + cut_names: list[str] = list(results[next(iter(results))].keys()) + if not cut_names: + LOGGER.error('No results found!\nAborting...') + sys.exit(3) + + if cut_labels is None: + cut_labels = {} + for name in cut_names: + cut_labels[name] = f'{name}' + + cut_labels['all_events'] = 'All events' + + with open(outpath, 'w', encoding='utf-8') as outfile: + # Printing the number of events in format of a LaTeX table + # Yields + outfile.write('Yields:\n') + outfile.write('\\begin{table}[H]\n' + ' \\resizebox{\\textwidth}{!}{\n') + + outfile.write(' \\begin{tabular}{|l||') + outfile.write('c|' * (len(cut_labels) + 2)) # Number of cuts + outfile.write('} \\hline\n') + + outfile.write(8 * ' ') + outfile.write(' & ') + outfile.write(' & '.join(cut_labels.values())) + outfile.write(' \\\\ \\hline\n') + + for process_name, result in results.items(): + outfile.write(8 * ' ') + outfile.write(process_name) + for cut_name in cut_names: + cut_result: dict[str, any] = result[cut_name] + outfile.write(' & ') + if cut_result["n_events_raw"] == 0.: + outfile.write('0.') + else: + outfile.write(f'{cut_result["n_events"]:.2e}') + outfile.write(' $\\pm$ ') + outfile.write(f'{cut_result["uncertainty"]:.2e}') + outfile.write(' \\\\\n') + outfile.write(' \\hline\n' + ' \\end{tabular}}\n' + ' \\caption{Caption}\n' + ' \\label{tab:my_label}\n' + '\\end{table}') + + # Efficiency: + outfile.write('\n\nEfficiency:\n') + outfile.write('\\begin{table}[H] \n' + ' \\resizebox{\\textwidth}{!}{ \n') + + outfile.write(' \\begin{tabular}{|l||') + outfile.write('c|' * len(results)) + outfile.write('} \\hline\n') + + outfile.write(8 * ' ') + outfile.write(' & ') + outfile.write(' & '.join(results.keys())) + outfile.write(' \\hline \\\\\n') + + for cut_name in cut_names: + if cut_name == 'all_events': + continue + outfile.write(8 * ' ') + outfile.write(f'{cut_name}') + for result in results.values(): + efficiency = result[cut_name]['n_events'] / \ + result['all_events']['n_events'] + if efficiency == 0.: + outfile.write(' & 0.') + else: + outfile.write(f' & {efficiency:.3g}') + outfile.write(' \\\\\n') + + outfile.write(' \\hline\n' + ' \\end{tabular}}\n' + ' \\caption{Caption}\n' + ' \\label{tab:my_label}\n' + '\\end{table}\n') # __________________________________________________________ -def run(rdf_module, args): +def run(rdf_module, args) -> None: ''' - Main loop. + Let's start. ''' - proc_dict_location = get_element(rdf_module, "procDict", True) + # Load process dictionary + proc_dict_location: str = get_attribute(rdf_module, "procDict", '') if not proc_dict_location: LOGGER.error( 'Location of the process dictionary not provided!\nAborting...') sys.exit(3) - process_dict = get_process_dict(proc_dict_location) - - process_dict_additions = get_element(rdf_module, "procDictAdd", True) - for addition in process_dict_additions: - if get_element_dict(process_dict, addition) is None: - process_dict[addition] = process_dict_additions[addition] - else: - LOGGER.debug('Process already in the dictionary. Skipping it...') - + process_dict: dict[str, any] = get_process_dict(proc_dict_location) + + # Add processes into the dictionary + process_dict_additions = get_attribute(rdf_module, "procDictAdd", {}) + if process_dict_additions: + info_msg = 'Adding the following processes to the process dictionary:' + for process_name, process_info in process_dict_additions.items(): + info_msg += f'\n - {process_name}' + if process_name in process_dict: + LOGGER.debug('Process "%s" already in the dictionary.\n' + 'Will be overwritten...', process_name) + process_dict[process_name] = process_info + LOGGER.info(info_msg) - # set multithreading - ncpus = get_element(rdf_module, "nCPUS", 1) + # Set multi-threading + ncpus = get_attribute(rdf_module, "nCPUS", 4) if ncpus < 0: # use all available threads ROOT.EnableImplicitMT() ncpus = ROOT.GetThreadPoolSize() - ROOT.ROOT.EnableImplicitMT(ncpus) - ROOT.EnableThreadSafety() + if ncpus != 1: + ROOT.ROOT.EnableImplicitMT(ncpus) + ROOT.EnableThreadSafety() nevents_real = 0 start_time = time.time() @@ -105,52 +228,40 @@ def run(rdf_module, args): process_events = {} events_ttree = {} file_list = {} - save_tab = [] - efficiency_list = [] + results = {} - input_dir = get_element(rdf_module, "inputDir", True) + # Checking input directory + input_dir = get_attribute(rdf_module, 'inputDir', '') if not input_dir: - LOGGER.error('The inputDir variable is mandatory for the final stage ' - 'of the analysis!\nAborting...') + LOGGER.error('The "inputDir" variable is mandatory for the final ' + 'stage of the analysis!\nAborting...') + sys.exit(3) + if not os.path.isdir(input_dir): + LOGGER.error('The specified input directory does not exist!\n' + 'Aborting...') + LOGGER.error('Input directory: %s', input_dir) sys.exit(3) if input_dir[-1] != "/": input_dir += "/" - output_dir = get_element(rdf_module, "outputDir", True) - if output_dir != "": - if output_dir[-1] != "/": - output_dir += "/" + # Checking output directory + output_dir = get_attribute(rdf_module, 'outputDir', '.') - if not os.path.exists(output_dir) and output_dir != '': + if output_dir[-1] != "/": + output_dir += "/" + + if not os.path.exists(output_dir): + LOGGER.debug('Creating output directory:\n %s', output_dir) os.system(f'mkdir -p {output_dir}') - cut_list: dict[str, str] = get_element(rdf_module, "cutList", True) - length_cuts_names = max(len(cut) for cut in cut_list) - cut_labels = get_element(rdf_module, "cutLabels", True) - - # save a table in a separate tex file - save_tabular = get_element(rdf_module, "saveTabular", True) - if save_tabular: - # option to rewrite the cuts in a better way for the table. otherwise, - # take them from the cutList - if cut_labels: - cut_names = list(cut_labels.values()) - else: - cut_names = list(cut_list) + # Cuts + cuts: dict[str, str] = get_attribute(rdf_module, "cutList", {}) - cut_names.insert(0, ' ') - save_tab.append(cut_names) - efficiency_list.append(cut_names) + # Find processes (samples) to run over + process_list: list[str] = get_processes(rdf_module) - process_list = get_element(rdf_module, "processList", {}) - if len(process_list) == 0: - files = glob.glob(f"{input_dir}/*") - process_list = [os.path.basename(file.replace(".root", "")) for file in files] - info_msg = f"Found {len(process_list)} processes in the input directory:" - for process_name in process_list: - info_msg += f'\n\t- {process_name}' - LOGGER.info(info_msg) + # Find number of events per process for process_name in process_list: process_events[process_name] = 0 events_ttree[process_name] = 0 @@ -162,7 +273,7 @@ def run(rdf_module, args): 'directory as it might have been processed in batch.', infilepath) else: - LOGGER.info('Open file:\n\t%s', infilepath) + LOGGER.info('Open file:\n %s', infilepath) process_events[process_name], events_ttree[process_name] = \ get_entries(infilepath) file_list[process_name].push_back(infilepath) @@ -189,93 +300,128 @@ def run(rdf_module, args): info_msg += f'\n\t- {process_name}: {n_events:,}' LOGGER.info(info_msg) - histo_list = get_element(rdf_module, "histoList", True) - do_scale = get_element(rdf_module, "doScale", True) - int_lumi = get_element(rdf_module, "intLumi", True) + # Check if there are any histograms defined + histo_list: dict[str, dict[str, any]] = get_attribute(rdf_module, + "histoList", {}) + if not histo_list: + LOGGER.error('No histograms defined!\nAborting...') + sys.exit(3) + + # Check whether to scale the results to the luminosity + do_scale = get_attribute(rdf_module, "doScale", True) + if do_scale: + int_lumi = get_attribute(rdf_module, "intLumi", 1.) + if int_lumi < 0.: + LOGGER.error('Integrated luminosity value not valid!\nAborting...') + sys.exit(3) + # Check whether to save resulting TTree(s) into a file(s) do_tree = get_element(rdf_module, "doTree", True) + + # Main loop for process_name in process_list: LOGGER.info('Running over process: %s', process_name) - if process_events[process_name] == 0: + if process_events[process_name] <= 0: LOGGER.error('Can\'t scale histograms, the number of processed ' 'events for the process "%s" seems to be zero!', process_name) sys.exit(3) - df = ROOT.ROOT.RDataFrame("events", file_list[process_name]) + dframe = ROOT.ROOT.RDataFrame("events", file_list[process_name]) define_list = get_element(rdf_module, "defineList", True) if len(define_list) > 0: LOGGER.info('Registering extra DataFrame defines...') for define in define_list: - df = df.Define(define, define_list[define]) + dframe = dframe.Define(define, define_list[define]) fout_list = [] histos_list = [] - tdf_list = [] + snapshots = [] count_list = [] cuts_list = [] cuts_list.append(process_name) eff_list = [] eff_list.append(process_name) - - # get process information from prodDict - try: - xsec = process_dict[process_name]["crossSection"] - kfactor = process_dict[process_name]["kfactor"] - matchingEfficiency = process_dict[process_name]["matchingEfficiency"] - except KeyError: - xsec = 1.0 - kfactor = 1.0 - matchingEfficiency = 1.0 - LOGGER.error( - f'No value defined for process {process_name} in dictionary!') - gen_sf = xsec*kfactor*matchingEfficiency + results[process_name] = {} - # Define all histos, snapshots, etc... - LOGGER.info('Defining snapshots and histograms') - for cut_name, cut_definition in cut_list.items(): - # output file for tree - fout = output_dir + process_name + '_' + cut_name + '.root' - fout_list.append(fout) + if do_scale: + # Get process information from process directory + try: + xsec = process_dict[process_name]["crossSection"] + except KeyError: + xsec = 1.0 + LOGGER.warning('Cross-section value not found for process ' + '"%s"!\nUsing 1.0...', process_name) + + try: + kfactor = process_dict[process_name]["kfactor"] + except KeyError: + kfactor = 1.0 + LOGGER.warning('Kfactor value not found for process "%s"!\n' + 'Using 1.0...', process_name) + + try: + matching_efficiency = \ + process_dict[process_name]["matchingEfficiency"] + except KeyError: + matching_efficiency = 1.0 + LOGGER.warning('Matching efficiency value not found for ' + 'process "%s"!\nUsing 1.0...', process_name) + + gen_sf = xsec * kfactor * matching_efficiency + lpn = len(process_name) + 8 + LOGGER.info('Generator scale factor for "%s": %.4g', + process_name, gen_sf) + LOGGER.info(' - cross-section: ' + lpn*' ' + '%.4g pb', + xsec) + LOGGER.info(' - kfactor: ' + lpn*' ' + '%.4g', kfactor) + LOGGER.info(' - matching efficiency:' + lpn*' ' + '%.4g', + matching_efficiency) + LOGGER.info('Integrated luminosity: %.4g pb-1', int_lumi) - df_cut = df.Filter(cut_definition) + # Define all histos, snapshots, etc... + LOGGER.info('Defining cuts and histograms') + for cut_name, cut_definition in cuts.items(): + try: + dframe_cut = dframe.Filter(cut_definition) + except cppyy.gbl.std.runtime_error: + LOGGER.error('During defining of the cuts an error ' + 'occurred!\nAborting...') + sys.exit(3) - count_list.append(df_cut.Count()) + count_list.append(dframe_cut.Count()) histos = [] - - for v in histo_list: - # default 1D histogram - if "name" in histo_list[v]: + for hist_name, hist_definition in histo_list.items(): + # default 1D histogram, looks for the name of the column. + if "name" in hist_definition: model = ROOT.RDF.TH1DModel( - v, - f';{histo_list[v]["title"]};', - histo_list[v]["bin"], - histo_list[v]["xmin"], - histo_list[v]["xmax"]) - histos.append(df_cut.Histo1D(model, histo_list[v]["name"])) + hist_name, + f';{hist_definition["title"]};', + hist_definition["bin"], + hist_definition["xmin"], + hist_definition["xmax"]) + histos.append(dframe_cut.Histo1D(model, + hist_definition["name"])) # multi dim histogram (1, 2 or 3D) - elif "cols" in histo_list[v]: - cols = histo_list[v]['cols'] - bins = histo_list[v]['bins'] - bins_unpacked = tuple(i for sub in bins for i in sub) + elif "cols" in hist_definition: + cols = hist_definition['cols'] + bins = hist_definition['bins'] if len(bins) != len(cols): LOGGER.error('Amount of columns should be equal to ' 'the amount of bin configs!\nAborting...') sys.exit(3) + bins_unpacked = tuple(i for sub in bins for i in sub) if len(cols) == 1: - histos.append(df_cut.Histo1D((v, "", *bins_unpacked), - cols[0])) + histos.append(dframe_cut.Histo1D( + (hist_name, '', *bins_unpacked), *cols)) elif len(cols) == 2: - histos.append(df_cut.Histo2D((v, "", *bins_unpacked), - cols[0], - cols[1])) + histos.append(dframe_cut.Histo2D( + (hist_name, "", *bins_unpacked), *cols)) elif len(cols) == 3: - histos.append(df_cut.Histo3D((v, "", *bins_unpacked), - cols[0], - cols[1], - cols[2])) + histos.append(dframe_cut.Histo3D( + (hist_name, "", *bins_unpacked), *cols)) else: LOGGER.error('Only 1, 2 or 3D histograms supported.') sys.exit(3) @@ -286,169 +432,147 @@ def run(rdf_module, args): histos_list.append(histos) if do_tree: + # output file for the TTree + fout = os.path.join(output_dir, + process_name + '_' + cut_name + '.root') + fout_list.append(fout) + opts = ROOT.RDF.RSnapshotOptions() opts.fLazy = True - try: - snapshot_tdf = df_cut.Snapshot("events", fout, "", opts) - except Exception as excp: - LOGGER.error('During the execution of the final stage ' - 'exception occurred:\n%s', excp) - sys.exit(3) - - # Needed to avoid python garbage collector messing around with - # the snapshot - tdf_list.append(snapshot_tdf) - - if args.graph: - generate_graph(df, args) + # Snapshots need to be kept in memory until the event loop is + # run + snapshots.append(dframe_cut.Snapshot("events", fout, "", opts)) # Now perform the loop and evaluate everything at once. LOGGER.info('Evaluating...') - all_events = df.Count().GetValue() + all_events_raw = dframe.Count().GetValue() LOGGER.info('Done') - nevents_real += all_events - uncertainty = ROOT.Math.sqrt(all_events) + nevents_real += all_events_raw + uncertainty = ROOT.Math.sqrt(all_events_raw) if do_scale: - all_events = all_events * 1. * gen_sf * \ + LOGGER.info('Scaling cut yields...') + all_events = all_events_raw * 1. * gen_sf * \ int_lumi / process_events[process_name] - uncertainty = ROOT.Math.sqrt(all_events) * gen_sf * \ + uncertainty = ROOT.Math.sqrt(all_events_raw) * gen_sf * \ int_lumi / process_events[process_name] - LOGGER.info('Printing scaled number of events!!!') - - cfn_width = 16 + length_cuts_names # Cutflow name width - info_msg = 'Cutflow:' - info_msg += f'\n\t{"All events":{cfn_width}} : {all_events:,}' - - if save_tabular: - # scientific notation - recomended for backgrounds - cuts_list.append(f'{all_events:.2e} $\\pm$ {uncertainty:.2e}') - # float notation - recomended for signals with few events - # cuts_list.append(f'{all_events:.3f} $\\pm$ {uncertainty:.3f}') - # ####eff_list.append(1.) # start with 100% efficiency - - for i, cut in enumerate(cut_list): - nevents_this_cut = count_list[i].GetValue() - nevents_this_cut_raw = nevents_this_cut - uncertainty = ROOT.Math.sqrt(nevents_this_cut_raw) + else: + all_events = all_events_raw + uncertainty = ROOT.Math.sqrt(all_events_raw) + + results[process_name]['all_events'] = {} + results[process_name]['all_events']['n_events_raw'] = all_events_raw + results[process_name]['all_events']['n_events'] = all_events + results[process_name]['all_events']['uncertainty'] = uncertainty + + for i, cut in enumerate(cuts): + cut_result = {} + cut_result['n_events_raw'] = count_list[i].GetValue() if do_scale: - nevents_this_cut = \ - nevents_this_cut * 1. * gen_sf * \ + cut_result['n_events'] = \ + cut_result['n_events_raw'] * 1. * gen_sf * \ int_lumi / process_events[process_name] - uncertainty = \ - ROOT.Math.sqrt(nevents_this_cut_raw) * gen_sf * \ + cut_result['uncertainty'] = \ + math.sqrt(cut_result['n_events_raw']) * gen_sf * \ int_lumi / process_events[process_name] - info_msg += f'\n\t{"After selection " + cut:{cfn_width}} : ' - info_msg += f'{nevents_this_cut:,}' - - # Saving the number of events, uncertainty and efficiency for the - # output-file - if save_tabular and cut != 'selNone': - if nevents_this_cut != 0: - # scientific notation - recomended for backgrounds - cuts_list.append( - f'{nevents_this_cut:.2e} $\\pm$ {uncertainty:.2e}') - # float notation - recomended for signals with few events - # cuts_list.append( - # f'{neventsThisCut:.3f} $\\pm$ {uncertainty:.3f}') - eff_list.append(f'{1.*nevents_this_cut/all_events:.3f}') - # if number of events is zero, the previous uncertainty is - # saved instead: - elif '$\\pm$' in cuts_list[-1]: - cut = (cuts_list[-1]).split() - cuts_list.append(f'$\\leq$ {cut[2]}') - eff_list.append('0.') + else: + cut_result['n_events'] = cut_result['n_events_raw'] + cut_result['uncertainty'] = \ + math.sqrt(cut_result['n_events_raw']) + results[process_name][cut] = cut_result + + # Cut name width + cn_width = max(len(cn) for cn in results[process_name].keys()) + info_msg = 'Cutflow:\n' + info_msg += ' ' + cn_width * ' ' + ' Raw events' + if do_scale: + info_msg += ' Scaled events' + for cut_name, cut_result in results[process_name].items(): + if cut_name == 'all_events': + cut_name = 'All events' + info_msg += f'\n - {cut_name:{cn_width}} ' + info_msg += f' {cut_result["n_events_raw"]:>16,}' + if do_scale: + if cut_result['n_events_raw'] != 0: + info_msg += f' {cut_result["n_events"]:>16.2e}' else: - cuts_list.append(cuts_list[-1]) - eff_list.append('0.') + info_msg += f' {"0.":>16}' LOGGER.info(info_msg) + if args.graph: + generate_graph(dframe, args) + args.graph = False + # And save everything LOGGER.info('Saving the outputs...') - for i, cut in enumerate(cut_list): + if do_scale: + LOGGER.info('Scaling the histograms...') + for i, cut in enumerate(cuts): # output file for histograms - fhisto = output_dir + process_name + '_' + cut + '_histo.root' - with ROOT.TFile(fhisto, 'RECREATE'): - for h in histos_list[i]: + fhisto = os.path.join(output_dir, + process_name + '_' + cut + '_histo.root') + with ROOT.TFile(fhisto, 'RECREATE') as outfile: + for hist in histos_list[i]: + hist_name = hist.GetName() + '_raw' + outfile.WriteObject(hist.GetValue(), hist_name) if do_scale: - h.Scale(gen_sf * int_lumi / process_events[process_name]) - h.Write() - - # write all meta info to the output file - p = ROOT.TParameter(int)("eventsProcessed", process_events[process_name]) - p.Write() - # take sum of weights=eventsProcessed for now (assume weights==1) - p = ROOT.TParameter(float)("sumOfWeights", process_events[process_name]) - p.Write() - p = ROOT.TParameter(float)("intLumi", int_lumi) - p.Write() - p = ROOT.TParameter(float)("crossSection", xsec) - p.Write() - p = ROOT.TParameter(float)("kfactor", kfactor) - p.Write() - p = ROOT.TParameter(float)("matchingEfficiency", - matchingEfficiency) - p.Write() + hist.Scale(gen_sf * int_lumi / + process_events[process_name]) + outfile.WriteObject(hist.GetValue()) + + # write all metadata info to the output file + param = ROOT.TParameter(int)("eventsProcessed", + process_events[process_name]) + outfile.WriteObject(param) + + param = ROOT.TParameter(float)("sumOfWeights", + process_events[process_name]) + outfile.WriteObject(param) + param = ROOT.TParameter(bool)("scaled", + do_scale) + outfile.WriteObject(param) + + if do_scale: + param = ROOT.TParameter(float)("intLumi", int_lumi) + outfile.WriteObject(param) + + param = ROOT.TParameter(float)("crossSection", xsec) + outfile.WriteObject(param) + + param = ROOT.TParameter(float)("kfactor", kfactor) + outfile.WriteObject(param) + + param = ROOT.TParameter(float)("matchingEfficiency", + matching_efficiency) + outfile.WriteObject(param) + + param = ROOT.TParameter(float)("generatorScaleFactor", + gen_sf) + outfile.WriteObject(param) if do_tree: - # test that the snapshot worked well - validfile = testfile(fout_list[i]) - if not validfile: - continue - - if save_tabular and cut != 'selNone': - save_tab.append(cuts_list) - efficiency_list.append(eff_list) - - if save_tabular: - tabular_path = output_dir + 'outputTabular.txt' - LOGGER.info('Saving tabular to:\n%s', tabular_path) - with open(tabular_path, 'w', encoding='utf-8') as outfile: - # Printing the number of events in format of a LaTeX table - outfile.write('\\begin{table}[H]\n' - ' \\centering\n' - ' \\resizebox{\\textwidth}{!}{\n' - ' \\begin{tabular}{|l||') - outfile.write('c|' * (len(cuts_list)-1)) - outfile.write('} \\hline\n') - for i, row in enumerate(save_tab): - outfile.write(' ') - outfile.write(' & '.join(row)) - outfile.write(' \\\\\n') - if i == 0: - outfile.write(' \\hline\n') - outfile.write(' \\hline \n' - ' \\end{tabular}} \n' - ' \\caption{Caption} \n' - ' \\label{tab:my_label} \n' - '\\end{table}\n') - - # Efficiency: - outfile.write('\n\nEfficiency:\n') - outfile.write('\\begin{table}[H] \n' - ' \\centering \n' - ' \\resizebox{\\textwidth}{!}{ \n' - ' \\begin{tabular}{|l||') - outfile.write('c|' * (len(cuts_list)-1)) - outfile.write('} \\hline\n') - for i in range(len(eff_list)): - outfile.write(' ') - v = [row[i] for row in efficiency_list] - outfile.write(' & '.join(str(v))) - outfile.write(' \\\\\n') - if i == 0: - outfile.write(' \\hline\n') - outfile.write(' \\hline \n' - ' \\end{tabular}} \n' - ' \\caption{Caption} \n' - ' \\label{tab:my_label} \n' - '\\end{table}\n') + # Number of events from a particular cut + nevt_cut = results[process_name][cut]['n_events_raw'] + # Number of events in file + try: + nevt_infile = snapshots[i].Count().GetValue() + except cppyy.gbl.std.runtime_error: + nevt_infile = 0 + + if nevt_cut != nevt_infile: + LOGGER.error('Number of events for cut "%s" in sample ' + '"%s" does not match with number of saved ' + 'events!', cut, process_name) + sys.exit(3) + + # Save results either to JSON or LaTeX tables + save_results(results, rdf_module) elapsed_time = time.time() - start_time - info_msg = f"{' SUMMARY ':=^80}\n" + info_msg = f"\n{' SUMMARY ':=^80}\n" info_msg += 'Elapsed time (H:M:S): ' info_msg += time.strftime('%H:%M:%S', time.gmtime(elapsed_time)) info_msg += '\nEvents processed/second: ' @@ -468,7 +592,7 @@ def run_final(parser): args, _ = parser.parse_known_args() if args.command != 'final': - LOGGER.error('Unknow sub-command "%s"!\nAborting...', args.command) + LOGGER.error('Unknown sub-command "%s"!\nAborting...', args.command) sys.exit(3) # Check that the analysis file exists