fix: add only for standalone condor & plot code
Ming-Yan committed Oct 27, 2023
1 parent 1282411 commit dcb4254
Showing 4 changed files with 48 additions and 76 deletions.
21 changes: 20 additions & 1 deletion condor/submitter.py
@@ -89,13 +89,19 @@ def get_main_parser():
         help="Run with systematics, all, weights_only(no JERC uncertainties included),JERC_split, None",
     )
     parser.add_argument("--isArray", action="store_true", help="Output root files")
-
     parser.add_argument(
         "--noHist", action="store_true", help="Not output coffea histogram"
     )
     parser.add_argument(
         "--overwrite", action="store_true", help="Overwrite existing files"
     )
+
+    parser.add_argument(
+        "--only",
+        type=str,
+        default=None,
+        help="Only process part of the dataset: pass a sample name or a wildcard pattern ending in *",
+    )
     parser.add_argument(
         "--voms",
         default=None,
@@ -172,7 +178,20 @@ def get_main_parser():
         sample_dict = json.load(f)
     split_sample_dict = {}
     counter = 0
+    only = []
+    if args.only is not None:
+        if "*" in args.only:
+            only = [
+                k
+                for k in sample_dict.keys()
+                if k.lstrip("/").startswith(args.only.rstrip("*"))
+            ]
+        else:
+            only.append(args.only)
+
     for sample_name, files in sample_dict.items():
+        if len(only) != 0 and sample_name not in only:
+            continue
         for ifile in range(
             (len(files) + args.condorFileSize - 1) // args.condorFileSize
         ):
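The new --only flag filters which datasets get condor jobs. A standalone sketch of the selection logic above (the dataset names are hypothetical): a bare name must equal a dataset key exactly, while a trailing * keeps every key whose name, after stripping leading slashes, starts with the given prefix.

# Sketch of the --only filtering added above; sample_dict keys are hypothetical.
sample_dict = {
    "/WJetsToLNu_0J": ["f1.root"],
    "/WJetsToLNu_1J": ["f2.root"],
    "/TTto2L2Nu": ["f3.root"],
}
only_arg = "WJetsToLNu*"  # as passed via --only

only = []
if only_arg is not None:
    if "*" in only_arg:
        # wildcard: keep keys whose name (minus leading "/") starts with the prefix
        only = [
            k for k in sample_dict.keys()
            if k.lstrip("/").startswith(only_arg.rstrip("*"))
        ]
    else:
        only.append(only_arg)

for sample_name in sample_dict:
    if len(only) != 0 and sample_name not in only:
        continue  # skipped by --only
    print("submitting", sample_name)  # submits the two WJetsToLNu samples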
10 changes: 4 additions & 6 deletions scripts/plotdataMC.py
@@ -241,12 +241,10 @@
     else:
         rebin = np.array([float(i) for i in args.autorebin.split(",")])
         do_xerr = True
-        collated["mc"][discr] = rebin_hist(
-            collated["mc"][discr], collated["mc"][discr].axes[-1].name, rebin
-        )
-        collated["data"][discr] = rebin_hist(
-            collated["data"][discr], collated["data"][discr].axes[-1].name, rebin
-        )
+        for s in collated.keys():
+            collated[s][discr] = rebin_hist(
+                collated[s][discr], collated[s][discr].axes[-1].name, rebin
+            )

     ## Rescale noSF & SF to same MC yields
     if (
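The hard-coded "mc"/"data" pair is replaced by a loop over every collated group, so any extra group (e.g. a noSF variant) is rebinned consistently. A minimal sketch of the pattern using the hist library directly; the names are hypothetical, and hist.rebin(2) stands in for the repo's rebin_hist helper, which takes an explicit edge array instead.

import numpy as np
import hist

rng = np.random.default_rng(0)
discr = "btagDeepFlavB"  # hypothetical discriminant name

# collated maps group -> {distribution name -> histogram}, as in plotdataMC.py
collated = {
    s: {discr: hist.Hist(hist.axis.Regular(10, 0.0, 1.0, name=discr))}
    for s in ("mc", "data")
}
for s in collated:
    collated[s][discr].fill(rng.uniform(0, 1, 500))

# Same pattern as the patch: rebin every collated group in one loop instead
# of hard-coding "mc" and "data".
for s in collated.keys():
    name = collated[s][discr].axes[-1].name
    collated[s][discr] = collated[s][discr][{name: hist.rebin(2)}]

print([collated[s][discr].axes[0].size for s in collated])  # [5, 5]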
84 changes: 19 additions & 65 deletions src/BTVNanoCommissioning/helpers/xs_scaler.py
@@ -29,62 +29,24 @@ def scaleSumW(output, lumi):
             xs_dict[obj["process_name"]] = xs_dict[obj["process_name"]] * float(
                 obj["kFactor"]
             )
-    duplicated_name = False
-    sumw = {}
-    flist = []
-    for f in output.keys():
-        flist.extend([m for m in output[f].keys() if "Run" not in m])
-    for files in output.keys():
-        if "sumw" not in output[files].keys() and len(flist) != len(set(flist)):
-            duplicated_name = True
-            for sample in output[files].keys():
-                if "Run" in str(output[files][sample]):
-                    continue
-                if sample in sumw.keys():
-                    sumw[sample] = sumw[sample] + float(output[files][sample]["sumw"])
-                else:
-                    sumw[sample] = float(output[files][sample]["sumw"])
-    for files in output.keys():
-        if "sumw" not in output[files].keys():
-            scaled[files] = {}
-            for sample, accu in output[files].items():
-                scaled[files][sample] = {}
-                scaled[files][sample]["sumw"] = output[files][sample]["sumw"]
-                if duplicated_name:
-                    scaled[files][sample]["sumw"] = sumw[sample]
-                for key, h_obj in accu.items():
-                    if isinstance(h_obj, hist.Hist):
-                        h = copy.deepcopy(h_obj)
-                        if sample in xs_dict.keys():
-                            h = (
-                                h
-                                * xs_dict[sample]
-                                * lumi
-                                / scaled[files][sample]["sumw"]
-                            )
-                        else:
-                            if not (("data" in sample) or ("Run" in sample)):
-                                raise KeyError(sample, "is not found in xsection.py")
-                            else:
-                                h = h
-                        scaled[files][sample][key] = h
-        else:
-            for sample, accu in output[files].items():
-                scaled[sample] = {}
-                for key, h_obj in accu.items():
-                    scaled[sample]["sumw"] = output[files]["sumw"]
-                    if isinstance(h_obj, hist.Hist):
-                        h = copy.deepcopy(h_obj)
-                        if sample in xs_dict.keys():
-                            h = h * xs_dict[sample] * lumi / output[files]["sumw"]
-                        else:
-                            if not (("data" in sample) or ("Run" in sample)) or (
-                                "Double" in sample
-                            ):
-                                raise KeyError(sample, "is not found in xsection.py")
-                            else:
-                                h = h
-                        scaled[sample][key] = h
+    merged = {}
+    merged_output = accumulate([output[f] for f in output.keys()])
+
+    for sample, accu in merged_output.items():
+        scaled[sample] = {}
+        for key, h_obj in accu.items():
+            scaled[sample]["sumw"] = merged_output[sample]["sumw"]
+            if isinstance(h_obj, hist.Hist):
+                h = copy.deepcopy(h_obj)
+                if sample in xs_dict.keys():
+                    h = h * xs_dict[sample] * lumi / merged_output[sample]["sumw"]
+                else:
+                    if ("data" in sample) or ("Run" in sample) or ("Double" in sample):
+                        h = h
+                    else:
+                        raise KeyError(sample, "is not found in xsection.py")
+
+                scaled[sample][key] = h
     return scaled


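scaleSumW now merges all per-file outputs once with coffea's accumulate and then applies the standard xs * lumi / sumw weight per sample, replacing the old duplicated-name bookkeeping. A self-contained sketch of that scaling rule, with illustrative cross-section, luminosity, and sample names:

import copy
import hist
import numpy as np
from coffea.processor import accumulate  # merges dicts, sums hists and floats

lumi = 41500.0  # /pb, illustrative
xs_dict = {"WJetsToLNu": 61526.7}  # pb, illustrative

def one_file_output(n, seed):
    # per-file processor output: sample -> {"sumw": float, hist name: hist}
    h = hist.Hist(hist.axis.Regular(5, 0, 5, name="njet"))
    h.fill(np.random.default_rng(seed).integers(0, 5, n))
    return {"WJetsToLNu": {"sumw": float(n), "njet": h}}

output = {"f1": one_file_output(1000, 1), "f2": one_file_output(500, 2)}
merged_output = accumulate([output[f] for f in output.keys()])

scaled = {}
for sample, accu in merged_output.items():
    scaled[sample] = {"sumw": accu["sumw"]}  # 1500.0 after merging
    for key, h_obj in accu.items():
        if isinstance(h_obj, hist.Hist):
            # xs * lumi / total sum of generator weights, as in scaleSumW
            scaled[sample][key] = (
                copy.deepcopy(h_obj) * xs_dict[sample] * lumi / accu["sumw"]
            )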
@@ -118,19 +80,11 @@ def additional_scale(output, scale, sample_to_scale):
     return scaled


-def collate(output, mergemap):
+def collate(merged_output, mergemap):
     out = {}
-    merged = {}
-    merged_output = accumulate([output[f] for f in output.keys()])
-    for files in merged_output.keys():
-        if "sumw" not in merged_output[files].keys():
-            for m in output[files].keys():
-                merged[m] = dict(merged_output[files][m].items())
-        else:
-            merged[files] = dict(merged_output[files].items())
     for group, names in mergemap.items():
         out[group] = accumulate(
-            [v for k, v in merged.items() if k.split("_FNAME_")[0] in names]
+            [v for k, v in merged_output.items() if k.split("_FNAME_")[0] in names]
         )
     return out

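collate no longer re-merges the per-file outputs itself; callers now pass in the already-accumulated output. A usage sketch with hypothetical samples and groups (the _FNAME_ suffix appears to be how upstream code tags per-file variations of a sample key; none is used here):

import hist
import numpy as np
from coffea.processor import accumulate

def collate(merged_output, mergemap):
    # group already-merged samples according to mergemap
    out = {}
    for group, names in mergemap.items():
        out[group] = accumulate(
            [v for k, v in merged_output.items() if k.split("_FNAME_")[0] in names]
        )
    return out

def sample(seed):
    h = hist.Hist(hist.axis.Regular(4, 0, 4, name="njet"))
    h.fill(np.random.default_rng(seed).integers(0, 4, 100))
    return {"njet": h}

merged_output = {"TTto2L2Nu": sample(0), "WJetsToLNu": sample(1), "Run2022C": sample(2)}
mergemap = {"mc": ["TTto2L2Nu", "WJetsToLNu"], "data": ["Run2022C"]}
collated = collate(merged_output, mergemap)
print(collated["mc"]["njet"].sum())  # 200.0: the two MC samples merged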
9 changes: 5 additions & 4 deletions src/BTVNanoCommissioning/workflows/ctag_Wc_valid_sf.py
@@ -205,7 +205,7 @@ def process_shift(self, events, shift_name):

         # Other cuts
         req_pTratio = (soft_muon[:, 0].pt / mu_jet[:, 0].pt) < 0.4
-
+        idx = np.where(iso_muon.jetIdx == -1, 0, iso_muon.jetIdx)
         req_QCDveto = (
             (iso_muon.pfRelIso04_all < 0.05)
             & (abs(iso_muon.dz) < 0.01)
@@ -550,8 +550,9 @@ def process_shift(self, events, shift_name):
                 out_branch,
                 np.where(
                     (out_branch == "SoftMuon")
-                    # | (out_branch == "MuonJet")
+                    | (out_branch == "MuonJet")
                     | (out_branch == "dilep")
+                    | (out_branch == "OtherJets")
                 ),
             )

@@ -560,7 +561,7 @@
             "Muon",
             "Jet",
             "SoftMuon",
-            # "MuonJet",
+            "MuonJet",
             "dilep",
             "charge",
             "MET",
@@ -578,7 +579,7 @@
         # write to root files
         os.system(f"mkdir -p {self.name}/{dataset}")
         with uproot.recreate(
-            f"{self.name}/{dataset}/{systematics[0]}_{int(events.metadata['entrystop']/self.chunksize)}.root"
+            f"{self.name}/{dataset}/{systematics[0]}_{events.metadata['filename'].split('/')[-1].replace('.root','')}_{int(events.metadata['entrystop']/self.chunksize)}.root"
         ) as fout:
             fout["Events"] = uproot_writeable(pruned_ev, include=out_branch)

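The output ROOT file name now embeds the input file name in addition to the chunk index, so chunks coming from different input files of the same dataset no longer overwrite each other. A sketch of the naming scheme with illustrative metadata:

# Illustrative values, mimicking the metadata coffea attaches to each chunk.
name, dataset = "ctag_Wc_sf", "WJetsToLNu"  # hypothetical self.name / dataset
systematics = ["nominal"]
chunksize = 100000
metadata = {"filename": "/store/mc/sample/nano_123.root", "entrystop": 200000}

stem = metadata["filename"].split("/")[-1].replace(".root", "")
chunk = int(metadata["entrystop"] / chunksize)
# old: f"{name}/{dataset}/{systematics[0]}_{chunk}.root"  -> collides across files
outname = f"{name}/{dataset}/{systematics[0]}_{stem}_{chunk}.root"
print(outname)  # ctag_Wc_sf/WJetsToLNu/nominal_nano_123_2.root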
