Skip to content

Commit

Permalink
Put JPCalibHandler to utils.correction
Browse files Browse the repository at this point in the history
  • Loading branch information
colizz committed Oct 17, 2023
1 parent 98b0663 commit 08664d9
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 134 deletions.
132 changes: 0 additions & 132 deletions src/BTVNanoCommissioning/helpers/BTA_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import numba as nb
import numpy as np
import pandas as pd
import uproot
import coffea.nanoevents.methods.vector as vector
import os, psutil

Expand Down Expand Up @@ -202,134 +201,3 @@ def calc_ip_vector(obj, dxy, dz, is_3d=False):
# then, get the closest distance to the track on 3D geometry
ipvec_3d = ipvec_2d_ext - ipvec_2d_ext.dot(pvec) / pvec.p2 * pvec
return ipvec_3d


###############
# Classes #
###############


class JPCalibHandler(object):
def __init__(self, campaign, isRealData, dataset):
r"""
A tool for calculating the track probability and jet probability
campaign: campaign name
isRealData: whether the dataset is real data
dataset: dataset name from events.metadata["dataset"]
"""
if isRealData:
for key in config[campaign]["JPCalib"]:
if key in dataset:
filename = config[campaign]["JPCalib"][key]
break
else:
raise ValueError(f"No JPCalib file found for dataset {dataset}")
else:
filename = config[campaign]["JPCalib"]["MC"]

# print(f'Using JPCalib file {filename}')

templates = uproot.open(
f"src/BTVNanoCommissioning/data/JPCalib/{campaign}/{filename}"
)
self.ipsig_histo_val = np.array(
[templates[f"histoCat{i}"].values() for i in range(10)]
)
self.ipsig_histo_tot = np.sum(self.ipsig_histo_val, axis=1)
self.values_cumsum = np.cumsum(self.ipsig_histo_val[:, ::-1], axis=1)[:, ::-1]
self.edges = templates["histoCat0"].axes[0].edges()

def flatten(self, array):
r"""
Get the fully flattened array and its layout for each layer
"""
layouts = []
array_fl = array
while str(ak.type(array_fl)).count("*") > 1:
layouts.append(ak.num(array_fl))
array_fl = ak.flatten(array_fl)
return array_fl, layouts

def unflatten(self, array_fl, layouts):
r"""
Recover a flattened array using the original layouts
"""
array = array_fl
for layout in layouts[::-1]:
array = ak.unflatten(array, layout)
return array

def calc_track_proba(self, ipsig: ak.Array, cat: ak.Array):
r"""
Calculate the track probability from the integral of the track IPsig templates, given the IPsig and category.
Reference code: https://github.com/cms-sw/cmssw/blob/CMSSW_13_0_X/RecoBTag/TrackProbability/src/HistogramProbabilityEstimator.cc
ipsig: IP significance array
cat: category array (0-9)
"""

if ak.any(cat < 0) or ak.any(cat > 9):
raise ValueError("Category out of range [0, 9]")

# get the fully flattened array of the input while storing its layouts for later recovery
ipsig_fl, layouts = self.flatten(ipsig)
cat_fl = ak.flatten(cat, axis=None)

# index of the IPsig bins
ipsig_fl = abs(ipsig_fl)
ipsig_fl_index = np.minimum(
np.searchsorted(self.edges, ipsig_fl), self.ipsig_histo_val.shape[1] - 1
)

# retrieve the cumsum value (\int_{ipsig}^{inf} p(ipsig') d(ipsig')) from the correct template
ipsig_cumsum_fl = self.values_cumsum[cat_fl, ipsig_fl_index]

# calculate the track probability as (\int_{ipsig}^{inf} ..) / (\int_{0}^{inf} ..) * sign(IPsig)
proba_fl = (ipsig_cumsum_fl / self.ipsig_histo_tot[cat_fl]) * np.sign(ipsig_fl)

# recover the original layout
proba = self.unflatten(proba_fl, layouts)
return proba

def calc_jet_proba(self, proba):
# Calculate jet probability (JP)
# according to jetProbability func in https://github.com/cms-sw/cmssw/blob/CMSSW_13_0_X/RecoBTag/ImpactParameter/interface/TemplatedJetProbabilityComputer.h

# minium proba = 0.5%
proba = np.maximum(proba, 0.005) # dim: (evt, jet, trk)

ntrk = ak.num(proba, axis=-1) # dim: (evt, jet), the number of tracks in a jet
prodproba_log = ak.sum(
np.log(proba), axis=-1
) # dim: (evt, jet), the log(Π(proba)) of all tracks in a jet
prodproba_log_m_log = ak.where(
(ak.num(proba, axis=-1) >= 2) & (prodproba_log < 0),
np.log(-prodproba_log),
0,
) # log(-logΠ), if >=2 tracks in a jet

# now calculating Σ_tr{0..N-1} ((-logΠ)^tr / tr!)
trk_index = ak.local_index(proba)
fact_array = ak.concatenate(
[
[1.0],
np.arange(1, max(5, ak.max(trk_index) + 1), dtype=np.float64).cumprod(),
]
) # construct a factorial array
trk_index_fl, _layouts = self.flatten(trk_index)
lfact = self.unflatten(
fact_array[trk_index_fl], _layouts
) # dim: (evt, jet, trk), nested factorial array given the track index

prob = ak.sum(
np.exp(trk_index * prodproba_log_m_log - np.log(lfact)), axis=-1
) # dim: (evt, jet), Σ_tr{0..N-1} ((-logΠ)^tr / tr!)

prob_jet = np.minimum(
np.exp(np.maximum(np.log(np.maximum(prob, 1e-30)) + prodproba_log, -30.0)),
1.0,
) # dim: (evt, jet), calculating Π * Σ_tr{0..N-1} ((-logΠ)^tr / tr!)

prob_jet = ak.where(prodproba_log < 0, prob_jet, 1.0)
prob_jet = np.maximum(prob_jet, 1e-30)

return prob_jet
128 changes: 128 additions & 0 deletions src/BTVNanoCommissioning/utils/correction.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import re
import numpy as np
import awkward as ak
import uproot
from coffea.lookup_tools import extractor, txt_converters, rochester_lookup

from coffea.lumi_tools import LumiMask
Expand Down Expand Up @@ -1522,3 +1523,130 @@ def add_scalevar_3pt(weights, lhe_weights):
down = np.ones(len(weights.weight()))

weights.add("scalevar_3pt", nom, up, down)


# JP calibration utility
class JPCalibHandler(object):
def __init__(self, campaign, isRealData, dataset):
r"""
A tool for calculating the track probability and jet probability
campaign: campaign name
isRealData: whether the dataset is real data
dataset: dataset name from events.metadata["dataset"]
"""
if isRealData:
for key in config[campaign]["JPCalib"]:
if key in dataset:
filename = config[campaign]["JPCalib"][key]
break
else:
raise ValueError(f"No JPCalib file found for dataset {dataset}")
else:
filename = config[campaign]["JPCalib"]["MC"]

# print(f'Using JPCalib file {filename}')

templates = uproot.open(
f"src/BTVNanoCommissioning/data/JPCalib/{campaign}/{filename}"
)
self.ipsig_histo_val = np.array(
[templates[f"histoCat{i}"].values() for i in range(10)]
)
self.ipsig_histo_tot = np.sum(self.ipsig_histo_val, axis=1)
self.values_cumsum = np.cumsum(self.ipsig_histo_val[:, ::-1], axis=1)[:, ::-1]
self.edges = templates["histoCat0"].axes[0].edges()

def flatten(self, array):
r"""
Get the fully flattened array and its layout for each layer
"""
layouts = []
array_fl = array
while str(ak.type(array_fl)).count("*") > 1:
layouts.append(ak.num(array_fl))
array_fl = ak.flatten(array_fl)
return array_fl, layouts

def unflatten(self, array_fl, layouts):
r"""
Recover a flattened array using the original layouts
"""
array = array_fl
for layout in layouts[::-1]:
array = ak.unflatten(array, layout)
return array

def calc_track_proba(self, ipsig: ak.Array, cat: ak.Array):
r"""
Calculate the track probability from the integral of the track IPsig templates, given the IPsig and category.
Reference code: https://github.com/cms-sw/cmssw/blob/CMSSW_13_0_X/RecoBTag/TrackProbability/src/HistogramProbabilityEstimator.cc
ipsig: IP significance array
cat: category array (0-9)
"""

if ak.any(cat < 0) or ak.any(cat > 9):
raise ValueError("Category out of range [0, 9]")

# get the fully flattened array of the input while storing its layouts for later recovery
ipsig_fl, layouts = self.flatten(ipsig)
cat_fl = ak.flatten(cat, axis=None)

# index of the IPsig bins
ipsig_fl = abs(ipsig_fl)
ipsig_fl_index = np.minimum(
np.searchsorted(self.edges, ipsig_fl), self.ipsig_histo_val.shape[1] - 1
)

# retrieve the cumsum value (\int_{ipsig}^{inf} p(ipsig') d(ipsig')) from the correct template
ipsig_cumsum_fl = self.values_cumsum[cat_fl, ipsig_fl_index]

# calculate the track probability as (\int_{ipsig}^{inf} ..) / (\int_{0}^{inf} ..) * sign(IPsig)
proba_fl = (ipsig_cumsum_fl / self.ipsig_histo_tot[cat_fl]) * np.sign(ipsig_fl)

# recover the original layout
proba = self.unflatten(proba_fl, layouts)
return proba

def calc_jet_proba(self, proba):
# Calculate jet probability (JP)
# according to jetProbability func in https://github.com/cms-sw/cmssw/blob/CMSSW_13_0_X/RecoBTag/ImpactParameter/interface/TemplatedJetProbabilityComputer.h

# minium proba = 0.5%
proba = np.maximum(proba, 0.005) # dim: (evt, jet, trk)

ntrk = ak.num(proba, axis=-1) # dim: (evt, jet), the number of tracks in a jet
prodproba_log = ak.sum(
np.log(proba), axis=-1
) # dim: (evt, jet), the log(Π(proba)) of all tracks in a jet
prodproba_log_m_log = ak.where(
(ak.num(proba, axis=-1) >= 2) & (prodproba_log < 0),
np.log(-prodproba_log),
0,
) # log(-logΠ), if >=2 tracks in a jet

# now calculating Σ_tr{0..N-1} ((-logΠ)^tr / tr!)
trk_index = ak.local_index(proba)
fact_array = ak.concatenate(
[
[1.0],
np.arange(1, max(5, ak.max(trk_index) + 1), dtype=np.float64).cumprod(),
]
) # construct a factorial array
trk_index_fl, _layouts = self.flatten(trk_index)
lfact = self.unflatten(
fact_array[trk_index_fl], _layouts
) # dim: (evt, jet, trk), nested factorial array given the track index

prob = ak.sum(
np.exp(trk_index * prodproba_log_m_log - np.log(lfact)), axis=-1
) # dim: (evt, jet), Σ_tr{0..N-1} ((-logΠ)^tr / tr!)

prob_jet = np.minimum(
np.exp(np.maximum(np.log(np.maximum(prob, 1e-30)) + prodproba_log, -30.0)),
1.0,
) # dim: (evt, jet), calculating Π * Σ_tr{0..N-1} ((-logΠ)^tr / tr!)

prob_jet = ak.where(prodproba_log < 0, prob_jet, 1.0)
prob_jet = np.maximum(prob_jet, 1e-30)

return prob_jet
3 changes: 1 addition & 2 deletions src/BTVNanoCommissioning/workflows/BTA_producer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,14 @@
from BTVNanoCommissioning.helpers.update_branch import missing_branch
from BTVNanoCommissioning.helpers.BTA_helper import (
BTA_HLT,
JPCalibHandler,
to_bitwise_trigger,
get_hadron_mass,
cumsum,
is_from_GSP,
calc_ip_vector,
)
from BTVNanoCommissioning.helpers.func import update
from BTVNanoCommissioning.utils.correction import load_SF, JME_shifts
from BTVNanoCommissioning.utils.correction import load_SF, JME_shifts, JPCalibHandler


## Based on coffea_array_producer.ipynb from Congqiao
Expand Down

0 comments on commit 08664d9

Please sign in to comment.