Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New ht thcovmat #2126

Draft
wants to merge 33 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
c990a09
Copied from branch 'HT_thcovmat'
achiefa Jul 15, 2024
5d227c7
Removed version
achiefa Jul 15, 2024
7dbd5cd
Saving progress - not ready
achiefa Jul 15, 2024
e24e691
Implemented d/p ratio
achiefa Jul 16, 2024
c6394e0
Parsing 'separate_multiplicative' in vp_setupfit
achiefa Jul 16, 2024
7d49793
Minor adjustments
achiefa Jul 16, 2024
25c39af
Corrected bug
achiefa Jul 16, 2024
b23c867
Correcting bug
achiefa Jul 17, 2024
5fa1a4f
Implemented knots in runcard
achiefa Aug 5, 2024
850d17c
Added valiphys card for chi2 report
achiefa Aug 20, 2024
599eb49
First implementation of HT at the level of theory predictions
achiefa Aug 23, 2024
7185eba
Implemented table for kinematics
achiefa Sep 20, 2024
9e8b1eb
Allowed theory HT in runcard - added HERACOMB in HT calculations
achiefa Sep 21, 2024
3d2b200
Excluded HERACOMB
achiefa Sep 21, 2024
4f8adcb
Hacking NMC dataset
achiefa Sep 21, 2024
8e7e0fd
Grouping kinematics
achiefa Sep 26, 2024
a0613af
Reimplementing thcovmat
achiefa Sep 26, 2024
10bff87
Added comment in HT for DIS
achiefa Sep 27, 2024
8b01c6e
Corrected normalisation for SIGMARED DIS NC data sets
achiefa Sep 27, 2024
861916b
Removing unused code
achiefa Sep 29, 2024
1d24747
Added HT for F2C data (EMC) - removed deprecated function
achiefa Oct 2, 2024
615234f
Corrected EMC data iron target
achiefa Oct 2, 2024
61a1594
Removed deprecated code
achiefa Oct 3, 2024
d09f7cd
Refactoring + DIS CC
achiefa Oct 3, 2024
8bbc034
Corrected bug - ready for cc test
achiefa Oct 11, 2024
60290f1
Corrected bug - ready
achiefa Oct 11, 2024
87f16c7
Removing unnecessary code
achiefa Oct 11, 2024
cb9a334
Corrected bug after rebase
achiefa Oct 12, 2024
77e0bec
Add normalisation in CC x-secs
achiefa Oct 17, 2024
67a5bb4
Correct normalisation
achiefa Oct 17, 2024
e74f7cf
Restore n3fit files from master
achiefa Oct 17, 2024
54bda12
remove _PB suffix from process type
RoyStegeman Nov 14, 2024
3725aa4
format a bit
RoyStegeman Nov 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 107 additions & 0 deletions validphys2/examples/theory_covariance/chi2table_ht.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# This is the driver template for vp-comparefits. It consists on a validphys
# runcard where some settings are missing and are to be filled by the
# vp-comparefits script. The settings below are a sample of such settings, kept
# for reference
#
# meta:
# title: The title of the Report
# keywords: [report_template]
# author: NNPDF Collaboration
#
# current:
# fit: {id: id_of_the_base_fit}
# pdf: {id: id_of_the_base_fit, label: "Current Fit"}
# theory:
# from_: fit
# theoryid:
# from_: theory
# speclabel: "Current Fit"
#
# reference:
# fit: {id: id_of_the_reference_fit}
# pdf: {id: id_of_the_reference_fit, label: "Reference Fit" }
# theory:
# from_: fit
# theoryid:
# from_: theory
# speclabel: "Reference Fit"

pdfs:
- {id: "240816-06-7-01-lc", label: "HT low cuts"}
- {id: "240812-02-ABMP-lnv", label: "HT mid cuts"}
- {id: "240812-04-ABMP-lnv", label: "HT std. cuts"}
- {id: "240819_nnpdf40_lowcuts", label: "no HT low cuts"}
- {id: "240807-midcuts", label: "no HT mid cuts"}
- {id: "NNPDF40_nnlo_as_01180_qcd", label: "no HT std cuts (NNPDF40)"}

fits:
- {id: "240816-06-7-01-lc", label: "HT low cuts"}
- {id: "240812-02-ABMP-lnv", label: "HT mid cuts"}
- {id: "240812-04-ABMP-lnv", label: "HT std. cuts"}
- {id: "240819_nnpdf40_lowcuts", label: "no HT low cuts"}
- {id: "240807-midcuts", label: "no HT mid cuts"}
- {id: "NNPDF40_nnlo_as_01180_qcd", label: "no HT std cuts (NNPDF40)"}

use_cuts: "fromfit"
use_weights_in_covmat: False
use_thcovmat_if_present: True

Q: 1.651

#template: report.md

description:
from_: fit

dataset_inputs:
from_: fit

#dataspecs:
# - theoryid:
# from_: current
# pdf:
# from_: current
# fit:
# from_: current
# speclabel:
# from_: current
#
# - theoryid:
# from_: reference
# pdf:
# from_: reference
# fit:
# from_: reference
# speclabel:
# from_: reference

Datanorm:
normalize_to: data

DataGroups:
- metadata_group: nnpdf31_process
- metadata_group: experiment

ProcessGroup:
metadata_group: nnpdf31_process

template_text: |
Summary
-------
{@ summarise_fits @}

{@with DataGroups@}
$\chi^2$ by {@processed_metadata_group@}
----------------------------------------
{@plot_fits_groups_data_chi2@}
{@endwith@}

$\chi^2$ by dataset
-------------------
### Plot
{@plot_fits_datasets_chi2@}
### Table
{@ProcessGroup fits_chi2_table(show_total=true)@}

actions_:
- report(main=true)
9 changes: 8 additions & 1 deletion validphys2/src/validphys/commondata.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@
:py:mod:`validphys.coredata`

"""

import functools

from reportengine import collect
from validphys.commondataparser import load_commondata
import functools


@functools.lru_cache
def loaded_commondata_with_cuts(commondata, cuts):
Expand All @@ -35,3 +38,7 @@ def loaded_commondata_with_cuts(commondata, cuts):
groups_dataset_inputs_loaded_cd_with_cuts = collect(
"loaded_commondata_with_cuts", ("group_dataset_inputs_by_metadata", "data_input")
)

groups_dataset_inputs_loaded_cd_with_cuts_byprocess = collect(
"loaded_commondata_with_cuts", ("group_dataset_inputs_by_process", "data")
)
65 changes: 55 additions & 10 deletions validphys2/src/validphys/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1157,6 +1157,28 @@ def produce_loaded_user_covmat_path(self, user_covmat_path: str = ""):
fileloc = l.check_vp_output_file(user_covmat_path)
return fileloc

@configparser.explicit_node
def produce_covmat_custom(self, use_ht_uncertainties: bool = False):
    """Select the theory-covmat construction action.

    Returns ``thcov_ht`` when ``use_ht_uncertainties`` is enabled
    (higher-twist based covariance matrix), and the scale-variation
    based ``covs_pt_prescrip`` otherwise.
    """
    if not use_ht_uncertainties:
        from validphys.theorycovariance.construction import covs_pt_prescrip

        return covs_pt_prescrip
    from validphys.theorycovariance.construction import thcov_ht

    return thcov_ht

@configparser.explicit_node
def produce_combine_custom(self, use_ht_uncertainties: bool = False):
    """Select the per-process combination action.

    Returns ``combine_by_type_ht`` when ``use_ht_uncertainties`` is
    enabled, and the standard ``combine_by_type`` otherwise.
    """
    if not use_ht_uncertainties:
        from validphys.theorycovariance.construction import combine_by_type

        return combine_by_type
    from validphys.theorycovariance.construction import combine_by_type_ht

    return combine_by_type_ht

@configparser.explicit_node
def produce_nnfit_theory_covmat(
self, point_prescriptions: list = None, user_covmat_path: str = None
Expand All @@ -1183,8 +1205,31 @@ def produce_nnfit_theory_covmat(
from validphys.theorycovariance.construction import user_covmat_fitting

f = user_covmat_fitting
elif use_ht_uncertainties:
# NOTE: this covmat is the same as for scale variations, which will result in a clash of
# table names if we wish to use them simultaneously
if use_user_uncertainties:
from validphys.theorycovariance.construction import total_theory_covmat_fitting

return f
f = total_theory_covmat_fitting
else:
from validphys.theorycovariance.construction import theory_covmat_custom_fitting

f = theory_covmat_custom_fitting

@functools.wraps(f)
def res(*args, **kwargs):
return f(*args, **kwargs)

# Set this to get the same filename regardless of the action.
res.__name__ = "theory_covmat"
return res

@configparser.explicit_node
def produce_combine_by_type_custom(self, use_ht_uncertainties: bool = False):
    """Select the per-process combination action.

    Returns ``combine_by_type_ht`` when ``use_ht_uncertainties`` is
    enabled, and the standard ``combine_by_type`` otherwise.

    NOTE(review): this duplicates ``produce_combine_custom`` — consider
    keeping only one of the two producers.
    """
    # Use local imports for consistency with the sibling producers
    # (produce_covmat_custom / produce_combine_custom): the previous
    # attribute-style access required validphys.theorycovariance.construction
    # to have been imported as a module elsewhere.
    if use_ht_uncertainties:
        from validphys.theorycovariance.construction import combine_by_type_ht

        return combine_by_type_ht
    from validphys.theorycovariance.construction import combine_by_type

    return combine_by_type

def produce_fitthcovmat(
self, use_thcovmat_if_present: bool = False, fit: (str, type(None)) = None
Expand Down Expand Up @@ -1731,15 +1776,15 @@ def produce_filter_data(self, fakedata: bool = False, theorycovmatconfig=None):
if not fakedata:
return validphys.filters.filter_real_data
else:
if theorycovmatconfig is not None and theorycovmatconfig.get(
"use_thcovmat_in_sampling"
):
# NOTE: By the time we run theory covmat closure tests,
# hopefully the generation of pseudodata will be done in python.
raise ConfigError(
"Generating closure test data which samples from the theory "
"covariance matrix has not been implemented yet."
)
# if theorycovmatconfig is not None and theorycovmatconfig.get(
# "use_thcovmat_in_sampling"
# ):
# # NOTE: By the time we run theory covmat closure tests,
# # hopefully the generation of pseudodata will be done in python.
# raise ConfigError(
# "Generating closure test data which samples from the theory "
# "covariance matrix has not been implemented yet."
# )
return validphys.filters.filter_closure_data_by_experiment

@configparser.explicit_node
Expand Down
24 changes: 16 additions & 8 deletions validphys2/src/validphys/dataplots.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
Plots of relations between data PDFs and fits.
"""

from __future__ import generator_stop

from collections import defaultdict
from collections.abc import Sequence
import itertools
Expand All @@ -28,7 +26,7 @@
from validphys.coredata import KIN_NAMES
from validphys.plotoptions.core import get_info, kitable, transform_result
from validphys.results import chi2_stat_labels, chi2_stats
from validphys.sumrules import POL_LIMS, partial_polarized_sum_rules
from validphys.sumrules import POL_LIMS
from validphys.utils import sane_groupby_iter, scale_from_grid, split_ranges

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -301,9 +299,7 @@ def _plot_fancy_impl(
min_vals = []
max_vals = []
fig, ax = plotutils.subplots()
ax.set_title(
"{} {}".format(info.dataset_label, info.group_label(samefig_vals, info.figure_by))
)
ax.set_title(f"{info.dataset_label} {info.group_label(samefig_vals, info.figure_by)}")

lineby = sane_groupby_iter(fig_data, info.line_by)

Expand Down Expand Up @@ -1287,7 +1283,7 @@ def _check_display_cuts_requires_use_cuts(display_cuts, use_cuts):

@make_argcheck
def _check_marker_by(marker_by):
markers = ('process type', 'experiment', 'dataset', 'group')
markers = ('process type', 'experiment', 'dataset', 'group', 'kinematics')
if marker_by not in markers:
raise CheckError("Unknown marker_by value", marker_by, markers)

Expand Down Expand Up @@ -1346,7 +1342,8 @@ def plot_xq2(
will be displaed and marked.

The points are grouped according to the `marker_by` option. The possible
values are: "process type", "experiment", "group" or "dataset".
values are: "process type", "experiment", "group" or "dataset" for discrete
colors, or "kinematics" for coloring by 1/(Q2(1-x))

Some datasets can be made to appear highlighted in the figure: Define a key
called ``highlight_datasets`` containing the names of the datasets to be
Expand Down Expand Up @@ -1461,6 +1458,7 @@ def plot_xq2(

xh = defaultdict(list)
q2h = defaultdict(list)
cvdict = defaultdict(list)

if not highlight_datasets:
highlight_datasets = set()
Expand Down Expand Up @@ -1491,6 +1489,8 @@ def next_options():
elif marker_by == "group":
# if group is None then make sure that shows on legend.
key = str(group)
elif marker_by == "kinematics":
key = None
else:
raise ValueError('Unknown marker_by value')

Expand All @@ -1506,6 +1506,7 @@ def next_options():
xdict = x
q2dict = q2

cvdict[key].append(commondata.load().get_cv())
xdict[key].append(fitted[0])
q2dict[key].append(fitted[1])
if display_cuts:
Expand All @@ -1520,6 +1521,13 @@ def next_options():
else:
# This is to get the label key
coords = [], []
if marker_by == "kinematics":
ht_magnitude = np.concatenate(cvdict[key]) / (coords[1] * (1 - coords[0]))
out = ax.scatter(
*coords, marker='.', c=ht_magnitude, cmap="viridis", norm=mcolors.LogNorm()
)
clb = fig.colorbar(out)
clb.ax.set_title(r'$F_\mathrm{exp}\frac{1}{Q^2(1-x)}$')
ax.plot(*coords, label=key, markeredgewidth=1, markeredgecolor=None, **key_options[key])

# Iterate again so highlights are printed on top.
Expand Down
37 changes: 37 additions & 0 deletions validphys2/src/validphys/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ def data_index(data):

experiments_data = collect("data", ("group_dataset_inputs_by_experiment",))

# NOTE: Same as `groups_data_by_process` in `construction.py`
procs_data = collect("data", ("group_dataset_inputs_by_process",))
Comment on lines +242 to 243
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

feel free to delete one (in master)



Expand Down Expand Up @@ -271,6 +272,42 @@ def groups_index(groups_data):
return df.index


def group_kin_table_no_table(groups_data, groups_index):
    """Generate a table containing the kinematics and the process type.

    Parameters
    ----------
    groups_data : list
        Groups of datasets; each element must provide ``load_commondata()``
        returning loaded commondata objects with a ``commondata_table``.
    groups_index : pd.Index
        Index to assign to the resulting table; must match the total number
        of data points across all groups.

    Returns
    -------
    pd.DataFrame
        Table with columns ``kin_1``, ``kin_2``, ``kin_3`` and
        ``process_type``. Empty DataFrame if no records are found.
    """
    result_records = []
    for group_data in groups_data:
        group_cd = group_data.load_commondata()
        # Stack the kinematics and process columns of all datasets in the
        # group into a single (npoints, 4) object array.
        cd = np.concatenate(
            [
                group_cd[i].commondata_table[['kin1', 'kin2', 'kin3', 'process']]
                for i in range(len(group_cd))
            ],
            axis=0,
        )
        for row in cd:
            # 'process' may be an enum-like object exposing ``.name`` or a
            # plain string, depending on how the commondata was parsed.
            try:
                process_name = row[3].name
            except AttributeError:
                process_name = row[3]
            result_records.append(
                {
                    "kin_1": row[0],
                    "kin_2": row[1],
                    "kin_3": row[2],
                    "process_type": process_name,
                }
            )

    if not result_records:
        log.warning("Empty records for group results")
        return pd.DataFrame()
    return pd.DataFrame(result_records, columns=result_records[0].keys(), index=groups_index)


def experiments_index(experiments_data):
return groups_index(experiments_data)

Expand Down
Loading
Loading