Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New ht thcovmat #2126

Draft
wants to merge 33 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
c990a09
Copied from branch 'HT_thcovmat'
achiefa Jul 15, 2024
5d227c7
Removed version
achiefa Jul 15, 2024
7dbd5cd
Saving progress - not ready
achiefa Jul 15, 2024
e24e691
Implemented d/p ratio
achiefa Jul 16, 2024
c6394e0
Parsing 'separate_multiplicative' in vp_setupfit
achiefa Jul 16, 2024
7d49793
Minor adjustments
achiefa Jul 16, 2024
25c39af
Corrected bug
achiefa Jul 16, 2024
b23c867
Correcting bug
achiefa Jul 17, 2024
5fa1a4f
Implemented knots in runcard
achiefa Aug 5, 2024
850d17c
Added valiphys card for chi2 report
achiefa Aug 20, 2024
599eb49
First implementation of HT at the level of theory predictions
achiefa Aug 23, 2024
7185eba
Implemented table for kinematics
achiefa Sep 20, 2024
9e8b1eb
Allowed theory HT in runcard - added HERACOMB in HT calculations
achiefa Sep 21, 2024
3d2b200
Excluded HERACOMB
achiefa Sep 21, 2024
4f8adcb
Hacking NMC dataset
achiefa Sep 21, 2024
8e7e0fd
Grouping kinematics
achiefa Sep 26, 2024
a0613af
Reimplementing thcovmat
achiefa Sep 26, 2024
10bff87
Added comment in HT for DIS
achiefa Sep 27, 2024
8b01c6e
Corrected normalisation for SIGMARED DIS NC data sets
achiefa Sep 27, 2024
861916b
Removing unused code
achiefa Sep 29, 2024
1d24747
Added HT for F2C data (EMC) - removed deprecated function
achiefa Oct 2, 2024
615234f
Corrected EMC data iron target
achiefa Oct 2, 2024
61a1594
Removed deprecated code
achiefa Oct 3, 2024
d09f7cd
Refactoring + DIS CC
achiefa Oct 3, 2024
8bbc034
Corrected bug - ready for cc test
achiefa Oct 11, 2024
60290f1
Corrected bug - ready
achiefa Oct 11, 2024
87f16c7
Removing unnecessary code
achiefa Oct 11, 2024
cb9a334
Corrected bug after rebase
achiefa Oct 12, 2024
77e0bec
Add normalisation in CC x-secs
achiefa Oct 17, 2024
67a5bb4
Correct normalisation
achiefa Oct 17, 2024
e74f7cf
Restore n3fit files from master
achiefa Oct 17, 2024
54bda12
remove _PB suffix from process type
RoyStegeman Nov 14, 2024
3725aa4
format a bit
RoyStegeman Nov 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
107 changes: 107 additions & 0 deletions validphys2/examples/theory_covariance/chi2table_ht.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
# This is the driver template for vp-comparefits. It consists on a validphys
# runcard where some settings are missing and are to be filled by the
# vp-comparefits script. The settings below are a sample of such settings, kept
# for reference
#
# meta:
# title: The title of the Report
# keywords: [report_template]
# author: NNPDF Collaboration
#
# current:
# fit: {id: id_of_the_base_fit}
# pdf: {id: id_of_the_base_fit, label: "Current Fit"}
# theory:
# from_: fit
# theoryid:
# from_: theory
# speclabel: "Current Fit"
#
# reference:
# fit: {id: id_of_the_reference_fit}
# pdf: {id: id_of_the_reference_fit, label: "Reference Fit" }
# theory:
# from_: fit
# theoryid:
# from_: theory
# speclabel: "Reference Fit"

pdfs:
- {id: "240816-06-7-01-lc", label: "HT low cuts"}
- {id: "240812-02-ABMP-lnv", label: "HT mid cuts"}
- {id: "240812-04-ABMP-lnv", label: "HT std. cuts"}
- {id: "240819_nnpdf40_lowcuts", label: "no HT low cuts"}
- {id: "240807-midcuts", label: "no HT mid cuts"}
- {id: "NNPDF40_nnlo_as_01180_qcd", label: "no HT std cuts (NNPDF40)"}

fits:
- {id: "240816-06-7-01-lc", label: "HT low cuts"}
- {id: "240812-02-ABMP-lnv", label: "HT mid cuts"}
- {id: "240812-04-ABMP-lnv", label: "HT std. cuts"}
- {id: "240819_nnpdf40_lowcuts", label: "no HT low cuts"}
- {id: "240807-midcuts", label: "no HT mid cuts"}
- {id: "NNPDF40_nnlo_as_01180_qcd", label: "no HT std cuts (NNPDF40)"}

use_cuts: "fromfit"
use_weights_in_covmat: False
use_thcovmat_if_present: True

Q: 1.651

#template: report.md

description:
from_: fit

dataset_inputs:
from_: fit

#dataspecs:
# - theoryid:
# from_: current
# pdf:
# from_: current
# fit:
# from_: current
# speclabel:
# from_: current
#
# - theoryid:
# from_: reference
# pdf:
# from_: reference
# fit:
# from_: reference
# speclabel:
# from_: reference

Datanorm:
normalize_to: data

DataGroups:
- metadata_group: nnpdf31_process
- metadata_group: experiment

ProcessGroup:
metadata_group: nnpdf31_process

template_text: |
Summary
-------
{@ summarise_fits @}

{@with DataGroups@}
$\chi^2$ by {@processed_metadata_group@}
----------------------------------------
{@plot_fits_groups_data_chi2@}
{@endwith@}

$\chi^2$ by dataset
-------------------
### Plot
{@plot_fits_datasets_chi2@}
### Table
{@ProcessGroup fits_chi2_table(show_total=true)@}

actions_:
- report(main=true)
9 changes: 8 additions & 1 deletion validphys2/src/validphys/commondata.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@
:py:mod:`validphys.coredata`

"""

import functools

from reportengine import collect
from validphys.commondataparser import load_commondata
import functools


@functools.lru_cache
def loaded_commondata_with_cuts(commondata, cuts):
Expand All @@ -35,3 +38,7 @@ def loaded_commondata_with_cuts(commondata, cuts):
groups_dataset_inputs_loaded_cd_with_cuts = collect(
"loaded_commondata_with_cuts", ("group_dataset_inputs_by_metadata", "data_input")
)

groups_dataset_inputs_loaded_cd_with_cuts_byprocess = collect(
"loaded_commondata_with_cuts", ("group_dataset_inputs_by_process", "data")
)
65 changes: 55 additions & 10 deletions validphys2/src/validphys/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -1157,6 +1157,28 @@ def produce_loaded_user_covmat_path(self, user_covmat_path: str = ""):
fileloc = l.check_vp_output_file(user_covmat_path)
return fileloc

@configparser.explicit_node
def produce_covmat_custom(self, use_ht_uncertainties: bool = False):
    """Select the theory-covmat construction action.

    Returns ``thcov_ht`` when ``use_ht_uncertainties`` is enabled
    (higher-twist based covariance matrix), and the scale-variation
    based ``covs_pt_prescrip`` otherwise.
    """
    if not use_ht_uncertainties:
        from validphys.theorycovariance.construction import covs_pt_prescrip

        return covs_pt_prescrip
    from validphys.theorycovariance.construction import thcov_ht

    return thcov_ht

@configparser.explicit_node
def produce_combine_custom(self, use_ht_uncertainties: bool = False):
    """Select the per-process combination action.

    Returns ``combine_by_type_ht`` when ``use_ht_uncertainties`` is
    enabled, and the standard ``combine_by_type`` otherwise.
    """
    if not use_ht_uncertainties:
        from validphys.theorycovariance.construction import combine_by_type

        return combine_by_type
    from validphys.theorycovariance.construction import combine_by_type_ht

    return combine_by_type_ht

@configparser.explicit_node
def produce_nnfit_theory_covmat(
self, point_prescriptions: list = None, user_covmat_path: str = None
Expand All @@ -1183,8 +1205,31 @@ def produce_nnfit_theory_covmat(
from validphys.theorycovariance.construction import user_covmat_fitting

f = user_covmat_fitting
elif use_ht_uncertainties:
# NOTE: this covmat is the same as for scale variations, which will result in a clash of
# table names if we wish to use them simultaneously
if use_user_uncertainties:
from validphys.theorycovariance.construction import total_theory_covmat_fitting

return f
f = total_theory_covmat_fitting
else:
from validphys.theorycovariance.construction import theory_covmat_custom_fitting

f = theory_covmat_custom_fitting

@functools.wraps(f)
def res(*args, **kwargs):
return f(*args, **kwargs)

# Set this to get the same filename regardless of the action.
res.__name__ = "theory_covmat"
return res

@configparser.explicit_node
def produce_combine_by_type_custom(self, use_ht_uncertainties: bool = False):
    """Select the per-process combination action.

    Returns ``combine_by_type_ht`` when ``use_ht_uncertainties`` is
    enabled, and the standard ``combine_by_type`` otherwise.

    NOTE(review): this duplicates ``produce_combine_custom`` — consider
    keeping only one of the two producers.
    """
    # Use local imports for consistency with the sibling producers
    # (produce_covmat_custom / produce_combine_custom): the previous
    # attribute-style access required validphys.theorycovariance.construction
    # to have been imported as a module elsewhere.
    if use_ht_uncertainties:
        from validphys.theorycovariance.construction import combine_by_type_ht

        return combine_by_type_ht
    from validphys.theorycovariance.construction import combine_by_type

    return combine_by_type

def produce_fitthcovmat(
self, use_thcovmat_if_present: bool = False, fit: (str, type(None)) = None
Expand Down Expand Up @@ -1731,15 +1776,15 @@ def produce_filter_data(self, fakedata: bool = False, theorycovmatconfig=None):
if not fakedata:
return validphys.filters.filter_real_data
else:
if theorycovmatconfig is not None and theorycovmatconfig.get(
"use_thcovmat_in_sampling"
):
# NOTE: By the time we run theory covmat closure tests,
# hopefully the generation of pseudodata will be done in python.
raise ConfigError(
"Generating closure test data which samples from the theory "
"covariance matrix has not been implemented yet."
)
# if theorycovmatconfig is not None and theorycovmatconfig.get(
# "use_thcovmat_in_sampling"
# ):
# # NOTE: By the time we run theory covmat closure tests,
# # hopefully the generation of pseudodata will be done in python.
# raise ConfigError(
# "Generating closure test data which samples from the theory "
# "covariance matrix has not been implemented yet."
# )
return validphys.filters.filter_closure_data_by_experiment

@configparser.explicit_node
Expand Down
24 changes: 16 additions & 8 deletions validphys2/src/validphys/dataplots.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
Plots of relations between data PDFs and fits.
"""

from __future__ import generator_stop

from collections import defaultdict
from collections.abc import Sequence
import itertools
Expand All @@ -28,7 +26,7 @@
from validphys.coredata import KIN_NAMES
from validphys.plotoptions.core import get_info, kitable, transform_result
from validphys.results import chi2_stat_labels, chi2_stats
from validphys.sumrules import POL_LIMS, partial_polarized_sum_rules
from validphys.sumrules import POL_LIMS
from validphys.utils import sane_groupby_iter, scale_from_grid, split_ranges

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -301,9 +299,7 @@ def _plot_fancy_impl(
min_vals = []
max_vals = []
fig, ax = plotutils.subplots()
ax.set_title(
"{} {}".format(info.dataset_label, info.group_label(samefig_vals, info.figure_by))
)
ax.set_title(f"{info.dataset_label} {info.group_label(samefig_vals, info.figure_by)}")

lineby = sane_groupby_iter(fig_data, info.line_by)

Expand Down Expand Up @@ -1287,7 +1283,7 @@ def _check_display_cuts_requires_use_cuts(display_cuts, use_cuts):

@make_argcheck
def _check_marker_by(marker_by):
markers = ('process type', 'experiment', 'dataset', 'group')
markers = ('process type', 'experiment', 'dataset', 'group', 'kinematics')
if marker_by not in markers:
raise CheckError("Unknown marker_by value", marker_by, markers)

Expand Down Expand Up @@ -1346,7 +1342,8 @@ def plot_xq2(
will be displaed and marked.

The points are grouped according to the `marker_by` option. The possible
values are: "process type", "experiment", "group" or "dataset".
values are: "process type", "experiment", "group" or "dataset" for discrete
colors, or "kinematics" for coloring by 1/(Q2(1-x))

Some datasets can be made to appear highlighted in the figure: Define a key
called ``highlight_datasets`` containing the names of the datasets to be
Expand Down Expand Up @@ -1461,6 +1458,7 @@ def plot_xq2(

xh = defaultdict(list)
q2h = defaultdict(list)
cvdict = defaultdict(list)

if not highlight_datasets:
highlight_datasets = set()
Expand Down Expand Up @@ -1491,6 +1489,8 @@ def next_options():
elif marker_by == "group":
# if group is None then make sure that shows on legend.
key = str(group)
elif marker_by == "kinematics":
key = None
else:
raise ValueError('Unknown marker_by value')

Expand All @@ -1506,6 +1506,7 @@ def next_options():
xdict = x
q2dict = q2

cvdict[key].append(commondata.load().get_cv())
xdict[key].append(fitted[0])
q2dict[key].append(fitted[1])
if display_cuts:
Expand All @@ -1520,6 +1521,13 @@ def next_options():
else:
# This is to get the label key
coords = [], []
if marker_by == "kinematics":
ht_magnitude = np.concatenate(cvdict[key]) / (coords[1] * (1 - coords[0]))
out = ax.scatter(
*coords, marker='.', c=ht_magnitude, cmap="viridis", norm=mcolors.LogNorm()
)
clb = fig.colorbar(out)
clb.ax.set_title(r'$F_\mathrm{exp}\frac{1}{Q^2(1-x)}$')
ax.plot(*coords, label=key, markeredgewidth=1, markeredgecolor=None, **key_options[key])

# Iterate again so highlights are printed on top.
Expand Down
37 changes: 37 additions & 0 deletions validphys2/src/validphys/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,7 @@ def data_index(data):

experiments_data = collect("data", ("group_dataset_inputs_by_experiment",))

# NOTE: Same as `groups_data_by_process` in `construction.py`
procs_data = collect("data", ("group_dataset_inputs_by_process",))
Comment on lines +242 to 243
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

feel free to delete one (in master)



Expand Down Expand Up @@ -271,6 +272,42 @@ def groups_index(groups_data):
return df.index


def group_kin_table_no_table(groups_data, groups_index):
    """Generate a table containing the kinematics and the process type.

    Parameters
    ----------
    groups_data : list
        Groups of datasets; each element must provide ``load_commondata()``
        returning loaded commondata objects with a ``commondata_table``.
    groups_index : pd.Index
        Index to assign to the resulting table; must match the total number
        of data points across all groups.

    Returns
    -------
    pd.DataFrame
        Table with columns ``kin_1``, ``kin_2``, ``kin_3`` and
        ``process_type``. Empty DataFrame if no records are found.
    """
    result_records = []
    for group_data in groups_data:
        group_cd = group_data.load_commondata()
        # Stack the kinematics and process columns of all datasets in the
        # group into a single (npoints, 4) object array.
        cd = np.concatenate(
            [
                group_cd[i].commondata_table[['kin1', 'kin2', 'kin3', 'process']]
                for i in range(len(group_cd))
            ],
            axis=0,
        )
        for row in cd:
            # 'process' may be an enum-like object exposing ``.name`` or a
            # plain string, depending on how the commondata was parsed.
            try:
                process_name = row[3].name
            except AttributeError:
                process_name = row[3]
            result_records.append(
                {
                    "kin_1": row[0],
                    "kin_2": row[1],
                    "kin_3": row[2],
                    "process_type": process_name,
                }
            )

    if not result_records:
        log.warning("Empty records for group results")
        return pd.DataFrame()
    return pd.DataFrame(result_records, columns=result_records[0].keys(), index=groups_index)


def experiments_index(experiments_data):
return groups_index(experiments_data)

Expand Down
Loading
Loading