From 25ab56870d6ce52c71d724d5e60e60ee5deaefa5 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Tue, 11 Jun 2024 17:03:46 -0400 Subject: [PATCH] responding to @melanieclarke and @braingram comments --- docs/jwst/badpix_selfcal/arguments.rst | 2 +- docs/jwst/badpix_selfcal/description.rst | 22 ++-- jwst/badpix_selfcal/badpix_selfcal.py | 21 ++-- jwst/badpix_selfcal/badpix_selfcal_step.py | 109 ++++++++++++------ .../tests/test_badpix_selfcal.py | 12 +- .../master_background_step.py | 55 +++------ jwst/pipeline/calwebb_spec2.py | 17 +-- jwst/regtest/test_miri_mrs_badpix_selfcal.py | 37 ++++-- 8 files changed, 165 insertions(+), 110 deletions(-) diff --git a/docs/jwst/badpix_selfcal/arguments.rst b/docs/jwst/badpix_selfcal/arguments.rst index 957c4acd7c4..6f9f73ac772 100644 --- a/docs/jwst/badpix_selfcal/arguments.rst +++ b/docs/jwst/badpix_selfcal/arguments.rst @@ -8,7 +8,7 @@ The ``badpix_selfcal`` step has the following optional arguments. ``--kernel_size`` (integer, default=15) The size of the kernel to use for the median filter, which is applied - in the spectral (Y-axis) direction to make the smoothed image. + in the spectral direction to make the smoothed image. ``--save_flagged_bkgd`` (boolean, default=False) Whether to save the flagged background exposures to fits files. diff --git a/docs/jwst/badpix_selfcal/description.rst b/docs/jwst/badpix_selfcal/description.rst index 0990b277264..5f6caeffbfe 100644 --- a/docs/jwst/badpix_selfcal/description.rst +++ b/docs/jwst/badpix_selfcal/description.rst @@ -8,7 +8,7 @@ Overview -------- The ``badpix_selfcal`` step flags bad pixels in the input data using a self-calibration technique based on median filtering along the spectral axis. -When background exposures are available, those are used in combination with the science +When additional exposures are available, those are used in combination with the science exposure to identify bad pixels; when unavailable, the step will be skipped with a warning unless the ``force_single`` parameter is set True. In that case, the science data alone is used as its own "background". @@ -18,22 +18,27 @@ in the :ref:`calwebb_spec2 ` pipeline. Input details ------------- -The input data must be in the form of a `~jwst.datamodels.IFUImageModel` or +The input data must be in the form of a `~jwst.datamodels.IFUImageModel` or a `~jwst.datamodels.ModelContainer` containing exactly one -science exposure and any number of background exposures. A fits or association file +science exposure and any number of additional exposures. +A fits or association file that can be read into one of these data models is also acceptable. +Any exposure with the metadata attribute ``asn.exptype`` set to +``background`` or ``selfcal`` will be used in conjunction with the science +exposure to construct the combined background image. Algorithm --------- The algorithm relies on the assumption that bad pixels are outliers in the data along the spectral axis. The algorithm proceeds as follows: -* A combined background image is created. If background exposures are available, - the pixelwise minimum of all background and science exposures is taken. - If no background exposures are available, the science data itself is passed in +* A combined background image is created. If additional (``selfcal`` or ``background``) + exposures are available, + the pixelwise minimum of all background, selfcal, and science exposures is taken. + If no additional exposures are available, the science data itself is passed in without modification, serving as the "background image" for the rest of the procedure, i.e., true self-calibration. -* The combined background image is median-filtered, ignoring NaNs, along the spectral (Y-) axis +* The combined background image is median-filtered, ignoring NaNs, along the spectral axis with a user-specified kernel size. The default kernel size is 15 pixels. * The difference between the original background image and the median-filtered background image is taken. The highest- and lowest-flux pixels in this difference image are @@ -42,7 +47,8 @@ the spectral axis. The algorithm proceeds as follows: using the ``flagfrac`` parameter. The total fraction of flagged pixels is thus 2x ``flagfrac``. * The bad pixels are flagged in the input data by setting the DQ flag to "OTHER_BAD_PIXEL" and "DO_NOT_USE". -* The bad pixels are also flagged in each background exposure, if available. +* The bad pixels are also flagged in each exposure with ``asn.exptype`` equal to ``background``, + if available. Output product -------------- diff --git a/jwst/badpix_selfcal/badpix_selfcal.py b/jwst/badpix_selfcal/badpix_selfcal.py index 1dec265f50a..2c2b19a857c 100644 --- a/jwst/badpix_selfcal/badpix_selfcal.py +++ b/jwst/badpix_selfcal/badpix_selfcal.py @@ -6,7 +6,8 @@ def badpix_selfcal(minimg: np.ndarray, - flagfrac: float = 0.001, + flagfrac_lower: float = 0.001, + flagfrac_upper: float = 0.001, kernel_size: int = 15, dispaxis=None) -> np.ndarray: """ @@ -18,8 +19,10 @@ def badpix_selfcal(minimg: np.ndarray, Selfcal data of shape (x, y), i.e., after some operation has already been taken to combine multiple exposures, typically a MIN operation. - flagfrac: float - Fraction of pixels to flag on each of low and high end + flagfrac_lower: float + Fraction of pixels to flag on the low end + flagfrac_upper: float + Fraction of pixels to flag on the high end kernel_size: int Size of kernel for median filter dispaxis: int @@ -32,9 +35,11 @@ def badpix_selfcal(minimg: np.ndarray, Indices of the flagged pixels, shaped like output from np.where """ - if flagfrac < 0 or flagfrac >= 0.5: - raise ValueError("flagfrac must be between 0 and 0.5. \ - Note this fraction will be flagged on both high and low ends.") + if (flagfrac_lower < 0) or (flagfrac_upper < 0): + raise ValueError("Flagged fraction must be between 0 and 0.5") + if flagfrac_lower + flagfrac_upper >= 1: + raise ValueError("Total flagged fraction (lower plus upper) must be less than 1") + if dispaxis not in [1, 2, None]: raise ValueError("dispaxis must be either 1 or 2, or None.") @@ -51,7 +56,7 @@ def badpix_selfcal(minimg: np.ndarray, minimg_hpf = minimg - smoothed # Flag outliers using percentile cutoff - flag_low, flag_high = np.nanpercentile(minimg_hpf, [flagfrac * 100, (1 - flagfrac) * 100]) + flag_low, flag_high = np.nanpercentile(minimg_hpf, [flagfrac_lower * 100, (1 - flagfrac_upper) * 100]) bad = (minimg_hpf > flag_high) | (minimg_hpf < flag_low) flagged_indices = np.where(bad) @@ -77,7 +82,7 @@ def apply_flags(input_model: dm.IFUImageModel, flagged_indices: np.ndarray) -> d Flagged data model """ - input_model.dq[flagged_indices] = pixel["DO_NOT_USE"] + pixel["OTHER_BAD_PIXEL"] + input_model.dq[flagged_indices] += pixel["DO_NOT_USE"] + pixel["OTHER_BAD_PIXEL"] input_model.data[flagged_indices] = np.nan input_model.err[flagged_indices] = np.nan diff --git a/jwst/badpix_selfcal/badpix_selfcal_step.py b/jwst/badpix_selfcal/badpix_selfcal_step.py index c7c33725d39..d62ebfca90e 100644 --- a/jwst/badpix_selfcal/badpix_selfcal_step.py +++ b/jwst/badpix_selfcal/badpix_selfcal_step.py @@ -4,7 +4,6 @@ from . import badpix_selfcal import numpy as np from jwst import datamodels as dm -from jwst.master_background.master_background_step import split_container import logging log = logging.getLogger(__name__) @@ -21,14 +20,16 @@ class BadpixSelfcalStep(Step): All input exposures in the association file (or manually-provided bkg_list) are combined into a single background model using a MIN operation. The bad pixels are then identified using a median filter and percentile cutoffs, and applied to the science data by setting - the flagged pixels to NaN and the DQ flag to DO_NOT_USE + OTHER_BAD_PIXEL. + the flagged pixels, errors, and variances to NaN + and the DQ flag to DO_NOT_USE + OTHER_BAD_PIXEL. """ class_alias = "badpix_selfcal" bkg_suffix = "badpix_selfcal" spec = """ - flagfrac = float(default=0.001) #fraction of pixels to flag on each of low and high end + flagfrac_lower = float(default=0.001) #fraction of pixels to flag on the low-flux end + flagfrac_upper = float(default=0.001) #fraction of pixels to flag on the high-flux end kernel_size = integer(default=15) #size of kernel for median filter force_single = boolean(default=False) #force single input exposure skip = boolean(default=True) @@ -41,9 +42,9 @@ def process(self, input, selfcal_list=[]): Parameters ---------- input: JWST data model or association - input science data to be corrected + input science data to be corrected, or tuple of (sci, bkg, selfcal) - selfcal_list: list of ImageModels or filenames to use for selfcal + selfcal_list: list of user-defined ImageModels or filenames to use for selfcal Returns ------- @@ -62,13 +63,15 @@ def process(self, input, selfcal_list=[]): In that case, the input exposure will be used as the sole background exposure, i.e., true self-calibration. """ - input_sci, selfcal_list, bkg_list_asn = _parse_inputs(input, selfcal_list) + input_sci, selfcal_list, bkg_list_asn = _parse_inputs(input, selfcal_list) # ensure that there are background exposures to use, otherwise skip the step # unless forced if (len(selfcal_list) == 0) and (not self.force_single): - log.warning("No background exposures provided for self-calibration. Skipping step.") - self.record_step_status(input_sci, "badpix_selfcal", success=False) + log.warning("No selfcal or background exposures provided for self-calibration. \ + Skipping step. If you want to force self-calibration with the science \ + exposure alone (generally not recommended), set force_single=True.") + input_sci.meta.cal_step.badpix_selfcal = 'SKIPPED' return input_sci, bkg_list_asn # get the dispersion axis @@ -79,17 +82,15 @@ def process(self, input, selfcal_list=[]): Kernel for self-calibration will be two-dimensional.") dispaxis = None - # open all selfcal exposures + # collapse all selfcal exposures into a single background model # note that selfcal_list includes the science exposure. This is expected. # all exposures are combined into a single background model using a MIN operation. selfcal_list = [input_sci] + [dm.open(k) for k in selfcal_list] - - # collapse background dithers into a single background model selfcal_3d = [] for i, selfcal_model in enumerate(selfcal_list): selfcal_3d.append(selfcal_model.data) minimg = np.nanmin(np.asarray(selfcal_3d), axis=0) - bad_indices = badpix_selfcal.badpix_selfcal(minimg, self.flagfrac, self.kernel_size, dispaxis) + bad_indices = badpix_selfcal.badpix_selfcal(minimg, self.flagfrac_lower, self.flagfrac_upper, self.kernel_size, dispaxis) # apply the flags to the science data input_sci = badpix_selfcal.apply_flags(input_sci, bad_indices) @@ -97,14 +98,13 @@ def process(self, input, selfcal_list=[]): # apply the flags to the background data to be passed to background sub step if len(bkg_list_asn) > 0: for i, background_model in enumerate(bkg_list_asn): - bkg_list_asn[i] = badpix_selfcal.apply_flags(background_model, bad_indices) + bkg_list_asn[i] = badpix_selfcal.apply_flags(dm.open(background_model), bad_indices) - self.record_step_status(input_sci, "badpix_selfcal", success=True) + input_sci.meta.cal_step.badpix_selfcal = 'COMPLETE' + return input_sci, bkg_list_asn - return input_sci, *bkg_list_asn - -def _parse_inputs(input, selfcal_list=[]): +def _parse_inputs(input, selfcal_list): """ Parse the input to the step. This is a helper function that is used in the command line interface to the step. @@ -112,10 +112,9 @@ def _parse_inputs(input, selfcal_list=[]): Parameters ---------- input: str - input file or association + input file or association, or tuple of (sci, bkg, selfcal) - selfcal_list: list - ImageModels or filenames to use for selfcal + selfcal_list: list of user-supplied ImageModels or filenames to use for selfcal Returns ------- @@ -124,30 +123,66 @@ def _parse_inputs(input, selfcal_list=[]): selfcal_list: list of ImageModels or filenames to use for selfcal """ - with dm.open(input) as input_data: + selfcal_list_user = selfcal_list + + if isinstance(input, tuple): + input_sci = dm.open(input[0]) + selfcal_list = list(input[1]) + list(input[2]) + bkg_list_asn = input[1] - # find science and background exposures. - if isinstance(input_data, dm.ModelContainer): + else: + with dm.open(input) as input_data: - sci_models, bkg_list_asn, selfcal_list_asn = split_container(input_data, - fields=['science', 'background', 'selfcal']) + # find science and background exposures. + if isinstance(input_data, dm.ModelContainer): + + sci_models, bkg_list_asn, selfcal_list_asn = split_container_by_asn_exptype( + input_data, exptypes=['science', 'background', 'selfcal']) - if len(selfcal_list) == 0: selfcal_list = list(bkg_list_asn) + list(selfcal_list_asn) + + if len(sci_models) > 1: + raise ValueError("Input data contains multiple science exposures. \ + This is not supported in calwebb_spec2 steps.") + input_sci = sci_models[0] + + elif isinstance(input_data, dm.IFUImageModel) or isinstance(input_data, dm.ImageModel): + + input_sci = input_data + bkg_list_asn = [] + else: - log.warning("selfcal_list provided directly as input, ignoring background \ - and selfcal exposure types in the association file") + raise TypeError("Input data is not a ModelContainer, ImageModel, or IFUImageModel.\ + Cannot continue.") + + if len(selfcal_list_user) > 0: + selfcal_list = selfcal_list_user + log.warning("selfcal_list provided directly as input, ignoring any other \ + input background and selfcal exposure types, from e.g. an input \ + association file") + return input_sci, selfcal_list, bkg_list_asn - # in calwebb_spec2 there should be only a single science exposure in an association - input_sci = sci_models[0] - elif isinstance(input_data, dm.IFUImageModel) or isinstance(input_data, dm.ImageModel): +def split_container_by_asn_exptype(container: dm.ModelContainer, exptypes: list) -> list: + """ + Split a ModelContainer into a list of ImageModels based on exposure type. - input_sci = input_data - bkg_list_asn = [] + Parameters + ---------- + container: ModelContainer + input data - else: - raise TypeError("Input data is not a ModelContainer, ImageModel, or IFUImageModel.\ - Cannot continue.") + exptype: str + exposure type to split on - return input_sci, selfcal_list, bkg_list_asn + Returns + ------- + split_list: list of lists + lists of ImageModels + """ + split_list = [] + for exptype in exptypes: + good_models = [container[j] for j in range(len(container)) if container[j].meta.asn.exptype == exptype] + split_list.append(good_models) + + return split_list diff --git a/jwst/badpix_selfcal/tests/test_badpix_selfcal.py b/jwst/badpix_selfcal/tests/test_badpix_selfcal.py index ae6260e4bd0..56619de2290 100644 --- a/jwst/badpix_selfcal/tests/test_badpix_selfcal.py +++ b/jwst/badpix_selfcal/tests/test_badpix_selfcal.py @@ -202,7 +202,7 @@ def test_background_flagger_mrs(background): # pass into the MRSBackgroundFlagger and check it found the right pixels flagfrac = 0.001 - result = badpix_selfcal(bg, flagfrac=flagfrac) + result = badpix_selfcal(bg, flagfrac_lower=flagfrac, flagfrac_upper=flagfrac) result_tuples = [(i, j) for i, j in zip(*result)] # check that the hot pixels were among those flagged @@ -248,10 +248,14 @@ def test_badpix_selfcal_step(request, dset): assert result[0].meta.cal_step.badpix_selfcal == "COMPLETE" if dset == "sci": - assert len(result) == 1 + assert len(result) == 2 + assert len(result[0]) == 1 + assert len(result[1]) == 0 else: - # should return sci, bkg0, bkg1 but not selfcal0, selfcal1 - assert len(result) == 3 + # should return sci, (bkg0, bkg1) but not selfcal0, selfcal1 + assert len(result) == 2 + assert len(result[0]) == 1 + assert len(result[1]) == 2 def test_expected_fail_sci(sci): diff --git a/jwst/master_background/master_background_step.py b/jwst/master_background/master_background_step.py index 418af6ea25f..76efbbff64e 100755 --- a/jwst/master_background/master_background_step.py +++ b/jwst/master_background/master_background_step.py @@ -236,48 +236,29 @@ def copy_background_to_surf_bright(spectrum): spec.spec_table['BKGD_ERROR'][:] = 0. -def split_container(container: ModelContainer, fields=['science', 'background']): +def split_container(container): + """Divide a ModelContainer with science and background into one of each """ - Divide a ModelContainer with multiple exposure types, e.g. - science, background, and selfcal members, into separate containers. - Similar to, but distinct from, the split_container - method in MasterBackgroundStep, because this method - includes selfcal member handling. + asn = container.meta.asn_table.instance - Parameters - ---------- - container: ModelContainer - The input container + background = ModelContainer() + science = ModelContainer() - fields: list, optional. - The list of exposure type strings to split the container into. Default - is ['science', 'background']. + for ind_science in container.ind_asn_type('science'): + science.append(container._models[ind_science]) - Returns - ------- - container_list: list - List of ModelContainers, one for each field. - """ - asn = container.meta.asn_table.instance + for ind_bkgd in container.ind_asn_type('background'): + background.append(container._models[ind_bkgd]) - container_list = [] - for field in fields: - sub_container = ModelContainer() - for ind_field in container.ind_asn_type(field): - sub_container.append(container._models[ind_field]) - - # Pass along the association table to the output container - if field == "science": - sub_container.meta.asn_table = {} - sub_container.asn_pool_name = container.asn_pool_name - sub_container.asn_table_name = container.asn_table_name - merge_tree(sub_container.meta.asn_table.instance, asn) - # Prune the non-science members from the table - for p in sub_container.meta.asn_table.instance['products']: - p['members'] = [m for m in p['members'] if m['exptype'].lower() == 'science'] - container_list.append(sub_container) - - return container_list + # Pass along the association table to the output science container + science.meta.asn_table = {} + science.asn_pool_name = container.asn_pool_name + science.asn_table_name = container.asn_table_name + merge_tree(science.meta.asn_table.instance, asn) + # Prune the background members from the table + for p in science.meta.asn_table.instance['products']: + p['members'] = [m for m in p['members'] if m['exptype'].lower() != 'background'] + return science, background def subtract_2d_background(source, background): diff --git a/jwst/pipeline/calwebb_spec2.py b/jwst/pipeline/calwebb_spec2.py index e6f40fbb9c6..894847884bd 100644 --- a/jwst/pipeline/calwebb_spec2.py +++ b/jwst/pipeline/calwebb_spec2.py @@ -245,17 +245,20 @@ def process_exposure_product( raise RuntimeError('Cannot determine WCS.') # Steps whose order is the same for all types of input: - # self-calibrate bad/warm pixels, and apply to both background and science # skipped by default for all modes - result = self.badpix_selfcal(calibrated, members_by_type['background']) - if isinstance(result, tuple): + print(type(members_by_type['background'])) + result = self.badpix_selfcal( + (calibrated, members_by_type['background'], members_by_type['selfcal']), + ) + if len(result) == 1: + # if step is skipped, unchanged sci exposure is returned + calibrated = result[0] + else: # if step actually occurs, then flagged backgrounds are also returned - calibrated, bkg_outlier_flagged = result[0], result[1:] + calibrated, bkg_outlier_flagged = result[0], result[1] members_by_type['background'] = bkg_outlier_flagged - else: - # if step is skipped, result is just input - calibrated = result + print(type(members_by_type['background'])) # apply msa_flagging (flag stuck open shutters for NIRSpec IFU and MOS) calibrated = self.msa_flagging(calibrated) diff --git a/jwst/regtest/test_miri_mrs_badpix_selfcal.py b/jwst/regtest/test_miri_mrs_badpix_selfcal.py index e4755ac9285..a040ac69e5a 100644 --- a/jwst/regtest/test_miri_mrs_badpix_selfcal.py +++ b/jwst/regtest/test_miri_mrs_badpix_selfcal.py @@ -1,28 +1,49 @@ import pytest from astropy.io.fits.diff import FITSDiff - -from jwst.badpix_selfcal import BadpixSelfcalStep +from jwst.stpipe import Step @pytest.fixture(scope="module") -def run_pipeline(rtdata_module): +def run_pipeline_background(rtdata_module): rtdata = rtdata_module rtdata.get_asn("miri/mrs/jw01204-o021_20240127t024203_spec2_00010_asn.json") - BadpixSelfcalStep.call(rtdata.input, skip=False, save_results=True, flagfrac=0.0005) + Step.from_cmdline(['calwebb_spec2', rtdata.input, + "--steps.badpix_selfcal.save_results=True", + "--steps.badpix_selfcal.flagfrac_lower=0.0005", + "--steps.badpix_selfcal.skip=False"]) + rtdata.output = "jw01204021001_02101_00004_mirifulong_0_badpix_selfcal.fits" + return rtdata + + +@pytest.fixture(scope="module") +def run_pipeline_selfcal(rtdata_module): + '''Identical pipeline run to above, but input asn sets all background exposures as `selfcal` type. + ''' + rtdata = rtdata_module + rtdata.get_asn("miri/mrs/jw01204-o021_20240127t024203_spec2_00010_selfcal_asn.json") + Step.from_cmdline(['calwebb_spec2', rtdata.input, + "--steps.badpix_selfcal.save_results=True", + "--steps.badpix_selfcal.flagfrac_lower=0.0005", + "--steps.badpix_selfcal.skip=False"]) + rtdata.output = "jw01204021001_02101_00004_mirifulong_0_badpix_selfcal.fits" + return rtdata @pytest.mark.bigdata -def test_miri_mrs_badpix_selfcal(run_pipeline, fitsdiff_default_kwargs,): +@pytest.mark.parametrize("run_pipeline", ["run_pipeline_background", "run_pipeline_selfcal"]) +def test_miri_mrs_badpix_selfcal(run_pipeline, fitsdiff_default_kwargs, request): """Run a test for MIRI MRS data with dedicated background exposures.""" - rtdata = run_pipeline - rtdata.output = "jw01204-o021_20240127t024203_spec2_00010_asn_0_badpixselfcalstep.fits" + rtdata = request.getfixturevalue(run_pipeline) + + import os + print(os.listdir(".")) # Get the truth file rtdata.get_truth("truth/test_miri_mrs_badpix_selfcal/jw01204-o021_20240127t024203_spec2_00010_asn_0_badpixselfcalstep.fits") # Compare the results diff = FITSDiff(rtdata.output, rtdata.truth, **fitsdiff_default_kwargs) - assert diff.identical, diff.report() \ No newline at end of file + assert diff.identical, diff.report()