diff --git a/docs/notebooks/import_catalogs.ipynb b/docs/notebooks/import_catalogs.ipynb index c604205e..03708be7 100644 --- a/docs/notebooks/import_catalogs.ipynb +++ b/docs/notebooks/import_catalogs.ipynb @@ -293,14 +293,14 @@ "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.10.12" } }, "nbformat": 4, diff --git a/docs/notebooks/ztf_bts-ngc.ipynb b/docs/notebooks/ztf_bts-ngc.ipynb index 7bb0c585..b34891e8 100644 --- a/docs/notebooks/ztf_bts-ngc.ipynb +++ b/docs/notebooks/ztf_bts-ngc.ipynb @@ -173,7 +173,9 @@ " matched_df = matched.compute()\n", "\n", "# Let's output transient name, NGC name and angular distance between them\n", - "matched_df = matched_df[[\"IAUID_ztf\", \"Name_ngc\", \"_DIST\", \"RA_ztf\", \"Dec_ztf\"]].sort_values(by=[\"_DIST\"])\n", + "matched_df = matched_df[[\"IAUID_ztf\", \"Name_ngc\", \"_dist_arcsec\", \"RA_ztf\", \"Dec_ztf\"]].sort_values(\n", + " by=[\"_dist_arcsec\"]\n", + ")\n", "matched_df" ] }, @@ -288,7 +290,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/src/lsdb/loaders/dataframe/from_dataframe_utils.py b/src/lsdb/loaders/dataframe/from_dataframe_utils.py index b079e24f..b0135c5d 100644 --- a/src/lsdb/loaders/dataframe/from_dataframe_utils.py +++ b/src/lsdb/loaders/dataframe/from_dataframe_utils.py @@ -23,7 +23,7 @@ def _generate_dask_dataframe( Returns: The catalog's Dask Dataframe and its total number of rows. """ - schema = pixel_dfs[0].iloc[:0, :].copy() + schema = pixel_dfs[0].iloc[:0, :].copy() if len(pixels) > 0 else [] divisions = get_pixels_divisions(pixels) delayed_dfs = [delayed(df) for df in pixel_dfs] ddf = dd.from_delayed(delayed_dfs, meta=schema, divisions=divisions) diff --git a/src/lsdb/loaders/dataframe/margin_catalog_generator.py b/src/lsdb/loaders/dataframe/margin_catalog_generator.py index 7bf545ea..06009bda 100644 --- a/src/lsdb/loaders/dataframe/margin_catalog_generator.py +++ b/src/lsdb/loaders/dataframe/margin_catalog_generator.py @@ -1,14 +1,17 @@ from __future__ import annotations -from typing import List +from typing import Dict, List, Tuple +import dask.dataframe as dd import healpy as hp import hipscat as hc +import numpy as np import pandas as pd from hipscat import pixel_math from hipscat.catalog import CatalogType from hipscat.catalog.margin_cache import MarginCacheCatalogInfo from hipscat.pixel_math import HealpixPixel +from hipscat.pixel_math.healpix_pixel_function import get_pixel_argsort from lsdb import Catalog from lsdb.catalog.margin_catalog import MarginCatalog @@ -27,7 +30,7 @@ def __init__( margin_order: int | None = -1, margin_threshold: float = 5.0, ) -> None: - """Initializes a MarginCatalogGenerator + """Initialize a MarginCatalogGenerator Args: catalog (Catalog): The LSDB catalog to generate margins for @@ -40,11 +43,19 @@ def __init__( self.margin_order = self._set_margin_order(margin_order) def _set_margin_order(self, margin_order: int | None) -> int: - """Set the order of the margin cache to be generated. - If not provided, the margin will be of an order that - is higher than that of the original catalog by 1""" - highest_order = self.hc_structure.partition_info.get_highest_order() - margin_pixel_k = highest_order + 1 + """Calculate the order of the margin cache to be generated. If not provided + the margin will be greater than that of the original catalog by 1. + + Args: + margin_order (int): The order to generate the margin cache with + + Returns: + The validated order of the margin catalog. + + Raises: + ValueError, if the provided margin order is lower than that of the catalog. + """ + margin_pixel_k = self.hc_structure.partition_info.get_highest_order() + 1 if margin_order is None or margin_order == -1: margin_order = margin_pixel_k elif margin_order < margin_pixel_k: @@ -53,73 +64,57 @@ def _set_margin_order(self, margin_order: int | None) -> int: ) return margin_order - def create_catalog(self) -> MarginCatalog: + def create_catalog(self) -> MarginCatalog | None: """Create a margin catalog for another pre-computed catalog Returns: - Margin catalog object for the provided catalog + Margin catalog object, or None if the margin is empty. """ ddf, ddf_pixel_map, total_rows = self._generate_dask_df_and_map() - margin_catalog_info = self._create_catalog_info(total_rows) margin_pixels = list(ddf_pixel_map.keys()) + if total_rows == 0: + return None + margin_catalog_info = self._create_catalog_info(total_rows) margin_structure = hc.catalog.MarginCatalog(margin_catalog_info, margin_pixels) return MarginCatalog(ddf, ddf_pixel_map, margin_structure) - def _generate_dask_df_and_map(self): + def _generate_dask_df_and_map(self) -> Tuple[dd.DataFrame, Dict[HealpixPixel, int], int]: """Create the Dask Dataframe containing the data points in the margins - for the catalog, as well as the mapping of those HEALPix pixels to - HEALPix Dataframes. + for the catalog as well as the mapping of those HEALPix to Dataframes Returns: - Tuple containing the Dask Dataframe, the mapping of HEALPix pixels - to the respective Pandas Dataframes and the total number of rows. + Tuple containing the Dask Dataframe, the mapping of margin HEALPix + to the respective partitions and the total number of rows. """ - # Find the margin pairs of pixels for the catalog healpix_pixels = self.hc_structure.get_healpix_pixels() negative_pixels = self.hc_structure.generate_negative_tree_pixels() combined_pixels = healpix_pixels + negative_pixels margin_pairs_df = self._find_margin_pixel_pairs(combined_pixels) - # Find in which pixels the data is located in the margin catalog - self.dataframe["margin_pixel"] = hp.ang2pix( - 2**self.margin_order, - self.dataframe[self.hc_structure.catalog_info.ra_column].values, - self.dataframe[self.hc_structure.catalog_info.dec_column].values, - lonlat=True, - nest=True, - ) - constrained_data = self.dataframe.reset_index().merge(margin_pairs_df, on="margin_pixel") - - pixel_dfs = [] - ddf_pixel_map = {} - - # For each partition, filter the data according to the threshold - partition_dfs = constrained_data.groupby(["partition_order", "partition_pixel"]) + # Compute points for each margin pixels + margins_pixel_df = self._create_margins(margin_pairs_df) + pixels, partitions = list(margins_pixel_df.keys()), list(margins_pixel_df.values()) - for i, (_, partition) in enumerate(partition_dfs): - order = partition["partition_order"].iloc[0] - pix = partition["partition_pixel"].iloc[0] - pixel = HealpixPixel(order, pix) - df = self._get_partition_data_in_margin(partition, pixel) - pixel_dfs.append(_format_margin_partition_dataframe(df)) - ddf_pixel_map[pixel] = i + # Generate pixel map ordered by _hipscat_index + pixel_order = get_pixel_argsort(pixels) + ordered_pixels = np.asarray(pixels)[pixel_order] + ordered_partitions = [partitions[i] for i in pixel_order] + ddf_pixel_map = {pixel: index for index, pixel in enumerate(ordered_pixels)} - # Generate Dask Dataframe with original schema - pixel_list = list(ddf_pixel_map.keys()) - ddf, total_rows = _generate_dask_dataframe(pixel_dfs, pixel_list) + # Generate the dask dataframe with the pixels and partitions + ddf, total_rows = _generate_dask_dataframe(ordered_partitions, ordered_pixels) return ddf, ddf_pixel_map, total_rows def _find_margin_pixel_pairs(self, pixels: List[HealpixPixel]) -> pd.DataFrame: """Calculate the pairs of catalog pixels and their margin pixels Args: - pixels (List[HealpixPixel]): The list of HEALPix pixels to - compute margin pixels for. These include the catalog - pixels as well as the negative pixels. + pixels (List[HealpixPixel]): The list of HEALPix to compute margin pixels for. + These include the catalog pixels as well as the negative pixels. Returns: - A Pandas Dataframe with the many-to-many mapping between the - partitions and the respective margin pixels. + A Pandas Dataframe with the many-to-many mapping between each catalog HEALPix + and the respective margin pixels. """ n_orders = [] part_pix = [] @@ -140,40 +135,85 @@ def _find_margin_pixel_pairs(self, pixels: List[HealpixPixel]) -> pd.DataFrame: columns=["partition_order", "partition_pixel", "margin_pixel"], ) - def _get_partition_data_in_margin(self, partition_df: pd.DataFrame, pixel: HealpixPixel) -> pd.DataFrame: + def _create_margins(self, margin_pairs_df: pd.DataFrame) -> Dict[HealpixPixel, pd.DataFrame]: + """Compute the margins for all the pixels in the catalog + + Args: + margin_pairs_df (pd.DataFrame): A DataFrame containing all the combinations + of catalog pixels and respective margin pixels + + Returns: + A dictionary mapping each margin pixel to the respective DataFrame. + """ + margin_pixel_df_map: Dict[HealpixPixel, pd.DataFrame] = {} + self.dataframe["margin_pixel"] = hp.ang2pix( + 2**self.margin_order, + self.dataframe[self.hc_structure.catalog_info.ra_column].values, + self.dataframe[self.hc_structure.catalog_info.dec_column].values, + lonlat=True, + nest=True, + ) + constrained_data = self.dataframe.reset_index().merge(margin_pairs_df, on="margin_pixel") + if len(constrained_data): + constrained_data.groupby(["partition_order", "partition_pixel"]).apply( + self._append_margin_df, margin_pixel_df_map + ) + return margin_pixel_df_map + + def _append_margin_df( + self, partition_df: pd.DataFrame, margin_pixel_df_map: Dict[HealpixPixel, pd.DataFrame] + ): + """Filter margin data points and create the partition final Dataframe + + Args: + partition_df (pd.DataFrame): Catalog data points for the margin pixel + margin_pixel_df_map (Dict[HealpixPixel, pd.DataFrame]): A dictionary mapping + each margin pixel to the respective DataFrame. This dictionary is updated + on each call to this method. + """ + partition_order = partition_df["partition_order"].iloc[0] + partition_pixel = partition_df["partition_pixel"].iloc[0] + margin_pixel = HealpixPixel(partition_order, partition_pixel) + df = self._get_data_in_margin(partition_df, margin_pixel) + if len(df): + df = _format_margin_partition_dataframe(df) + margin_pixel_df_map[margin_pixel] = df + + def _get_data_in_margin(self, partition_df: pd.DataFrame, margin_pixel: HealpixPixel) -> pd.DataFrame: """Calculate the margin boundaries for the HEALPix and include the points - on the margins according to the specified threshold. + on the margin according to the specified threshold Args: - partition_df (pd.DataFrame): The partition dataframe - pixel (HealpixPixel): The HEALPix pixel to get the margin points for + partition_df (pd.DataFrame): The margin pixel data + margin_pixel (HealpixPixel): The margin HEALPix Returns: - A Pandas Dataframe with the points of the partition that - are within the specified margin. + A Pandas Dataframe with the points of the partition that are within + the specified threshold in the margin. """ margin_mask = pixel_math.check_margin_bounds( partition_df[self.hc_structure.catalog_info.ra_column].values, partition_df[self.hc_structure.catalog_info.dec_column].values, - pixel.order, - pixel.pixel, + margin_pixel.order, + margin_pixel.pixel, self.margin_threshold, ) - return partition_df.loc[margin_mask] + return partition_df.iloc[margin_mask] def _create_catalog_info(self, total_rows: int) -> MarginCacheCatalogInfo: - """Creates the margin catalog info object + """Create the margin catalog info object Args: - total_rows: The number of elements in the margin catalog + total_rows (int): The number of elements in the margin catalog Returns: - The margin catalog info object + The margin catalog info object. """ + catalog_name = self.hc_structure.catalog_info.catalog_name return MarginCacheCatalogInfo( - catalog_name=f"{self.hc_structure.catalog_info.catalog_name}_margin", + catalog_name=f"{catalog_name}_margin", catalog_type=CatalogType.MARGIN, total_rows=total_rows, - primary_catalog=self.hc_structure.catalog_info.catalog_name, + primary_catalog=catalog_name, margin_threshold=self.margin_threshold, ) diff --git a/tests/conftest.py b/tests/conftest.py index 7f3d978f..605a3711 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,6 +12,7 @@ SMALL_SKY_DIR_NAME = "small_sky" SMALL_SKY_LEFT_XMATCH_NAME = "small_sky_left_xmatch" SMALL_SKY_SOURCE_MARGIN_NAME = "small_sky_source_margin" +SMALL_SKY_ORDER3_SOURCE_MARGIN_NAME = "small_sky_order3_source_margin" SMALL_SKY_XMATCH_NAME = "small_sky_xmatch" SMALL_SKY_XMATCH_MARGIN_NAME = "small_sky_xmatch_margin" SMALL_SKY_TO_XMATCH_NAME = "small_sky_to_xmatch" @@ -181,6 +182,11 @@ def small_sky_source_margin_catalog(test_data_dir): return lsdb.read_hipscat(os.path.join(test_data_dir, SMALL_SKY_SOURCE_MARGIN_NAME)) +@pytest.fixture +def small_sky_order3_source_margin_catalog(test_data_dir): + return lsdb.read_hipscat(os.path.join(test_data_dir, SMALL_SKY_ORDER3_SOURCE_MARGIN_NAME)) + + @pytest.fixture def xmatch_expected_dir(test_data_dir): return os.path.join(test_data_dir, "raw", "xmatch_expected") diff --git a/tests/data/generate_data.ipynb b/tests/data/generate_data.ipynb index 0549b5ef..a803be91 100644 --- a/tests/data/generate_data.ipynb +++ b/tests/data/generate_data.ipynb @@ -224,6 +224,47 @@ "runner.pipeline_with_client(args, client)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### small_sky_order3_source_margin\n", + "\n", + "This one is similar to the previous margin catalogs but it is generated from a source catalog of order 3." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "args = ImportArguments(\n", + " input_file_list=[\"raw/small_sky_source/small_sky_source.csv\"],\n", + " output_path=\".\",\n", + " file_reader=\"csv\",\n", + " ra_column=\"source_ra\",\n", + " dec_column=\"source_dec\",\n", + " catalog_type=\"source\",\n", + " output_artifact_name=\"small_sky_order3_source\",\n", + " constant_healpix_order=3,\n", + " overwrite=True,\n", + " tmp_dir=tmp_dir,\n", + ")\n", + "runner.pipeline_with_client(args, client)\n", + "\n", + "args = MarginCacheArguments(\n", + " input_catalog_path=\"small_sky_order3_source\",\n", + " output_path=\".\",\n", + " output_artifact_name=\"small_sky_order3_source_margin\",\n", + " margin_threshold=300,\n", + " margin_order=7,\n", + " overwrite=True,\n", + " tmp_dir=tmp_dir,\n", + ")\n", + "runner.pipeline_with_client(args, client)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -434,13 +475,6 @@ "tmp_path.cleanup()\n", "client.close()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -459,7 +493,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=264.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=264.parquet new file mode 100644 index 00000000..e9e9ab5a Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=264.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=707.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=707.parquet new file mode 100644 index 00000000..a4977976 Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=707.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=708.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=708.parquet new file mode 100644 index 00000000..21057576 Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=708.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=709.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=709.parquet new file mode 100644 index 00000000..90038c5d Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=709.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=710.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=710.parquet new file mode 100644 index 00000000..d1473bda Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=710.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=711.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=711.parquet new file mode 100644 index 00000000..bd468933 Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=711.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=712.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=712.parquet new file mode 100644 index 00000000..ab0532b9 Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=712.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=713.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=713.parquet new file mode 100644 index 00000000..b6e228b9 Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=713.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=714.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=714.parquet new file mode 100644 index 00000000..7662498c Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=714.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=715.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=715.parquet new file mode 100644 index 00000000..1f8ae6c6 Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=715.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=716.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=716.parquet new file mode 100644 index 00000000..c8259aa5 Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=716.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=717.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=717.parquet new file mode 100644 index 00000000..ae239cac Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=717.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=718.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=718.parquet new file mode 100644 index 00000000..b997223e Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=718.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=719.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=719.parquet new file mode 100644 index 00000000..dc823f81 Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=719.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=720.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=720.parquet new file mode 100644 index 00000000..78b45ed6 Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=720.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=723.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=723.parquet new file mode 100644 index 00000000..9d40eea1 Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=723.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=724.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=724.parquet new file mode 100644 index 00000000..0fc446ee Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=724.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=726.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=726.parquet new file mode 100644 index 00000000..01a79d6b Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=726.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=727.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=727.parquet new file mode 100644 index 00000000..d32cf4cb Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=727.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=728.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=728.parquet new file mode 100644 index 00000000..a3b70329 Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=728.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=729.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=729.parquet new file mode 100644 index 00000000..c2aac2aa Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=729.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=730.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=730.parquet new file mode 100644 index 00000000..02607770 Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=730.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=732.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=732.parquet new file mode 100644 index 00000000..a365e798 Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=732.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=733.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=733.parquet new file mode 100644 index 00000000..366ff44f Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=733.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=734.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=734.parquet new file mode 100644 index 00000000..7dfbc66a Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=734.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=735.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=735.parquet new file mode 100644 index 00000000..2f64ffdc Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=735.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=736.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=736.parquet new file mode 100644 index 00000000..991491a1 Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=736.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=737.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=737.parquet new file mode 100644 index 00000000..34f1bcdf Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=737.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=738.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=738.parquet new file mode 100644 index 00000000..48b4c810 Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=738.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=739.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=739.parquet new file mode 100644 index 00000000..e3692f5d Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=739.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=740.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=740.parquet new file mode 100644 index 00000000..392c7544 Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=740.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=741.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=741.parquet new file mode 100644 index 00000000..fcd858c3 Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=741.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=742.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=742.parquet new file mode 100644 index 00000000..8620cdb3 Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=742.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=743.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=743.parquet new file mode 100644 index 00000000..b7f25d5d Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=743.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=745.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=745.parquet new file mode 100644 index 00000000..a177a2ed Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=745.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=747.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=747.parquet new file mode 100644 index 00000000..e0555795 Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=747.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=748.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=748.parquet new file mode 100644 index 00000000..285d898b Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=748.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=750.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=750.parquet new file mode 100644 index 00000000..bd780352 Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=750.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=752.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=752.parquet new file mode 100644 index 00000000..91b9ff7d Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=752.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=753.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=753.parquet new file mode 100644 index 00000000..fbe8b1ac Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=753.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=754.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=754.parquet new file mode 100644 index 00000000..3fd53664 Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=754.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=755.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=755.parquet new file mode 100644 index 00000000..1d347762 Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=755.parquet differ diff --git a/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=760.parquet b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=760.parquet new file mode 100644 index 00000000..5bbe41ed Binary files /dev/null and b/tests/data/small_sky_order3_source/Norder=3/Dir=0/Npix=760.parquet differ diff --git a/tests/data/small_sky_order3_source/_common_metadata b/tests/data/small_sky_order3_source/_common_metadata new file mode 100644 index 00000000..5d6359eb Binary files /dev/null and b/tests/data/small_sky_order3_source/_common_metadata differ diff --git a/tests/data/small_sky_order3_source/_metadata b/tests/data/small_sky_order3_source/_metadata new file mode 100644 index 00000000..593caed2 Binary files /dev/null and b/tests/data/small_sky_order3_source/_metadata differ diff --git a/tests/data/small_sky_order3_source/catalog_info.json b/tests/data/small_sky_order3_source/catalog_info.json new file mode 100644 index 00000000..e51c0c29 --- /dev/null +++ b/tests/data/small_sky_order3_source/catalog_info.json @@ -0,0 +1,8 @@ +{ + "catalog_name": "small_sky_order3_source", + "catalog_type": "source", + "total_rows": 17161, + "epoch": "J2000", + "ra_column": "source_ra", + "dec_column": "source_dec" +} diff --git a/tests/data/small_sky_order3_source/partition_info.csv b/tests/data/small_sky_order3_source/partition_info.csv new file mode 100644 index 00000000..21091ba3 --- /dev/null +++ b/tests/data/small_sky_order3_source/partition_info.csv @@ -0,0 +1,44 @@ +Norder,Npix,Dir +3,264,0 +3,707,0 +3,708,0 +3,709,0 +3,710,0 +3,711,0 +3,712,0 +3,713,0 +3,714,0 +3,715,0 +3,716,0 +3,717,0 +3,718,0 +3,719,0 +3,720,0 +3,723,0 +3,724,0 +3,726,0 +3,727,0 +3,728,0 +3,729,0 +3,730,0 +3,732,0 +3,733,0 +3,734,0 +3,735,0 +3,736,0 +3,737,0 +3,738,0 +3,739,0 +3,740,0 +3,741,0 +3,742,0 +3,743,0 +3,745,0 +3,747,0 +3,748,0 +3,750,0 +3,752,0 +3,753,0 +3,754,0 +3,755,0 +3,760,0 diff --git a/tests/data/small_sky_order3_source/provenance_info.json b/tests/data/small_sky_order3_source/provenance_info.json new file mode 100644 index 00000000..53988eef --- /dev/null +++ b/tests/data/small_sky_order3_source/provenance_info.json @@ -0,0 +1,53 @@ +{ + "catalog_name": "small_sky_order3_source", + "catalog_type": "source", + "total_rows": 17161, + "epoch": "J2000", + "ra_column": "source_ra", + "dec_column": "source_dec", + "version": "0.2.7.dev15+g85ec4a0", + "generation_date": "2024.03.08", + "tool_args": { + "tool_name": "hipscat_import", + "version": "0.2.5.dev8+g9d0bfc4", + "runtime_args": { + "catalog_name": "small_sky_order3_source", + "output_path": ".", + "output_artifact_name": "small_sky_order3_source", + "tmp_dir": "/var/folders/x4/rmzh8l_s0zxc74nwr72z12340000gn/T/tmpuqum4py3", + "overwrite": true, + "dask_tmp": "", + "dask_n_workers": 1, + "dask_threads_per_worker": 1, + "catalog_path": "./small_sky_order3_source", + "tmp_path": "/var/folders/x4/rmzh8l_s0zxc74nwr72z12340000gn/T/tmpuqum4py3/small_sky_order3_source/intermediate", + "epoch": "J2000", + "catalog_type": "source", + "input_path": null, + "input_paths": [ + "raw/small_sky_source/small_sky_source.csv" + ], + "input_file_list": [ + "raw/small_sky_source/small_sky_source.csv" + ], + "ra_column": "source_ra", + "dec_column": "source_dec", + "use_hipscat_index": false, + "sort_columns": null, + "constant_healpix_order": 3, + "highest_healpix_order": 7, + "pixel_threshold": 1000000, + "mapping_healpix_order": 3, + "debug_stats_only": false, + "file_reader_info": { + "input_reader_type": "CsvReader", + "chunksize": 500000, + "header": "infer", + "schema_file": null, + "separator": ",", + "column_names": null, + "type_map": {} + } + } + } +} diff --git a/tests/data/small_sky_order3_source_margin/Norder=2/Dir=0/Npix=189.parquet b/tests/data/small_sky_order3_source_margin/Norder=2/Dir=0/Npix=189.parquet new file mode 100644 index 00000000..8330c3df Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=2/Dir=0/Npix=189.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=264.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=264.parquet new file mode 100644 index 00000000..4d41cddc Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=264.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=707.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=707.parquet new file mode 100644 index 00000000..49403e4e Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=707.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=708.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=708.parquet new file mode 100644 index 00000000..b94fd1fc Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=708.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=709.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=709.parquet new file mode 100644 index 00000000..f8f4666a Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=709.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=710.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=710.parquet new file mode 100644 index 00000000..468478b3 Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=710.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=713.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=713.parquet new file mode 100644 index 00000000..3f625c89 Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=713.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=715.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=715.parquet new file mode 100644 index 00000000..aade981a Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=715.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=716.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=716.parquet new file mode 100644 index 00000000..78efb13e Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=716.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=717.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=717.parquet new file mode 100644 index 00000000..ebaf16a7 Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=717.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=718.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=718.parquet new file mode 100644 index 00000000..c4cb1f40 Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=718.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=719.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=719.parquet new file mode 100644 index 00000000..c905563e Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=719.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=726.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=726.parquet new file mode 100644 index 00000000..a4276a91 Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=726.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=728.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=728.parquet new file mode 100644 index 00000000..f37bcd5e Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=728.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=729.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=729.parquet new file mode 100644 index 00000000..73e36a81 Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=729.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=730.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=730.parquet new file mode 100644 index 00000000..ff7a6268 Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=730.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=732.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=732.parquet new file mode 100644 index 00000000..dd2ac86a Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=732.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=733.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=733.parquet new file mode 100644 index 00000000..a94b0da2 Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=733.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=734.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=734.parquet new file mode 100644 index 00000000..4f4a2384 Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=734.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=739.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=739.parquet new file mode 100644 index 00000000..351be5c2 Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=739.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=740.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=740.parquet new file mode 100644 index 00000000..c8e3d62d Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=740.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=741.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=741.parquet new file mode 100644 index 00000000..1f9da088 Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=741.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=742.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=742.parquet new file mode 100644 index 00000000..35fe7c3c Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=742.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=743.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=743.parquet new file mode 100644 index 00000000..5869992d Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=743.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=745.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=745.parquet new file mode 100644 index 00000000..7690b434 Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=745.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=747.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=747.parquet new file mode 100644 index 00000000..5cfbede4 Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=747.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=748.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=748.parquet new file mode 100644 index 00000000..4ee34f14 Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=748.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=750.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=750.parquet new file mode 100644 index 00000000..8ab22eb8 Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=750.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=752.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=752.parquet new file mode 100644 index 00000000..07e23006 Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=752.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=753.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=753.parquet new file mode 100644 index 00000000..14cacf36 Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=753.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=754.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=754.parquet new file mode 100644 index 00000000..03462ea9 Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=754.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=755.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=755.parquet new file mode 100644 index 00000000..97b07a13 Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=755.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=760.parquet b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=760.parquet new file mode 100644 index 00000000..6f9cb8f3 Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/Norder=3/Dir=0/Npix=760.parquet differ diff --git a/tests/data/small_sky_order3_source_margin/_common_metadata b/tests/data/small_sky_order3_source_margin/_common_metadata new file mode 100644 index 00000000..340931ce Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/_common_metadata differ diff --git a/tests/data/small_sky_order3_source_margin/_metadata b/tests/data/small_sky_order3_source_margin/_metadata new file mode 100644 index 00000000..9178e54b Binary files /dev/null and b/tests/data/small_sky_order3_source_margin/_metadata differ diff --git a/tests/data/small_sky_order3_source_margin/catalog_info.json b/tests/data/small_sky_order3_source_margin/catalog_info.json new file mode 100644 index 00000000..502973d3 --- /dev/null +++ b/tests/data/small_sky_order3_source_margin/catalog_info.json @@ -0,0 +1,7 @@ +{ + "catalog_name": "small_sky_order3_source_margin", + "catalog_type": "margin", + "total_rows": 1025, + "primary_catalog": "small_sky_order3_source", + "margin_threshold": 300 +} diff --git a/tests/data/small_sky_order3_source_margin/partition_info.csv b/tests/data/small_sky_order3_source_margin/partition_info.csv new file mode 100644 index 00000000..9f79990e --- /dev/null +++ b/tests/data/small_sky_order3_source_margin/partition_info.csv @@ -0,0 +1,34 @@ +Norder,Npix,Dir +3,264,0 +3,707,0 +3,708,0 +3,709,0 +3,710,0 +3,713,0 +3,715,0 +3,716,0 +3,717,0 +3,718,0 +3,719,0 +3,726,0 +3,728,0 +3,729,0 +3,730,0 +3,732,0 +3,733,0 +3,734,0 +3,739,0 +3,740,0 +3,741,0 +3,742,0 +3,743,0 +3,745,0 +3,747,0 +3,748,0 +3,750,0 +3,752,0 +3,753,0 +3,754,0 +3,755,0 +2,189,0 +3,760,0 diff --git a/tests/data/small_sky_order3_source_margin/provenance_info.json b/tests/data/small_sky_order3_source_margin/provenance_info.json new file mode 100644 index 00000000..62211d14 --- /dev/null +++ b/tests/data/small_sky_order3_source_margin/provenance_info.json @@ -0,0 +1,28 @@ +{ + "catalog_name": "small_sky_order3_source_margin", + "catalog_type": "margin", + "total_rows": 1025, + "primary_catalog": "small_sky_order3_source", + "margin_threshold": 300, + "version": "0.2.7.dev15+g85ec4a0", + "generation_date": "2024.03.08", + "tool_args": { + "tool_name": "hipscat_import", + "version": "0.2.5.dev8+g9d0bfc4", + "runtime_args": { + "catalog_name": "small_sky_order3_source_margin", + "output_path": ".", + "output_artifact_name": "small_sky_order3_source_margin", + "tmp_dir": "/var/folders/x4/rmzh8l_s0zxc74nwr72z12340000gn/T/tmpuqum4py3", + "overwrite": true, + "dask_tmp": "", + "dask_n_workers": 1, + "dask_threads_per_worker": 1, + "catalog_path": "./small_sky_order3_source_margin", + "tmp_path": "/var/folders/x4/rmzh8l_s0zxc74nwr72z12340000gn/T/tmpuqum4py3/small_sky_order3_source_margin/intermediate", + "input_catalog_path": "small_sky_order3_source", + "margin_threshold": 300, + "margin_order": 7 + } + } +} diff --git a/tests/lsdb/loaders/dataframe/test_from_dataframe.py b/tests/lsdb/loaders/dataframe/test_from_dataframe.py index 56cc764f..cb34f4a8 100644 --- a/tests/lsdb/loaders/dataframe/test_from_dataframe.py +++ b/tests/lsdb/loaders/dataframe/test_from_dataframe.py @@ -171,7 +171,7 @@ def test_catalog_pixels_nested_ordering(small_sky_source_df): npt.assert_array_equal(argsort, np.arange(0, 14)) -def test_from_dataframe_sky_source_with_margins(small_sky_source_df, small_sky_source_margin_catalog): +def test_from_dataframe_small_sky_source_with_margins(small_sky_source_df, small_sky_source_margin_catalog): catalog = lsdb.from_dataframe( small_sky_source_df, ra_column="source_ra", @@ -179,17 +179,49 @@ def test_from_dataframe_sky_source_with_margins(small_sky_source_df, small_sky_s highest_order=2, threshold=3000, margin_order=8, - margin_threshold=180.0, + margin_threshold=180, ) + assert catalog.margin is not None assert isinstance(catalog.margin, MarginCatalog) + assert catalog.margin.get_healpix_pixels() == small_sky_source_margin_catalog.get_healpix_pixels() + + # The points of this margin catalog are present in one partition only + # so we are able to perform the comparison between the computed results pd.testing.assert_frame_equal( - small_sky_source_margin_catalog.compute(), - catalog.margin.compute(), + catalog.margin.compute().sort_index(), + small_sky_source_margin_catalog.compute().sort_index(), check_like=True, ) +def test_from_dataframe_small_sky_order3_source_with_margins( + small_sky_source_df, small_sky_order3_source_margin_catalog +): + catalog = lsdb.from_dataframe( + small_sky_source_df, + ra_column="source_ra", + dec_column="source_dec", + lowest_order=3, + highest_order=3, + margin_order=7, + margin_threshold=300, + ) + + assert catalog.margin is not None + assert isinstance(catalog.margin, MarginCatalog) + assert catalog.margin.get_healpix_pixels() == small_sky_order3_source_margin_catalog.get_healpix_pixels() + + # There are points in the catalog which are present in several margin pixel partitions, + # so we need to compare each pixel-partition pair + for pixel in small_sky_order3_source_margin_catalog.get_healpix_pixels(): + partition_1 = small_sky_order3_source_margin_catalog.get_partition(pixel.order, pixel.pixel) + partition_2 = catalog.margin.get_partition(pixel.order, pixel.pixel) + pd.testing.assert_frame_equal( + partition_1.compute().sort_index(), partition_2.compute().sort_index(), check_like=True + ) + + def test_from_dataframe_invalid_margin_order(small_sky_source_df): with pytest.raises(ValueError, match="margin_order"): lsdb.from_dataframe( @@ -199,3 +231,14 @@ def test_from_dataframe_invalid_margin_order(small_sky_source_df): lowest_order=2, margin_order=1, ) + + +def test_from_dataframe_margin_is_empty(small_sky_order1_df): + catalog = lsdb.from_dataframe( + small_sky_order1_df, + catalog_name="small_sky_order1", + catalog_type="object", + highest_order=5, + threshold=100, + ) + assert catalog.margin is None