Skip to content

Commit

Permalink
Resolve merge conflicts
Browse files Browse the repository at this point in the history
  • Loading branch information
camposandro committed Mar 8, 2024
2 parents 1d8be09 + 2fbc79c commit 17abe64
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 36 deletions.
22 changes: 1 addition & 21 deletions src/lsdb/catalog/catalog.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
from __future__ import annotations

import dataclasses
from typing import Any, Dict, List, Tuple, Type, Union
from typing import List, Tuple, Type

import dask.dataframe as dd
import hipscat as hc
import pandas as pd
from hipscat.pixel_math.polygon_filter import SphericalCoordinates

from lsdb import io
from lsdb.catalog.association_catalog import AssociationCatalog
from lsdb.catalog.dataset.healpix_dataset import HealpixDataset
from lsdb.catalog.margin_catalog import MarginCatalog
Expand Down Expand Up @@ -324,25 +323,6 @@ def merge(
suffixes=suffixes,
)

def to_hipscat(
self,
base_catalog_path: str,
catalog_name: Union[str, None] = None,
overwrite: bool = False,
storage_options: Union[Dict[Any, Any], None] = None,
**kwargs,
):
"""Saves the catalog to disk in HiPSCat format
Args:
base_catalog_path (str): Location where catalog is saved to
catalog_name (str): The name of the catalog to be saved
overwrite (bool): If True existing catalog is overwritten
storage_options (dict): Dictionary that contains abstract filesystem credentials
**kwargs: Arguments to pass to the parquet write operations
"""
io.to_hipscat(self, base_catalog_path, catalog_name, overwrite, storage_options, **kwargs)

def join(
self,
other: Catalog,
Expand Down
22 changes: 22 additions & 0 deletions src/lsdb/catalog/dataset/healpix_dataset.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import warnings
from typing import Any, Callable, Dict, List, Tuple, cast

Expand All @@ -11,6 +13,7 @@
from hipscat.pixel_math.healpix_pixel_function import get_pixel_argsort
from typing_extensions import Self

from lsdb import io
from lsdb.catalog.dataset.dataset import Dataset
from lsdb.core.plotting.skymap import plot_skymap
from lsdb.core.search.abstract_search import AbstractSearch
Expand Down Expand Up @@ -223,3 +226,22 @@ def skymap(self, func: Callable[[pd.DataFrame, HealpixPixel], Any], **kwargs):
results = dask.compute(*[smdata[pixel] for pixel in pixels])
result_dict = {pixels[i]: results[i] for i in range(len(pixels))}
plot_skymap(result_dict)

def to_hipscat(
self,
base_catalog_path: str,
catalog_name: str | None = None,
overwrite: bool = False,
storage_options: dict | None = None,
**kwargs,
):
"""Saves the catalog to disk in HiPSCat format
Args:
base_catalog_path (str): Location where catalog is saved to
catalog_name (str): The name of the catalog to be saved
overwrite (bool): If True existing catalog is overwritten
storage_options (dict): Dictionary that contains abstract filesystem credentials
**kwargs: Arguments to pass to the parquet write operations
"""
io.to_hipscat(self, base_catalog_path, catalog_name, overwrite, storage_options, **kwargs)
33 changes: 18 additions & 15 deletions src/lsdb/io/to_hipscat.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@
import dask
import hipscat as hc
import pandas as pd
from hipscat.catalog.healpix_dataset.healpix_dataset import HealpixDataset as HCHealpixDataset
from hipscat.io import FilePointer
from hipscat.pixel_math import HealpixPixel

from lsdb.types import HealpixInfo

if TYPE_CHECKING:
from lsdb.catalog.catalog import Catalog
from lsdb.catalog.dataset.healpix_dataset import HealpixDataset


@dask.delayed
Expand Down Expand Up @@ -50,19 +51,19 @@ def perform_write(

# pylint: disable=W0212
def to_hipscat(
catalog: Catalog,
catalog: HealpixDataset,
base_catalog_path: str,
catalog_name: Union[str, None] = None,
overwrite: bool = False,
storage_options: Union[Dict[Any, Any], None] = None,
storage_options: dict | None = None,
**kwargs,
):
"""Writes a catalog to disk, in HiPSCat format. The output catalog comprises
partition parquet files and respective metadata, as well as JSON files detailing
partition, catalog and provenance info.
Args:
catalog (Catalog): A catalog to export
catalog (HealpixDataset): A catalog to export
base_catalog_path (str): Location where catalog is saved to
catalog_name (str): The name of the output catalog
overwrite (bool): If True existing catalog is overwritten
Expand Down Expand Up @@ -101,15 +102,15 @@ def to_hipscat(


def write_partitions(
catalog: Catalog,
catalog: HealpixDataset,
base_catalog_dir_fp: FilePointer,
storage_options: Union[Dict[Any, Any], None] = None,
**kwargs,
) -> Dict[HealpixPixel, int]:
"""Saves catalog partitions as parquet to disk
Args:
catalog (Catalog): A catalog to export
catalog (HealpixDataset): A catalog to export
base_catalog_dir_fp (FilePointer): Path to the base directory of the catalog
storage_options (dict): Dictionary that contains abstract filesystem credentials
**kwargs: Arguments to pass to the parquet write operations
Expand Down Expand Up @@ -164,8 +165,8 @@ def _get_partition_info_dict(ddf_points_map: Dict[HealpixPixel, int]) -> Dict[He


def create_modified_catalog_structure(
catalog_structure: hc.catalog.Catalog, catalog_base_dir: str, catalog_name: str, **kwargs
) -> hc.catalog.Catalog:
catalog_structure: HCHealpixDataset, catalog_base_dir: str, catalog_name: str, **kwargs
) -> HCHealpixDataset:
"""Creates a modified version of the HiPSCat catalog structure
Args:
Expand All @@ -188,22 +189,24 @@ def create_modified_catalog_structure(
return new_hc_structure


def _get_provenance_info(catalog_structure: hc.catalog.Catalog) -> dict:
def _get_provenance_info(catalog_structure: HCHealpixDataset) -> dict:
"""Fill all known information in a dictionary for provenance tracking.
Args:
catalog_structure (HCHealpixDataset): The catalog structure
Returns:
dictionary with all argument_name -> argument_value as key -> value pairs.
"""
catalog_info = catalog_structure.catalog_info
args = {
structure_args = {
"catalog_name": catalog_structure.catalog_name,
"output_path": catalog_structure.catalog_path,
"output_catalog_name": catalog_structure.catalog_name,
"catalog_path": catalog_structure.catalog_path,
"epoch": catalog_info.epoch,
"catalog_type": catalog_info.catalog_type,
"ra_column": catalog_info.ra_column,
"dec_column": catalog_info.dec_column,
}
args = {
**structure_args,
**dataclasses.asdict(catalog_structure.catalog_info),
}
provenance_info = {
"tool_name": "lsdb",
Expand Down
16 changes: 16 additions & 0 deletions tests/lsdb/catalog/test_margin_catalog.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from pathlib import Path

import hipscat as hc
import pandas as pd

Expand Down Expand Up @@ -30,3 +32,17 @@ def test_margin_catalog_partitions_correct(small_sky_xmatch_margin_dir):
partition = margin.get_partition(hp_order, hp_pixel)
data = pd.read_parquet(path)
pd.testing.assert_frame_equal(partition.compute(), data)


def test_save_margin_catalog(small_sky_xmatch_margin_catalog, tmp_path):
new_catalog_name = "small_sky_xmatch_margin"
base_catalog_path = Path(tmp_path) / new_catalog_name
small_sky_xmatch_margin_catalog.to_hipscat(base_catalog_path, catalog_name=new_catalog_name)
expected_catalog = lsdb.read_hipscat(base_catalog_path)
assert expected_catalog.hc_structure.catalog_name == new_catalog_name
assert (
expected_catalog.hc_structure.catalog_info
== small_sky_xmatch_margin_catalog.hc_structure.catalog_info
)
assert expected_catalog.get_healpix_pixels() == small_sky_xmatch_margin_catalog.get_healpix_pixels()
pd.testing.assert_frame_equal(expected_catalog.compute(), small_sky_xmatch_margin_catalog._ddf.compute())

0 comments on commit 17abe64

Please sign in to comment.