Skip to content

Commit

Permalink
Merge pull request #40 from astronomy-commons/issue/38/read-hipscat-index-column-from-hipscat
Browse files (browse the repository at this point in the history)

Import HiPSCat index column
  • Loading branch information
camposandro authored Oct 20, 2023
2 parents 280d5b2 + c2ae87d commit 0d64473
Show file tree
Hide file tree
Showing 6 changed files with 15 additions and 10 deletions.
3 changes: 3 additions & 0 deletions benchmarks/asv.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
"branches": [
"HEAD"
],
"install_command": [
"python -mpip install -r requirements.txt {wheel_file}"
],
"build_command": [
"python -m build --wheel -o {build_cache_dir} {build_dir}"
],
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
git+https://github.com/astronomy-commons/hipscat.git@main
5 changes: 3 additions & 2 deletions src/lsdb/core/crossmatch/kdtree_match.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import healpy as hp
import numpy as np
import pandas as pd
from hipscat.pixel_math.hipscat_id import HIPSCAT_ID_COLUMN
from sklearn.neighbors import KDTree

from lsdb.core.crossmatch.abstract_crossmatch_algorithm import AbstractCrossmatchAlgorithm
Expand Down Expand Up @@ -46,7 +47,7 @@ def crossmatch(
self._rename_columns_with_suffix(self.right, self.suffixes[1])

# concat dataframes together
self.left.index.name = "_hipscat_index"
self.left.index.name = HIPSCAT_ID_COLUMN
left_join_part = self.left.iloc[left_ids_filtered].reset_index()
right_join_part = self.right.iloc[right_ids_filtered].reset_index(drop=True)
out = pd.concat(
Expand All @@ -56,7 +57,7 @@ def crossmatch(
],
axis=1,
)
out.set_index("_hipscat_index", inplace=True)
out.set_index(HIPSCAT_ID_COLUMN, inplace=True)
out["_DIST"] = distances

return out
Expand Down
4 changes: 2 additions & 2 deletions src/lsdb/dask/crossmatch_catalog_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pandas as pd
from dask.delayed import Delayed
from hipscat.pixel_math import HealpixPixel
from hipscat.pixel_math.hipscat_id import healpix_to_hipscat_id
from hipscat.pixel_math.hipscat_id import HIPSCAT_ID_COLUMN, healpix_to_hipscat_id
from hipscat.pixel_tree import PixelAlignment, PixelAlignmentType, align_trees

from lsdb.core.crossmatch.abstract_crossmatch_algorithm import AbstractCrossmatchAlgorithm
Expand Down Expand Up @@ -156,7 +156,7 @@ def crossmatch_catalog_data(
meta[name + suffixes[1]] = pd.Series(dtype=col_type)
meta["_DIST"] = pd.Series(dtype=np.dtype("float64"))
meta_df = pd.DataFrame(meta)
meta_df.index.name = "_hipscat_index"
meta_df.index.name = HIPSCAT_ID_COLUMN

# create dask df from delayed partitions
ddf = dd.from_delayed(joined_partitions, meta=meta_df)
Expand Down
9 changes: 4 additions & 5 deletions src/lsdb/loaders/dataframe/dataframe_catalog_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from hipscat.catalog import CatalogType
from hipscat.catalog.catalog_info import CatalogInfo
from hipscat.pixel_math import HealpixPixel, generate_histogram
from hipscat.pixel_math.hipscat_id import compute_hipscat_id, healpix_to_hipscat_id
from hipscat.pixel_math.hipscat_id import HIPSCAT_ID_COLUMN, compute_hipscat_id, healpix_to_hipscat_id
from typing_extensions import TypeAlias

from lsdb.catalog.catalog import Catalog, DaskDFPixelMap
Expand All @@ -25,7 +25,6 @@ class DataframeCatalogLoader:
"""Creates a HiPSCat formatted Catalog from a Pandas Dataframe"""

DEFAULT_THRESHOLD = 100_000
HIPSCAT_INDEX_COLUMN = "_hipscat_index"

def __init__(
self,
Expand Down Expand Up @@ -107,12 +106,12 @@ def load_catalog(self) -> Catalog:

def _set_hipscat_index(self):
"""Generates the hipscat indices for each data point and assigns
the _hipscat_index column as the Dataframe index."""
self.df[self.HIPSCAT_INDEX_COLUMN] = compute_hipscat_id(
the hipscat index column as the Dataframe index."""
self.df[HIPSCAT_ID_COLUMN] = compute_hipscat_id(
ra_values=self.df[self.catalog_info.ra_column],
dec_values=self.df[self.catalog_info.dec_column],
)
self.df.set_index(self.HIPSCAT_INDEX_COLUMN, inplace=True)
self.df.set_index(HIPSCAT_ID_COLUMN, inplace=True)

def _compute_pixel_map(self) -> Dict[HealpixPixel, HealpixInfo]:
"""Compute object histogram and generate the mapping between
Expand Down
3 changes: 2 additions & 1 deletion tests/lsdb/catalog/test_crossmatch.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pandas as pd
import pytest
from hipscat.pixel_math.hipscat_id import HIPSCAT_ID_COLUMN

from lsdb.core.crossmatch.abstract_crossmatch_algorithm import AbstractCrossmatchAlgorithm

Expand Down Expand Up @@ -80,7 +81,7 @@ def crossmatch(self, mock_results: pd.DataFrame = None):
],
axis=1,
)
out.set_index("_hipscat_index", inplace=True)
out.set_index(HIPSCAT_ID_COLUMN, inplace=True)
out["_DIST"] = mock_results["dist"].to_numpy()

return out

0 comments on commit 0d64473

Please sign in to comment.