Skip to content

Commit

Permalink
Merge pull request #42 from astronomy-commons/delucchi/pylint
Browse files: browse the repository at this point in the history
Re-factor get_pixels
  • Loading branch information
delucchi-cmu authored Oct 19, 2023
2 parents 404dc17 + 788584b commit 280d5b2
Show file tree
Hide file tree
Showing 7 changed files with 23 additions and 33 deletions.
2 changes: 1 addition & 1 deletion benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import lsdb

TEST_DIR = os.path.join(os.path.dirname(__file__), '..', 'tests')
TEST_DIR = os.path.join(os.path.dirname(__file__), "..", "tests")
DATA_DIR_NAME = "data"
SMALL_SKY_DIR_NAME = "small_sky"
SMALL_SKY_XMATCH_NAME = "small_sky_xmatch"
Expand Down
13 changes: 5 additions & 8 deletions src/lsdb/catalog/catalog.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
from __future__ import annotations

import dataclasses
from typing import Dict, Tuple, Type, cast
from typing import Dict, List, Tuple, Type, cast

import dask.dataframe as dd
import hipscat as hc
import pandas as pd
from hipscat.pixel_math import HealpixPixel

from lsdb.catalog.dataset.dataset import Dataset
Expand Down Expand Up @@ -48,13 +47,13 @@ def __init__(
super().__init__(ddf, hc_structure)
self._ddf_pixel_map = ddf_pixel_map

def get_pixels(self) -> pd.DataFrame:
def get_healpix_pixels(self) -> List[HealpixPixel]:
"""Get all HEALPix pixels that are contained in the catalog
Returns:
Data frame with per-pixel data.
List of all Healpix pixels in the catalog
"""
return self.hc_structure.get_pixels()
return self.hc_structure.get_healpix_pixels()

def get_partition(self, order: int, pixel: int) -> dd.DataFrame:
"""Get the dask partition for a given HEALPix pixel
Expand Down Expand Up @@ -209,9 +208,7 @@ def cone_search(self, ra: float, dec: float, radius: float):
filtered_hc_structure = self.hc_structure.filter_by_cone(ra, dec, radius)
pixels_in_cone = filtered_hc_structure.get_healpix_pixels()
partitions = self._ddf.to_delayed()
partitions_in_cone = [
partitions[self._ddf_pixel_map[pixel]] for pixel in pixels_in_cone
]
partitions_in_cone = [partitions[self._ddf_pixel_map[pixel]] for pixel in pixels_in_cone]
filtered_partitions = [
cone_filter(partition, ra, dec, radius, self.hc_structure) for partition in partitions_in_cone
]
Expand Down
4 changes: 2 additions & 2 deletions src/lsdb/core/cone_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ def cone_filter(data_frame: pd.DataFrame, ra, dec, radius, metadata: hc.catalog.
"""
df_ras = data_frame[metadata.catalog_info.ra_column].values
df_decs = data_frame[metadata.catalog_info.dec_column].values
df_coords = SkyCoord(df_ras, df_decs, unit='deg')
center_coord = SkyCoord(ra, dec, unit='deg')
df_coords = SkyCoord(df_ras, df_decs, unit="deg")
center_coord = SkyCoord(ra, dec, unit="deg")
df_separations = df_coords.separation(center_coord).value
data_frame["_CONE_SEP"] = df_separations
data_frame = data_frame.loc[data_frame["_CONE_SEP"] < radius]
Expand Down
7 changes: 1 addition & 6 deletions src/lsdb/loaders/hipscat/hipscat_catalog_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,9 @@ def _load_dask_df_and_map(self, catalog: hc.catalog.Catalog) -> Tuple[dd.DataFra
return ddf, pixel_to_index_map

def _get_ordered_pixel_list(self, catalog: hc.catalog.Catalog) -> List[HealpixPixel]:
pixels = []
for _, row in catalog.get_pixels().iterrows():
order = row[hc.catalog.PartitionInfo.METADATA_ORDER_COLUMN_NAME]
pixel = row[hc.catalog.PartitionInfo.METADATA_PIXEL_COLUMN_NAME]
pixels.append(HealpixPixel(order, pixel))
# Sort pixels by pixel number at highest order
sorted_pixels = sorted(
pixels,
catalog.get_healpix_pixels(),
key=lambda pixel: (4 ** (HIPSCAT_ID_HEALPIX_ORDER - pixel.order)) * pixel.pixel,
)
return sorted_pixels
Expand Down
10 changes: 5 additions & 5 deletions tests/lsdb/catalog/test_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@


def test_catalog_pixels_equals_hc_catalog_pixels(small_sky_order1_catalog, small_sky_order1_hipscat_catalog):
pd.testing.assert_frame_equal(
small_sky_order1_catalog.get_pixels(), small_sky_order1_hipscat_catalog.get_pixels()
assert (
small_sky_order1_catalog.get_healpix_pixels() == small_sky_order1_hipscat_catalog.get_healpix_pixels()
)


Expand All @@ -22,9 +22,9 @@ def test_catalog_compute_equals_ddf_compute(small_sky_order1_catalog):


def test_get_catalog_partition_gets_correct_partition(small_sky_order1_catalog):
for _, row in small_sky_order1_catalog.get_pixels().iterrows():
hp_order = row["Norder"]
hp_pixel = row["Npix"]
for healpix_pixel in small_sky_order1_catalog.get_healpix_pixels():
hp_order = healpix_pixel.order
hp_pixel = healpix_pixel.pixel
partition = small_sky_order1_catalog.get_partition(hp_order, hp_pixel)
pixel = HealpixPixel(order=hp_order, pixel=hp_pixel)
partition_index = small_sky_order1_catalog._ddf_pixel_map[pixel]
Expand Down
2 changes: 1 addition & 1 deletion tests/lsdb/catalog/test_cone_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def test_cone_search_filters_partitions(small_sky_order1_catalog):
radius = 20
hc_conesearch = small_sky_order1_catalog.hc_structure.filter_by_cone(ra, dec, radius)
consearch_catalog = small_sky_order1_catalog.cone_search(ra, dec, radius)
assert len(hc_conesearch.get_healpix_pixels()) == len(consearch_catalog.get_pixels())
assert len(hc_conesearch.get_healpix_pixels()) == len(consearch_catalog.get_healpix_pixels())
assert len(hc_conesearch.get_healpix_pixels()) == consearch_catalog._ddf.npartitions
print(hc_conesearch.get_healpix_pixels())
for pixel in hc_conesearch.get_healpix_pixels():
Expand Down
18 changes: 8 additions & 10 deletions tests/lsdb/loaders/hipscat/test_read_hipscat.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,13 @@ def test_read_hipscat(small_sky_order1_dir, small_sky_order1_hipscat_catalog):
catalog = lsdb.read_hipscat(small_sky_order1_dir)
assert isinstance(catalog, lsdb.Catalog)
assert catalog.hc_structure.catalog_base_dir == small_sky_order1_hipscat_catalog.catalog_base_dir
pd.testing.assert_frame_equal(catalog.get_pixels(), small_sky_order1_hipscat_catalog.get_pixels())
assert catalog.get_healpix_pixels() == small_sky_order1_hipscat_catalog.get_healpix_pixels()


def test_pixels_in_map_equal_catalog_pixels(small_sky_order1_dir, small_sky_order1_hipscat_catalog):
catalog = lsdb.read_hipscat(small_sky_order1_dir)
for _, row in small_sky_order1_hipscat_catalog.get_pixels().iterrows():
hp_order = row["Norder"]
hp_pixel = row["Npix"]
catalog.get_partition(hp_order, hp_pixel)
for healpix_pixel in small_sky_order1_hipscat_catalog.get_healpix_pixels():
catalog.get_partition(healpix_pixel.order, healpix_pixel.pixel)


def test_wrong_pixel_raises_value_error(small_sky_order1_dir):
Expand All @@ -28,9 +26,9 @@ def test_wrong_pixel_raises_value_error(small_sky_order1_dir):

def test_parquet_data_in_partitions_match_files(small_sky_order1_dir, small_sky_order1_hipscat_catalog):
catalog = lsdb.read_hipscat(small_sky_order1_dir)
for _, row in small_sky_order1_hipscat_catalog.get_pixels().iterrows():
hp_order = row["Norder"]
hp_pixel = row["Npix"]
for healpix_pixel in small_sky_order1_hipscat_catalog.get_healpix_pixels():
hp_order = healpix_pixel.order
hp_pixel = healpix_pixel.pixel
partition = catalog.get_partition(hp_order, hp_pixel)
partition_df = partition.compute()
parquet_path = hc.io.paths.pixel_catalog_file(
Expand All @@ -44,14 +42,14 @@ def test_read_hipscat_specify_catalog_type(small_sky_catalog, small_sky_dir):
catalog = lsdb.read_hipscat(small_sky_dir, catalog_type=lsdb.Catalog)
assert isinstance(catalog, lsdb.Catalog)
pd.testing.assert_frame_equal(catalog.compute(), small_sky_catalog.compute())
pd.testing.assert_frame_equal(catalog.get_pixels(), small_sky_catalog.get_pixels())
assert catalog.get_healpix_pixels() == small_sky_catalog.get_healpix_pixels()
assert catalog.hc_structure.catalog_info == small_sky_catalog.hc_structure.catalog_info


def test_read_hipscat_no_parquet_metadata(small_sky_catalog, small_sky_no_metadata_dir):
catalog = lsdb.read_hipscat(small_sky_no_metadata_dir)
pd.testing.assert_frame_equal(catalog.compute(), small_sky_catalog.compute())
pd.testing.assert_frame_equal(catalog.get_pixels(), small_sky_catalog.get_pixels())
assert catalog.get_healpix_pixels() == small_sky_catalog.get_healpix_pixels()
assert catalog.hc_structure.catalog_info == small_sky_catalog.hc_structure.catalog_info


Expand Down

0 comments on commit 280d5b2

Please sign in to comment.