Skip to content

Commit

Permalink
Fix test data for removing partition_info (#65)
Browse files Browse the repository at this point in the history
* Fix test data for removing partition_info

* And remove untested lines.

* Address lint warning.
  • Loading branch information
delucchi-cmu authored Nov 14, 2023
1 parent e9b2562 commit d57f770
Show file tree
Hide file tree
Showing 11 changed files with 8 additions and 47 deletions.
6 changes: 0 additions & 6 deletions cloud_tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
DATA_DIR_NAME = "data"
SMALL_SKY_DIR_NAME = "small_sky"
SMALL_SKY_XMATCH_NAME = "small_sky_xmatch"
SMALL_SKY_NO_METADATA_DIR_NAME = "small_sky_no_metadata"
SMALL_SKY_ORDER1_DIR_NAME = "small_sky_order1"
XMATCH_CORRECT_FILE = "xmatch_correct.csv"
XMATCH_CORRECT_005_FILE = "xmatch_correct_0_005.csv"
Expand Down Expand Up @@ -67,11 +66,6 @@ def small_sky_xmatch_dir_cloud(test_data_dir_cloud):
return os.path.join(test_data_dir_cloud, SMALL_SKY_XMATCH_NAME)


@pytest.fixture
def small_sky_no_metadata_dir_cloud(test_data_dir_cloud):
return os.path.join(test_data_dir_cloud, SMALL_SKY_NO_METADATA_DIR_NAME)


@pytest.fixture
def small_sky_order1_dir_cloud(test_data_dir_cloud):
return os.path.join(test_data_dir_cloud, SMALL_SKY_ORDER1_DIR_NAME)
Expand Down
11 changes: 0 additions & 11 deletions cloud_tests/lsdb/loaders/hipscat/test_read_hipscat.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,6 @@ def test_read_hipscat_specify_catalog_type(
assert catalog.hc_structure.catalog_info == small_sky_catalog_cloud.hc_structure.catalog_info


def test_read_hipscat_no_parquet_metadata(
small_sky_catalog_cloud, small_sky_no_metadata_dir_cloud, example_cloud_storage_options
):
catalog = lsdb.read_hipscat(
small_sky_no_metadata_dir_cloud, storage_options=example_cloud_storage_options
)
pd.testing.assert_frame_equal(catalog.compute(), small_sky_catalog_cloud.compute())
assert catalog.hc_structure.get_healpix_pixels() == small_sky_catalog_cloud.get_healpix_pixels()
assert catalog.hc_structure.catalog_info == small_sky_catalog_cloud.hc_structure.catalog_info


def test_read_hipscat_specify_wrong_catalog_type(small_sky_dir_cloud):
with pytest.raises(ValueError):
lsdb.read_hipscat(small_sky_dir_cloud, catalog_type=int)
13 changes: 4 additions & 9 deletions src/lsdb/loaders/hipscat/hipscat_catalog_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import dask.dataframe as dd
import hipscat as hc
import pyarrow
-from hipscat.io.file_io import file_io, file_pointer
+from hipscat.io.file_io import file_io
from hipscat.pixel_math import HealpixPixel
from hipscat.pixel_math.hipscat_id import HIPSCAT_ID_HEALPIX_ORDER

Expand Down Expand Up @@ -74,7 +74,7 @@ def _get_paths_from_pixels(
def _load_df_from_paths(
self, catalog: hc.catalog.Catalog, paths: List[hc.io.FilePointer]
) -> dd.DataFrame:
-        metadata_schema = self._load_parquet_metadata_schema(catalog, paths)
+        metadata_schema = self._load_parquet_metadata_schema(catalog)
dask_meta_schema = metadata_schema.empty_table().to_pandas()
ddf = dd.from_map(
file_io.read_parquet_file_to_pandas,
Expand All @@ -84,12 +84,7 @@ def _load_df_from_paths(
)
return ddf

-    def _load_parquet_metadata_schema(
-        self, catalog: hc.catalog.Catalog, paths: List[hc.io.FilePointer]
-    ) -> pyarrow.Schema:
+    def _load_parquet_metadata_schema(self, catalog: hc.catalog.Catalog) -> pyarrow.Schema:
         metadata_pointer = hc.io.paths.get_parquet_metadata_pointer(catalog.catalog_base_dir)
-        if file_pointer.does_file_or_directory_exist(metadata_pointer, storage_options=self.storage_options):
-            metadata = file_io.read_parquet_metadata(metadata_pointer, storage_options=self.storage_options)
-            return metadata.schema.to_arrow_schema()
-        metadata = file_io.read_parquet_metadata(paths[0], storage_options=self.storage_options)
+        metadata = file_io.read_parquet_metadata(metadata_pointer, storage_options=self.storage_options)
         return metadata.schema.to_arrow_schema()
6 changes: 0 additions & 6 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
DATA_DIR_NAME = "data"
SMALL_SKY_DIR_NAME = "small_sky"
SMALL_SKY_XMATCH_NAME = "small_sky_xmatch"
SMALL_SKY_NO_METADATA_DIR_NAME = "small_sky_no_metadata"
SMALL_SKY_ORDER1_DIR_NAME = "small_sky_order1"
SMALL_SKY_ORDER1_CSV = "small_sky_order1.csv"
XMATCH_CORRECT_FILE = "xmatch_correct.csv"
Expand All @@ -34,11 +33,6 @@ def small_sky_xmatch_dir(test_data_dir):
return os.path.join(test_data_dir, SMALL_SKY_XMATCH_NAME)


@pytest.fixture
def small_sky_no_metadata_dir(test_data_dir):
return os.path.join(test_data_dir, SMALL_SKY_NO_METADATA_DIR_NAME)


@pytest.fixture
def small_sky_order1_dir(test_data_dir):
return os.path.join(test_data_dir, SMALL_SKY_ORDER1_DIR_NAME)
Expand Down
Binary file modified tests/data/small_sky_xmatch/Norder=1/Dir=0/Npix=44.parquet
Binary file not shown.
Binary file modified tests/data/small_sky_xmatch/Norder=1/Dir=0/Npix=45.parquet
Binary file not shown.
Binary file modified tests/data/small_sky_xmatch/Norder=1/Dir=0/Npix=46.parquet
Binary file not shown.
Binary file modified tests/data/small_sky_xmatch/_common_metadata
Binary file not shown.
Binary file modified tests/data/small_sky_xmatch/_metadata
Binary file not shown.
12 changes: 4 additions & 8 deletions tests/data/small_sky_xmatch/catalog_info.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
 {
     "catalog_name": "small_sky_xmatch",
     "catalog_type": "object",
-    "version": "0.0.0",
-    "generation_date": "2023.8.9",
+    "total_rows": 111,
     "epoch": "J2000",
-    "ra_kw": "ra",
-    "dec_kw": "dec",
-    "id_kw": "id",
-    "total_objects": 111,
-    "pixel_threshold": 50
-}
+    "ra_column": "ra",
+    "dec_column": "dec"
+}
7 changes: 0 additions & 7 deletions tests/lsdb/loaders/hipscat/test_read_hipscat.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,6 @@ def test_read_hipscat_specify_catalog_type(small_sky_catalog, small_sky_dir):
assert catalog.hc_structure.catalog_info == small_sky_catalog.hc_structure.catalog_info


def test_read_hipscat_no_parquet_metadata(small_sky_catalog, small_sky_no_metadata_dir):
catalog = lsdb.read_hipscat(small_sky_no_metadata_dir)
pd.testing.assert_frame_equal(catalog.compute(), small_sky_catalog.compute())
assert catalog.get_healpix_pixels() == small_sky_catalog.get_healpix_pixels()
assert catalog.hc_structure.catalog_info == small_sky_catalog.hc_structure.catalog_info


def test_read_hipscat_specify_wrong_catalog_type(small_sky_dir):
with pytest.raises(ValueError):
lsdb.read_hipscat(small_sky_dir, catalog_type=int)

0 comments on commit d57f770

Please sign in to comment.