From 06e30328c6be2ac8742764ffe56f89eaf1001f6d Mon Sep 17 00:00:00 2001 From: Konstantin Malanchev Date: Fri, 15 Nov 2024 16:13:31 -0500 Subject: [PATCH] Fixes for the review --- .../pre_executed/ztf-alerts-sne.ipynb | 782 ++++++++++-------- 1 file changed, 427 insertions(+), 355 deletions(-) diff --git a/docs/tutorials/pre_executed/ztf-alerts-sne.ipynb b/docs/tutorials/pre_executed/ztf-alerts-sne.ipynb index c8bb5627..8991c6dc 100644 --- a/docs/tutorials/pre_executed/ztf-alerts-sne.ipynb +++ b/docs/tutorials/pre_executed/ztf-alerts-sne.ipynb @@ -7,8 +7,8 @@ "source": [ "# Search for SN-like light curves in ZTF alerts\n", "\n", - "We will use lsdb package to load a Hats catalog with [ZTF](https://www.ztf.caltech.edu) alerts.\n", - "The dataset contains all alerts sent from the beginning of the survey until 2023-09-13 corresponding to objects having at least 20 detections.\n", + "We will use the lsdb package to load a HATS catalog with [ZTF](https://www.ztf.caltech.edu) alerts.\n", + "The dataset contains all alerts sent from the beginning of the survey (2018-05-04) until 2023-09-13 corresponding to objects having at least 20 detections.\n", "\n", "The dataset is provided by the [ALeRCE](https://alerce.science) broker team.\n", "\n", @@ -34,39 +34,48 @@ }, { "cell_type": "code", + "execution_count": 1, "id": "0e5c43c5", "metadata": { "ExecuteTime": { - "end_time": "2024-10-31T16:13:45.038972Z", - "start_time": "2024-10-31T16:13:40.538905Z" + "end_time": "2024-11-15T20:37:05.269994Z", + "start_time": "2024-11-15T20:37:04.067689Z" } }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note: you may need to restart the kernel to use updated packages.\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], "source": [ - "%pip install lsdb\n", + "%pip install -q lsdb\n", "# This --only-binary flag is required to avoid installation errors on some systems\n", - "%pip install --only-binary=light-curve light-curve" - 
], - "outputs": [], - "execution_count": 1 + "%pip install -q --only-binary=light-curve light-curve" + ] }, { "cell_type": "code", + "execution_count": 2, "id": "af5fdfe0", "metadata": { "ExecuteTime": { - "end_time": "2024-10-31T16:14:01.879589Z", - "start_time": "2024-10-31T16:13:59.192844Z" + "end_time": "2024-11-15T20:37:54.468549Z", + "start_time": "2024-11-15T20:37:52.150445Z" } }, + "outputs": [], "source": [ "import light_curve as licu\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "from dask.distributed import Client\n", "from lsdb import read_hats" - ], - "outputs": [], - "execution_count": 2 + ] }, { "cell_type": "markdown", @@ -75,18 +84,20 @@ "source": [ "### Helper function for light-curve plotting\n", "\n", - "The function accepts a pandas data frame and plot a light curve." + "The function accepts a pandas data frame and plots a light curve." ] }, { "cell_type": "code", + "execution_count": 3, "id": "bde20aba", "metadata": { "ExecuteTime": { - "end_time": "2024-10-31T16:15:03.695967Z", - "start_time": "2024-10-31T16:15:03.692865Z" + "end_time": "2024-11-15T20:37:57.622181Z", + "start_time": "2024-11-15T20:37:57.619207Z" } }, + "outputs": [], "source": [ "def plot_lc(lc, nondet, title=None):\n", " \"\"\"Plot light curve with non-detections.\"\"\"\n", @@ -103,9 +114,7 @@ " plt.xlabel(\"MJD\")\n", " plt.ylabel(\"mag\")\n", " plt.gca().invert_yaxis()" - ], - "outputs": [], - "execution_count": 3 + ] }, { "cell_type": "markdown", @@ -118,68 +127,28 @@ "These transformed columns are \"nested data frames\", so each item could be represented by a small pandas dataframe.\n", "We are going to have three nested columns:\n", "\n", - "1. \"lc\", for light curves, each point corrersponds to some alert (detection)\n", + "1. \"lc\", for light curves, each point corresponding to some alert (detection)\n", "2. \"nondet\", for non-detections (upper limits)\n", "3. 
\"ref\", for ZTF reference objects associated with alerts\n", "\n", - "Here we do not download any data yet, all data access and analysis happens only after `.compute()` is called.\n", + "Here we have not downloaded any data yet; all data access and analysis happen only after `.compute()` is called.\n", "\n", "Here we display two versions of the catalog: the first one is the raw catalog with nested lists, and the second one is the catalog with nested columns." ] }, { "cell_type": "code", + "execution_count": 4, "id": "7c7010f6", "metadata": { "ExecuteTime": { - "end_time": "2024-10-31T16:16:09.566339Z", - "start_time": "2024-10-31T16:16:07.219879Z" + "end_time": "2024-11-15T20:38:02.588603Z", + "start_time": "2024-11-15T20:37:58.987682Z" } }, - "source": [ - "ZTF_ALERTS = \"https://data.lsdb.io/hats/alerce/\"\n", - "\n", - "# Load catalog with nested lists\n", - "raw_catalog = read_hats(\n", - " ZTF_ALERTS,\n", - ")\n", - "display(raw_catalog)\n", - "\n", - "# Pack all list-columns into single column\n", - "catalog_with_lc = raw_catalog.nest_lists(\n", - " base_columns=[col for col in raw_catalog.columns if not col.startswith(\"lc_\")],\n", - " name=\"lc\",\n", - ")\n", - "\n", - "# Pack non-detections\n", - "catalog_with_nondet = catalog_with_lc.nest_lists(\n", - " base_columns=[col for col in catalog_with_lc.columns if not col.startswith(\"nondet_\")],\n", - " name=\"nondet\",\n", - ")\n", - "\n", - "# Pack ZTF references\n", - "catalog = catalog_with_nondet.nest_lists(\n", - " base_columns=[col for col in catalog_with_nondet.columns if not col.startswith(\"ref_\")],\n", - " name=\"ref\",\n", - ")\n", - "\n", - "catalog" - ], "outputs": [ { "data": { - "text/plain": [ - "Dask NestedFrame Structure:\n", - " oid mean_ra mean_dec lc_ra lc_dec lc_candid lc_mjd lc_fid lc_pid lc_diffmaglim lc_isdiffpos lc_nid lc_magpsf lc_sigmapsf lc_magap lc_sigmagap lc_distnr lc_rb lc_rbversion lc_drb lc_drbversion lc_magapbig lc_sigmagapbig lc_rfid lc_magpsf_corr lc_sigmapsf_corr 
lc_sigmapsf_corr_ext lc_corrected lc_dubious lc_parent_candid lc_has_stamp lc_step_id_corr nondet_mjd nondet_fid nondet_diffmaglim ref_rfid ref_candid ref_fid ref_rcid ref_field ref_magnr ref_sigmagnr ref_chinr ref_sharpnr ref_ranr ref_decnr ref_mjdstartref ref_mjdendref ref_nframesref Norder Dir Npix\n", - "npartitions=113 \n", - "0 string[pyarrow] double[pyarrow] double[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] list[pyarrow] int8[pyarrow] int64[pyarrow] int64[pyarrow]\n", - "72057594037927936 ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n", - "... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n", - "3170534137668829184 ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n", - "3458764513820540928 ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... 
...\n", - "Dask Name: nestedframe, 3 expressions\n", - "Expr=MapPartitions(NestedFrame)" - ], "text/html": [ "
lsdb Catalog alerce_nested:
\n", "