Skip to content

Commit

Permalink
personalities
Browse files Browse the repository at this point in the history
  • Loading branch information
pothiers committed Dec 17, 2024
1 parent 55e6f27 commit 432a688
Showing 1 changed file with 95 additions and 0 deletions.
95 changes: 95 additions & 0 deletions python/lsst/ts/logging_and_reporting/cdb_aux.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import os

import pandas as pd
import requests


# Use https://usdf-rsp-dev.slac.stanford.edu/consdb/docs
# to create a table focused on where to find fields in the schema.
# Helpful to tell what instruments have a field.
# May lead to "instrument personality"
# endpoint='https://usdf-rsp-dev.slac.stanford.edu/consdb/schema'
def field_schema_location_table(
    schema_endpoint,  # .../consdb/schema
    exclude_instruments=None,
):
    """Map every schema field to the table(s) holding it, per instrument.

    Queries ``schema_endpoint`` for the available instruments, then each
    non-excluded instrument for its tables, then each table for its
    fields.  The result is pivoted so that each row is a field, each
    column is an instrument, and each cell is the list of tables of that
    instrument which contain the field.  A cell is missing (NaN) when the
    instrument has no table containing the field.

    Parameters
    ----------
    schema_endpoint : `str`
        URL of the schema service, for example
        ``https://usdf-rsp-dev.slac.stanford.edu/consdb/schema``.
        A trailing slash is tolerated when building the per-instrument
        and per-table URLs.
    exclude_instruments : iterable of `str`, optional
        Instruments to skip.  When `None` (the default) the three
        startracker instruments are skipped.  Pass an empty collection
        to include every instrument.

    Returns
    -------
    pv : `pandas.DataFrame`
        Pivot table indexed by ``field`` with one column per
        ``instrument``.  Empty (but with the same index/column names)
        when there is nothing to report.

    Raises
    ------
    requests.HTTPError
        If any of the schema requests returns an error status.

    Notes
    -----
    Requests use HTTP basic auth with the user name ``"user"`` and the
    value of the ``ACCESS_TOKEN`` environment variable as the password.

    It's nice to pivot the table for viewing with something like::

        pv = df.pivot_table(index='field',
                            columns='instrument',
                            values='table',
                            aggfunc=lambda x: ' '.join(x))
        HTML(pv.to_html())
    """
    # Other instrument names (latiss, lsstcam, lsstcamsim, lsstcomcam,
    # lsstcomcamsim) are intentionally left out of the default exclusions.
    exclude_default = {
        "startrackerfast",
        "startrackernarrow",
        "startrackerwide",
    }
    if exclude_instruments is None:
        exclude = exclude_default
    else:
        exclude = set(exclude_instruments)

    # NOTE(review): when ACCESS_TOKEN is unset, token is None and is still
    # passed as the basic-auth password -- confirm that is intended.
    token = os.environ.get("ACCESS_TOKEN")
    timeout = (5.05, 20.0)  # (connect, read) seconds

    # Strip a trailing slash so child URLs never contain "//".
    base = schema_endpoint.rstrip("/")

    # schemas[instrum][table] => {field_name_1, ...}
    schemas = dict()

    # One session for all requests: connections are reused and are
    # closed when the block exits, even if a request fails.
    with requests.Session() as session:
        session.auth = ("user", token)
        instruments = set(_fetch_json(session, schema_endpoint, timeout))

        # Collect Fields associated with non-excluded Instruments (with
        # the Table that contains them).  Sorted so that the request
        # order is reproducible.
        for instrument in sorted(instruments - exclude):
            instrument_url = f"{base}/{instrument}"
            tables = set(_fetch_json(session, instrument_url, timeout))
            schemas[instrument] = {
                table: set(
                    _fetch_json(
                        session, f"{instrument_url}/{table}", timeout
                    ).keys()
                )
                for table in sorted(tables)
            }

    return _pivot_field_locations(schemas)


def _fetch_json(session, url, timeout):
    """GET ``url`` through ``session`` and return the decoded JSON body."""
    response = session.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()


def _pivot_field_locations(schemas):
    """Pivot ``schemas[instrument][table] => fields`` to field x instrument."""
    # Sorting every level makes the order of the table names inside each
    # cell independent of set iteration order.
    rows = [
        {"instrument": instrum, "table": table, "field": field}
        for instrum in sorted(schemas)
        for table in sorted(schemas[instrum])
        for field in sorted(schemas[instrum][table])
    ]
    if not rows:
        # A frame built from no rows has no "field" column to pivot on,
        # so return an empty table with the expected axis names instead.
        return pd.DataFrame(
            index=pd.Index([], name="field"),
            columns=pd.Index([], name="instrument"),
        )

    df = pd.DataFrame(rows)
    return df.pivot_table(
        index="field", columns="instrument", values="table", aggfunc=list
    )


def common_fields(df):
    """Return the index labels of rows that have a value in every column.

    For the pivot produced by ``field_schema_location_table`` (rows are
    fields, columns are instruments) these are the fields common to all
    instruments.
    """
    present_everywhere = df.notna().all(axis=1)
    complete_rows = df.loc[present_everywhere]
    return complete_rows.index.to_list()


def uncommon_fields(df):
    """Return the index labels of rows with a missing value in any column.

    For the pivot produced by ``field_schema_location_table`` (rows are
    fields, columns are instruments) these are the fields that at least
    one instrument lacks.
    """
    missing_somewhere = df.isna().any(axis=1)
    incomplete_rows = df.loc[missing_somewhere]
    return incomplete_rows.index.to_list()


def divergent_personalities(df):
    """Return, per column, the fraction of rows whose value is missing.

    The result is a dict keyed by column label with values between 0.0
    and 1.0.  For the pivot produced by ``field_schema_location_table``
    this is, for each instrument, the share of all listed fields that
    the instrument does not have.
    """
    # The mean of a boolean mask is the count of True values divided by
    # the number of rows.
    missing_fraction = df.isna().mean()
    return missing_fraction.to_dict()

0 comments on commit 432a688

Please sign in to comment.