Fix probability data type (#696)
Signed-off-by: Lukas Heumos <[email protected]>
Zethson authored Jan 10, 2025
1 parent 218ccb3 commit 1e80db7
Showing 12 changed files with 76 additions and 60 deletions.
6 changes: 3 additions & 3 deletions .github/pull_request_template.md
@@ -4,9 +4,9 @@

<!-- Please fill in the appropriate checklist below (delete whatever is not relevant). These are the most common things requested on pull requests (PRs). -->

- [ ] Referenced issue is linked
- [ ] If you've fixed a bug or added code that should be tested, add tests!
- [ ] Documentation in `docs` is updated
- [ ] Referenced issue is linked
- [ ] If you've fixed a bug or added code that should be tested, add tests!
- [ ] Documentation in `docs` is updated

**Description of changes**

4 changes: 2 additions & 2 deletions .github/workflows/build.yml
@@ -16,10 +16,10 @@ jobs:
steps:
- uses: actions/checkout@v4

- name: Set up Python 3.11
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: "3.11"
python-version: "3.12"
cache: "pip"
cache-dependency-path: "**/pyproject.toml"

8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
@@ -2,16 +2,16 @@ fail_fast: false
default_language_version:
python: python3
default_stages:
- commit
- push
- pre-commit
- pre-push
minimum_pre_commit_version: 2.16.0
repos:
- repo: https://github.com/pre-commit/mirrors-prettier
rev: v4.0.0-alpha.8
hooks:
- id: prettier
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.4.7
rev: v0.8.6
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix, --unsafe-fixes]
@@ -27,7 +27,7 @@ repos:
- id: trailing-whitespace
- id: check-case-conflict
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.10.0
rev: v1.14.1
hooks:
- id: mypy
args: [--no-strict-optional, --ignore-missing-imports]
28 changes: 14 additions & 14 deletions CODE_OF_CONDUCT.md
@@ -14,23 +14,23 @@ religion, or sexual identity and orientation.
Examples of behavior that contributes to creating a positive environment
include:

- Using welcoming and inclusive language
- Being respectful of differing viewpoints and experiences
- Gracefully accepting constructive criticism
- Focusing on what is best for the community
- Showing empathy towards other community members
- Using welcoming and inclusive language
- Being respectful of differing viewpoints and experiences
- Gracefully accepting constructive criticism
- Focusing on what is best for the community
- Showing empathy towards other community members

Examples of unacceptable behavior by participants include:

- The use of sexualized language or imagery and unwelcome sexual
attention or advances
- Trolling, insulting/derogatory comments, and personal or political
attacks
- Public or private harassment
- Publishing others’ private information, such as a physical or
electronic address, without explicit permission
- Other conduct which could reasonably be considered inappropriate in a
professional setting
- The use of sexualized language or imagery and unwelcome sexual
attention or advances
- Trolling, insulting/derogatory comments, and personal or political
attacks
- Public or private harassment
- Publishing others’ private information, such as a physical or
electronic address, without explicit permission
- Other conduct which could reasonably be considered inappropriate in a
professional setting

## Our Responsibilities

18 changes: 9 additions & 9 deletions docs/contributing.md
@@ -132,11 +132,11 @@ in the cookiecutter-scverse template.

Please write documentation for new or changed features and use-cases. This project uses [sphinx][] with the following features:

- the [myst][] extension allows to write documentation in markdown/Markedly Structured Text
- Google-style docstrings
- Jupyter notebooks as tutorials through [myst-nb][] (See [Tutorials with myst-nb](#tutorials-with-myst-nb-and-jupyter-notebooks))
- [Sphinx autodoc typehints][], to automatically reference annotated input and output types
- Citations (like {cite:p}`Virshup_2023`) can be included with [sphinxcontrib-bibtex](https://sphinxcontrib-bibtex.readthedocs.io/)
- the [myst][] extension allows to write documentation in markdown/Markedly Structured Text
- Google-style docstrings
- Jupyter notebooks as tutorials through [myst-nb][] (See [Tutorials with myst-nb](#tutorials-with-myst-nb-and-jupyter-notebooks))
- [Sphinx autodoc typehints][], to automatically reference annotated input and output types
- Citations (like {cite:p}`Virshup_2023`) can be included with [sphinxcontrib-bibtex](https://sphinxcontrib-bibtex.readthedocs.io/)

See the [scanpy developer docs](https://scanpy.readthedocs.io/en/latest/dev/documentation.html) for more information
on how to write documentation.
@@ -150,10 +150,10 @@ These notebooks come from [pert-tutorials](https://github.com/scverse/pertpy-tutorials,

#### Hints

- If you refer to objects from other packages, please add an entry to `intersphinx_mapping` in `docs/conf.py`. Only
if you do so can sphinx automatically create a link to the external documentation.
- If building the documentation fails because of a missing link that is outside your control, you can add an entry to
the `nitpick_ignore` list in `docs/conf.py`
- If you refer to objects from other packages, please add an entry to `intersphinx_mapping` in `docs/conf.py`. Only
if you do so can sphinx automatically create a link to the external documentation.
- If building the documentation fails because of a missing link that is outside your control, you can add an entry to
the `nitpick_ignore` list in `docs/conf.py`

#### Building the docs locally

10 changes: 5 additions & 5 deletions docs/index.md
@@ -54,13 +54,13 @@ Discussions <https://github.com/scverse/pertpy/discussions>
references
```

- Consider citing [scanpy Genome Biology (2018)] along with original {doc}`references <references>`.
- A paper for pertpy is in the works.
- Consider citing [scanpy Genome Biology (2018)] along with original {doc}`references <references>`.
- A paper for pertpy is in the works.

# Indices and tables

- {ref}`genindex`
- {ref}`modindex`
- {ref}`search`
- {ref}`genindex`
- {ref}`modindex`
- {ref}`search`

[scanpy genome biology (2018)]: https://doi.org/10.1186/s13059-017-1382-0
12 changes: 6 additions & 6 deletions docs/usage/usage.md
@@ -563,33 +563,33 @@ including cell line annotation, bulk RNA and protein expression data.

Available databases for cell line metadata:

- [The Cancer Dependency Map Project at Broad](https://depmap.org/portal/)
- [The Cancer Dependency Map Project at Sanger](https://depmap.sanger.ac.uk/)
- [Genomics of Drug Sensitivity in Cancer (GDSC)](https://www.cancerrxgene.org/)
- [The Cancer Dependency Map Project at Broad](https://depmap.org/portal/)
- [The Cancer Dependency Map Project at Sanger](https://depmap.sanger.ac.uk/)
- [Genomics of Drug Sensitivity in Cancer (GDSC)](https://www.cancerrxgene.org/)

### Compound

The Compound module enables the retrieval of various types of information related to compounds of interest, including the most common synonym, pubchemID and canonical SMILES.

Available databases for compound metadata:

- [PubChem](https://pubchem.ncbi.nlm.nih.gov/)
- [PubChem](https://pubchem.ncbi.nlm.nih.gov/)

### Mechanism of Action

This module aims to retrieve metadata of mechanism of action studies related to perturbagens of interest, depending on the molecular targets.

Available databases for mechanism of action metadata:

- [CLUE](https://clue.io/)
- [CLUE](https://clue.io/)

### Drug

This module allows for the retrieval of Drug target information.

Available databases for drug metadata:

- [chembl](https://www.ebi.ac.uk/chembl/)
- [chembl](https://www.ebi.ac.uk/chembl/)

```{eval-rst}
.. autosummary::
2 changes: 1 addition & 1 deletion pertpy/metadata/_cell_line.py
@@ -747,7 +747,7 @@ def plot_correlation(
if all(isinstance(id, str) for id in subset_identifier_list):
if set(subset_identifier_list).issubset(adata.obs[identifier].unique()):
subset_identifier_list = np.where(
np.in1d(adata.obs[identifier].values, subset_identifier_list)
np.isin(adata.obs[identifier].values, subset_identifier_list)
)[0]
else:
raise ValueError("`Subset_identifier` must be found in adata.obs.`identifier`.")
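
A note on the one-line change above: NumPy deprecated `np.in1d` in favor of `np.isin`, which behaves the same for 1-D inputs, so the swap is a drop-in replacement. A minimal sketch of the membership lookup the changed line performs, with made-up identifiers standing in for `adata.obs[identifier].values`:

```python
import numpy as np

# Hypothetical cell-line identifiers; in pertpy these come from adata.obs[identifier].values
identifiers = np.array(["ACH-000001", "ACH-000002", "ACH-000003"])
subset_identifier_list = ["ACH-000001", "ACH-000003"]

# np.isin replaces the deprecated np.in1d; np.where turns the boolean mask into row positions
mask = np.isin(identifiers, subset_identifier_list)
positions = np.where(mask)[0]
print(positions)  # [0 2]
```
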
2 changes: 1 addition & 1 deletion pertpy/tools/_coda/_base_coda.py
@@ -850,7 +850,7 @@ def summary(self, data: AnnData | MuData, extended: bool = False, modality_key:
table = Table(title="Compositional Analysis summary", box=box.SQUARE, expand=True, highlight=True)
table.add_column("Name", justify="left", style="cyan")
table.add_column("Value", justify="left")
table.add_row("Data", "Data: %d samples, %d cell types" % data_dims)
table.add_row("Data", f"Data: {data_dims[0]} samples, {data_dims[1]} cell types")
table.add_row("Reference cell type", "{}".format(str(sample_adata.uns["scCODA_params"]["reference_cell_type"])))
table.add_row("Formula", "{}".format(sample_adata.uns["scCODA_params"]["formula"]))
if extended:
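
The `add_row` change above only swaps `%`-formatting of the `data_dims` tuple for an f-string with explicit indexing; the rendered text is unchanged. A small sketch, assuming `data_dims` is a `(n_samples, n_cell_types)` tuple as the old `%d, %d` template implies:

```python
data_dims = (120, 8)  # hypothetical (samples, cell types)

old = "Data: %d samples, %d cell types" % data_dims                # previous style
new = f"Data: {data_dims[0]} samples, {data_dims[1]} cell types"   # style used in the diff

assert old == new == "Data: 120 samples, 8 cell types"
```
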
19 changes: 12 additions & 7 deletions pertpy/tools/_mixscape.py
@@ -67,8 +67,10 @@ def perturbation_signature(
If `None`, the representation is chosen automatically:
For `.n_vars` < 50, `.X` is used, otherwise 'X_pca' is used.
If 'X_pca' is not present, it’s computed with default parameters.
n_dims: Number of dimensions to use from the representation to calculate the perturbation signature. If `None`, use all dimensions.
n_pcs: If PCA representation is used, the number of principal components to compute. If `n_pcs==0` use `.X` if `use_rep is None`.
n_dims: Number of dimensions to use from the representation to calculate the perturbation signature.
If `None`, use all dimensions.
n_pcs: If PCA representation is used, the number of principal components to compute.
If `n_pcs==0` use `.X` if `use_rep is None`.
batch_size: Size of batch to calculate the perturbation signature.
If 'None', the perturbation signature is calcuated in the full mode, requiring more memory.
The batched mode is very inefficient for sparse data.
@@ -130,7 +132,9 @@
shape=(n_split, n_control),
)
neigh_matrix /= n_neighbors
adata.layers["X_pert"][split_mask] = np.log1p(neigh_matrix @ X_control) - adata.layers["X_pert"][split_mask]
adata.layers["X_pert"][split_mask] = (
np.log1p(neigh_matrix @ X_control) - adata.layers["X_pert"][split_mask]
)
else:
is_sparse = issparse(X_control)
split_indices = np.where(split_mask)[0]
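
For context on the reformatted assignment above: it computes the perturbation signature as `log1p` of each cell's averaged control-neighbor expression minus whatever is already stored in that cell's row of the `X_pert` layer, with `neigh_matrix` acting as a row-normalized neighbor-selection matrix. A dense toy sketch of that product (shapes and values are made up; pertpy builds `neigh_matrix` as a sparse matrix):

```python
import numpy as np

rng = np.random.default_rng(0)
n_split, n_control, n_genes, n_neighbors = 4, 6, 3, 2

# Row i holds 1s for the control cells that are neighbors of perturbed cell i
neigh_matrix = np.zeros((n_split, n_control))
for i in range(n_split):
    neigh_matrix[i, rng.choice(n_control, size=n_neighbors, replace=False)] = 1.0
neigh_matrix /= n_neighbors  # each row now averages its neighbors

X_control = rng.poisson(5.0, size=(n_control, n_genes)).astype(float)        # control expression
X_pert = np.log1p(rng.poisson(5.0, size=(n_split, n_genes)).astype(float))   # stand-in for the X_pert layer

# Same form as the diff line: averaged control expression vs the cell's own stored values
signature = np.log1p(neigh_matrix @ X_control) - X_pert
print(signature.shape)  # (4, 3)
```
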
@@ -166,7 +170,7 @@ def mixscape(
split_by: str | None = None,
pval_cutoff: float | None = 5e-2,
perturbation_type: str | None = "KO",
random_state: int | None = 0,
random_state: int | None = 0,
copy: bool | None = False,
):
"""Identify perturbed and non-perturbed gRNA expressing cells that accounts for multiple treatments/conditions/chemical perturbations.
@@ -238,6 +242,7 @@
raise KeyError(
"No 'X_pert' found in .layers! Please run perturbation_signature first to calculate perturbation signature!"
) from None

# initialize return variables
adata.obs[f"{new_class_name}_p_{perturbation_type.lower()}"] = 0
adata.obs[new_class_name] = adata.obs[labels].astype(str)
@@ -248,6 +253,8 @@
dtype=np.object_,
)
gv_list: dict[str, dict] = {}

adata.obs[f"{new_class_name}_p_{perturbation_type.lower()}"] = 0.0
for split, split_mask in enumerate(split_masks):
category = categories[split]
genes = list(set(adata[split_mask].obs[labels]).difference([control]))
@@ -325,9 +332,7 @@
)

adata.obs[f"{new_class_name}_global"] = [a.split(" ")[-1] for a in adata.obs[new_class_name]]
adata.obs.loc[orig_guide_cells_index, f"{new_class_name}_p_{perturbation_type.lower()}"] = np.round(
post_prob
).astype("int64")
adata.obs.loc[orig_guide_cells_index, f"{new_class_name}_p_{perturbation_type.lower()}"] = post_prob
adata.uns["mixscape"] = gv_list

if copy:
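
This hunk is the fix the commit title refers to. Previously the posterior probability was rounded and cast to `int64`, which collapses every value to 0 or 1; the new code pre-initializes the column with `0.0` (see the earlier hunk) and stores the float posterior unchanged. A small sketch of the difference, with made-up probabilities and cell names:

```python
import numpy as np
import pandas as pd

post_prob = np.array([0.12, 0.58, 0.97])   # hypothetical posterior probabilities
cells = ["c1", "c2", "c3"]
obs = pd.DataFrame(index=cells)

# Old behaviour: rounding + int64 cast throws the probabilities away
obs["p_ko_old"] = 0
obs.loc[cells, "p_ko_old"] = np.round(post_prob).astype("int64")

# Fixed behaviour: float initialization, probabilities stored as-is
obs["p_ko_new"] = 0.0
obs.loc[cells, "p_ko_new"] = post_prob

print(obs)
#     p_ko_old  p_ko_new
# c1         0      0.12
# c2         1      0.58
# c3         1      0.97
```
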
4 changes: 3 additions & 1 deletion pyproject.toml
@@ -145,7 +145,9 @@ filterwarnings = [
"ignore:Importing read_elem from `anndata.experimental` is deprecated:FutureWarning",
"ignore:ast.NameConstant is deprecated and will be removed in Python 3.14; use ast.Constant instead:DeprecationWarning",
"ignore:'cgi' is deprecated and slated for removal in Python 3.13:DeprecationWarning",
"ignore:In the future, the default backend for leiden will be igraph instead of leidenalg:FutureWarning"
"ignore:In the future, the default backend for leiden will be igraph instead of leidenalg:FutureWarning",
"ignore:Transforming to str index:anndata.ImplicitModificationWarning",
"ignore:Failed to correctly find n_neighbors for some samples:UserWarning"
]

[tool.ruff]
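
The two added `filterwarnings` entries use pytest's warning-filter string syntax (`action:message:category`), where the message part is matched against the start of the warning text. As a rough plain-Python equivalent of the second entry (my reading of the filter syntax, not something spelled out in the commit):

```python
import warnings

# Roughly mirrors "ignore:Failed to correctly find n_neighbors for some samples:UserWarning"
warnings.filterwarnings(
    "ignore",
    message="Failed to correctly find n_neighbors for some samples",
    category=UserWarning,
)

# Warnings whose text starts with that message are now suppressed
warnings.warn("Failed to correctly find n_neighbors for some samples (toy example)", UserWarning)
```
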
23 changes: 16 additions & 7 deletions tests/tools/test_mixscape.py
@@ -99,6 +99,7 @@ def test_lda(adata):

assert "mixscape_lda" in adata.uns


def test_deterministic_perturbation_signature():
n_genes = 5
n_cells_per_class = 50
@@ -107,12 +108,17 @@

cell_classes_array = np.repeat(cell_classes, n_cells_per_class)
groups_array = np.tile(np.repeat(groups, n_cells_per_class // 2), len(cell_classes))
obs = pd.DataFrame({"cell_class": cell_classes_array, "group": groups_array,
"perturbation": ["control" if cell_class == "NT" else "pert1" for cell_class in cell_classes_array]})
obs = pd.DataFrame(
{
"cell_class": cell_classes_array,
"group": groups_array,
"perturbation": ["control" if cell_class == "NT" else "pert1" for cell_class in cell_classes_array],
}
)

data = np.zeros((len(obs), n_genes))
pert_effect = np.random.uniform(-1, 1, size=(n_cells_per_class//len(groups), n_genes))
for group_idx, group in enumerate(groups):
pert_effect = np.random.default_rng().uniform(-1, 1, size=(n_cells_per_class // len(groups), n_genes))
for _, group in enumerate(groups):
baseline_expr = 2 if group == "Group1" else 10
group_mask = obs["group"] == group

@@ -129,10 +135,13 @@
adata = anndata.AnnData(X=data, obs=obs, var=var)

mixscape_identifier = pt.tl.Mixscape()
mixscape_identifier.perturbation_signature(adata, pert_key="perturbation", control="control", n_neighbors=5, split_by="group")
mixscape_identifier.perturbation_signature(
adata, pert_key="perturbation", control="control", n_neighbors=5, split_by="group"
)

assert "X_pert" in adata.layers
assert np.allclose(adata.layers["X_pert"][obs["cell_class"] == "NT"], 0)
assert np.allclose(adata.layers["X_pert"][obs["cell_class"] == "NP"], 0)
assert np.allclose(adata.layers["X_pert"][obs["cell_class"] == "KO"], -np.concatenate([pert_effect] * len(groups), axis=0))

assert np.allclose(
adata.layers["X_pert"][obs["cell_class"] == "KO"], -np.concatenate([pert_effect] * len(groups), axis=0)
)
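
One incidental change in this test: `np.random.uniform` is replaced by `np.random.default_rng().uniform`, following NumPy's recommendation to use the `Generator` API rather than the legacy global RNG. A minimal sketch (the seed is optional; the test in the diff leaves the generator unseeded):

```python
import numpy as np

legacy = np.random.uniform(-1, 1, size=(3, 5))   # legacy global-state API

rng = np.random.default_rng(seed=42)             # explicit, optionally seeded generator
modern = rng.uniform(-1, 1, size=(3, 5))

print(legacy.shape, modern.shape)  # (3, 5) (3, 5)
```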
