From 8182f6ce0027d088a03cf04924765567a9ec77fc Mon Sep 17 00:00:00 2001 From: SarahOuologuem Date: Mon, 11 Mar 2024 09:21:36 +0000 Subject: [PATCH 01/27] force color to be a string --- muon/_core/plot.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/muon/_core/plot.py b/muon/_core/plot.py index da8cab3..7d72f3e 100644 --- a/muon/_core/plot.py +++ b/muon/_core/plot.py @@ -22,7 +22,7 @@ def scatter( data: Union[AnnData, MuData], x: Optional[str] = None, y: Optional[str] = None, - color: Optional[Union[str, Sequence[str]]] = None, + color: Optional[str] = None, use_raw: Optional[bool] = None, layers: Optional[Union[str, Sequence[str]]] = None, **kwargs, @@ -42,8 +42,8 @@ def scatter( x coordinate y : Optional[str] y coordinate - color : Optional[Union[str, Sequence[str]]], optional (default: None) - Keys for variables or annotations of observations (.obs columns), + color : Optional[str], optional (default: None) + Key for variables or annotations of observations (.obs columns), or a hex colour specification. use_raw : Optional[bool], optional (default: None) Use `.raw` attribute of the modality where a feature (from `color`) is derived from. 
@@ -72,10 +72,10 @@ def scatter( if isinstance(color, str): color_obs = _get_values(data, color, use_raw=use_raw, layer=layers[2]) color_obs = pd.DataFrame({color: color_obs}) - color = [color] else: - # scanpy#311 / scanpy#1497 has to be fixed for this to work - color_obs = _get_values(data, color, use_raw=use_raw, layer=layers[2]) + raise TypeError("Expected color to be a string.") + + color_obs.index = data.obs_names obs = pd.concat([obs, color_obs], axis=1, ignore_index=False) @@ -86,14 +86,14 @@ def scatter( # and are now stored in .obs retval = sc.pl.scatter(ad, x=x, y=y, color=color, **kwargs) if color is not None: - for col in color: - try: - data.uns[f"{col}_colors"] = ad.uns[f"{col}_colors"] - except KeyError: - pass + try: + data.uns[f"{color}_colors"] = ad.uns[f"{color}_colors"] + except KeyError: + pass return retval + # # Embedding # From 9115a33a9e965df0b825a6c51389dd4ed47099af Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 15 Oct 2024 11:58:51 +0200 Subject: [PATCH 02/27] (fix): fix obs filtering --- muon/_core/preproc.py | 4 ++-- tests/test_filter.py | 13 +++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 tests/test_filter.py diff --git a/muon/_core/preproc.py b/muon/_core/preproc.py index 974cbae..4987160 100644 --- a/muon/_core/preproc.py +++ b/muon/_core/preproc.py @@ -757,10 +757,10 @@ def func(x): else: # filter_obs() for each modality - for m, mod in data.mod.items(): + for m in data.mod.keys(): obsmap = data.obsmap[m][obs_subset] obsidx = obsmap > 0 - filter_obs(mod, mod.obs_names[obsmap[obsidx] - 1]) + data.mod[m] = data.mod[m][data.mod[m].obs_names[obsmap[obsidx] - 1]] maporder = np.argsort(obsmap[obsidx]) nobsmap = np.empty(maporder.size) nobsmap[maporder] = np.arange(1, maporder.size + 1) diff --git a/tests/test_filter.py b/tests/test_filter.py new file mode 100644 index 0000000..1edc240 --- /dev/null +++ b/tests/test_filter.py @@ -0,0 +1,13 @@ +import scanpy as sc +import muon as mu + +class 
TestFilter(): + def test_filter_obs_simple(self): + adata = sc.datasets.pbmc3k_processed() + mdata = mu.MuData({ + "A": adata[:500, ].copy(), + "B": adata[500:, ].copy() + }) + mu.pp.filter_obs(mdata, "A:louvain", lambda x: x == "B cells") + assert mdata["B"].n_obs == 0 + assert mdata["A"].obs["louvain"].unique() == "B cells" \ No newline at end of file From 74cf7a8112397fcb9a02acf53aa821e0fa50e7b3 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 15 Oct 2024 12:01:34 +0200 Subject: [PATCH 03/27] (chore): fixture --- tests/conftest.py | 6 +++++- tests/test_filter.py | 8 +++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index d9ab19a..f7feb94 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,5 @@ import pytest - +import scanpy as sc @pytest.fixture(scope="module") def filepath_h5mu(tmpdir_factory): @@ -9,3 +9,7 @@ def filepath_h5mu(tmpdir_factory): @pytest.fixture(scope="module") def filepath_hdf5(tmpdir_factory): yield str(tmpdir_factory.mktemp("tmp_mofa_dir").join("mofa_pytest.hdf5")) + +@pytest.fixture(scope="module") +def pbmc3k_processed(): + yield sc.datasets.pbmc3k_processed() diff --git a/tests/test_filter.py b/tests/test_filter.py index 1edc240..bbdd9a3 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -1,12 +1,10 @@ -import scanpy as sc import muon as mu class TestFilter(): - def test_filter_obs_simple(self): - adata = sc.datasets.pbmc3k_processed() + def test_filter_obs_simple(self, pbmc3k_processed): mdata = mu.MuData({ - "A": adata[:500, ].copy(), - "B": adata[500:, ].copy() + "A": pbmc3k_processed[:500, ].copy(), + "B": pbmc3k_processed[500:, ].copy() }) mu.pp.filter_obs(mdata, "A:louvain", lambda x: x == "B cells") assert mdata["B"].n_obs == 0 From 79a20a8e0bfcdebb395cad331fc6bcc852abfb88 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 15 Oct 2024 12:06:20 +0200 Subject: [PATCH 04/27] (fix): black + add in-place assertion --- tests/conftest.py | 2 ++ 
tests/test_filter.py | 14 ++++++++------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index f7feb94..4369a0d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,7 @@ import pytest import scanpy as sc + @pytest.fixture(scope="module") def filepath_h5mu(tmpdir_factory): yield str(tmpdir_factory.mktemp("tmp_test_dir").join("test.h5mu")) @@ -10,6 +11,7 @@ def filepath_h5mu(tmpdir_factory): def filepath_hdf5(tmpdir_factory): yield str(tmpdir_factory.mktemp("tmp_mofa_dir").join("mofa_pytest.hdf5")) + @pytest.fixture(scope="module") def pbmc3k_processed(): yield sc.datasets.pbmc3k_processed() diff --git a/tests/test_filter.py b/tests/test_filter.py index bbdd9a3..a3ee119 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -1,11 +1,13 @@ import muon as mu -class TestFilter(): + +class TestFilter: def test_filter_obs_simple(self, pbmc3k_processed): - mdata = mu.MuData({ - "A": pbmc3k_processed[:500, ].copy(), - "B": pbmc3k_processed[500:, ].copy() - }) + A = pbmc3k_processed[:500,].copy() + B = pbmc3k_processed[500:,].copy() + mdata = mu.MuData({"A": A, "B": B}) mu.pp.filter_obs(mdata, "A:louvain", lambda x: x == "B cells") assert mdata["B"].n_obs == 0 - assert mdata["A"].obs["louvain"].unique() == "B cells" \ No newline at end of file + assert mdata["A"].obs["louvain"].unique() == "B cells" + assert B.n_obs == 0 + assert A.obs["louvain"].unique() == "B cells" From 0afc23a69e8c2a2bab9564f91d97f7b58a8b3506 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 15 Oct 2024 12:06:29 +0200 Subject: [PATCH 05/27] (fix): do proper in-place update --- muon/_core/preproc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/muon/_core/preproc.py b/muon/_core/preproc.py index 4987160..953765d 100644 --- a/muon/_core/preproc.py +++ b/muon/_core/preproc.py @@ -760,7 +760,7 @@ def func(x): for m in data.mod.keys(): obsmap = data.obsmap[m][obs_subset] obsidx = obsmap > 0 - data.mod[m] = 
data.mod[m][data.mod[m].obs_names[obsmap[obsidx] - 1]] + data.mod[m]._obs = data.mod[m].obs.loc[data.mod[m].obs_names[obsmap[obsidx] - 1]] maporder = np.argsort(obsmap[obsidx]) nobsmap = np.empty(maporder.size) nobsmap[maporder] = np.arange(1, maporder.size + 1) From c2b5988c2ce9ed35d62022275589d08cfb9c4af1 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 15 Oct 2024 12:08:09 +0200 Subject: [PATCH 06/27] (core): clean up naming --- muon/_core/preproc.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/muon/_core/preproc.py b/muon/_core/preproc.py index 953765d..3b6a603 100644 --- a/muon/_core/preproc.py +++ b/muon/_core/preproc.py @@ -757,15 +757,16 @@ def func(x): else: # filter_obs() for each modality - for m in data.mod.keys(): - obsmap = data.obsmap[m][obs_subset] + for modality_key in data.mod.keys(): + obsmap = data.obsmap[modality_key][obs_subset] obsidx = obsmap > 0 - data.mod[m]._obs = data.mod[m].obs.loc[data.mod[m].obs_names[obsmap[obsidx] - 1]] + modality = data.mod[modality_key] + data.mod[modality_key]._obs = modality.loc[modality.obs_names[obsmap[obsidx] - 1]] maporder = np.argsort(obsmap[obsidx]) nobsmap = np.empty(maporder.size) nobsmap[maporder] = np.arange(1, maporder.size + 1) obsmap[obsidx] = nobsmap - data.obsmap[m] = obsmap + data.obsmap[modality_key] = obsmap return From 5f4ba9ffd22a1dc28df8982bf75e9e6ffd0dcebf Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 15 Oct 2024 12:12:02 +0200 Subject: [PATCH 07/27] (fix): gitignore file --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 81d184d..32b5732 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,9 @@ __pycache__/ # C extensions *.so +# cached data +data/ + # Distribution / packaging .Python build/ From f3eb615ba354485e6593478969e62506dd0f2f85 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 15 Oct 2024 12:17:50 +0200 Subject: [PATCH 08/27] (fix): CI branches --- .github/workflows/pythonpackage.yml | 4 ++-- 1 
file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 5f82d4f..e717144 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -2,9 +2,9 @@ name: Python package on: push: - branches: [master] + branches: [main] pull_request: - branches: [master] + branches: [main] schedule: - cron: "0 5 1,15 * *" From 9bb369f9de2e5cd93b8a38300fddc59f181100e4 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 15 Oct 2024 12:19:25 +0200 Subject: [PATCH 09/27] (fix): ci --- .github/workflows/pythonpackage.yml | 6 +++--- pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 5f82d4f..889d628 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -2,9 +2,9 @@ name: Python package on: push: - branches: [master] + branches: [main] pull_request: - branches: [master] + branches: [main] schedule: - cron: "0 5 1,15 * *" @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.12] + python-version: [3.10, 3.12] steps: - uses: actions/checkout@v4 diff --git a/pyproject.toml b/pyproject.toml index cbc8d63..d322dd7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ classifiers = [ "Topic :: Scientific/Engineering :: Bio-Informatics", "Intended Audience :: Science/Research" ] -requires-python = ">= 3.8" +requires-python = ">= 3.10" requires = [ "numpy", "pandas", From 763edcc097911e47e988db51c6ea516ca1784e41 Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 15 Oct 2024 12:21:21 +0200 Subject: [PATCH 10/27] (fix): use strings for versions --- .github/workflows/pythonpackage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 889d628..c055ef4 100644 --- a/.github/workflows/pythonpackage.yml 
+++ b/.github/workflows/pythonpackage.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.10, 3.12] + python-version: ["3.10", "3.12"] steps: - uses: actions/checkout@v4 From 2f3b85fb47bc55de246375d672cd1be8127f0daa Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 15 Oct 2024 12:23:20 +0200 Subject: [PATCH 11/27] (fix): try bigger tol --- tests/test_muon_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_muon_tools.py b/tests/test_muon_tools.py index 60bf927..794d388 100644 --- a/tests/test_muon_tools.py +++ b/tests/test_muon_tools.py @@ -144,7 +144,7 @@ def test_multi_group(self, filepath_hdf5): for sample, value in (("sample9_groupA", 1.719391), ("sample17_groupB", -2.057848)): si = np.where(mdata.obs.index == sample)[0] - assert mdata.obsm["X_mofa"][si, 0] == pytest.approx(value) + assert mdata.obsm["X_mofa"][si, 0] == pytest.approx(value, 1e-5) if __name__ == "__main__": From 9aaeac271017af172e61c3d9c6108ef90f9ab95c Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 15 Oct 2024 12:26:22 +0200 Subject: [PATCH 12/27] (fix): try bigger --- tests/test_muon_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_muon_tools.py b/tests/test_muon_tools.py index 794d388..a38fa2d 100644 --- a/tests/test_muon_tools.py +++ b/tests/test_muon_tools.py @@ -144,7 +144,7 @@ def test_multi_group(self, filepath_hdf5): for sample, value in (("sample9_groupA", 1.719391), ("sample17_groupB", -2.057848)): si = np.where(mdata.obs.index == sample)[0] - assert mdata.obsm["X_mofa"][si, 0] == pytest.approx(value, 1e-5) + assert mdata.obsm["X_mofa"][si, 0] == pytest.approx(value, 1e-4) if __name__ == "__main__": From b771af720254ad517c3a511f8deb60a80d475e4b Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Tue, 15 Oct 2024 12:30:41 +0200 Subject: [PATCH 13/27] (fix): use `item` --- tests/test_muon_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/tests/test_muon_tools.py b/tests/test_muon_tools.py index a38fa2d..089f5d6 100644 --- a/tests/test_muon_tools.py +++ b/tests/test_muon_tools.py @@ -144,7 +144,7 @@ def test_multi_group(self, filepath_hdf5): for sample, value in (("sample9_groupA", 1.719391), ("sample17_groupB", -2.057848)): si = np.where(mdata.obs.index == sample)[0] - assert mdata.obsm["X_mofa"][si, 0] == pytest.approx(value, 1e-4) + assert mdata.obsm["X_mofa"][si, 0].item() == pytest.approx(value) if __name__ == "__main__": From 5e9d80fe222ee6ddc9a77d34667310f028143f1c Mon Sep 17 00:00:00 2001 From: Danila Date: Wed, 16 Oct 2024 02:41:18 -0700 Subject: [PATCH 14/27] Compatibility with NumPy v2 to fix CI --- muon/_atac/preproc.py | 2 +- tests/test_muon_tools.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/muon/_atac/preproc.py b/muon/_atac/preproc.py index fee70cc..1756a38 100644 --- a/muon/_atac/preproc.py +++ b/muon/_atac/preproc.py @@ -116,7 +116,7 @@ def tfidf( if log_tfidf: tf_idf = np.log1p(tf_idf) - res = np.nan_to_num(tf_idf, 0) + res = np.nan_to_num(tf_idf, nan=0.0) if not inplace: return res diff --git a/tests/test_muon_tools.py b/tests/test_muon_tools.py index 089f5d6..e08346f 100644 --- a/tests/test_muon_tools.py +++ b/tests/test_muon_tools.py @@ -142,7 +142,7 @@ def test_multi_group(self, filepath_hdf5): assert all(mdata.obs.group.values == mdata.obs.true_group.values) - for sample, value in (("sample9_groupA", 1.719391), ("sample17_groupB", -2.057848)): + for sample, value in (("sample9_groupA", -1.719391), ("sample17_groupB", 2.057848)): si = np.where(mdata.obs.index == sample)[0] assert mdata.obsm["X_mofa"][si, 0].item() == pytest.approx(value) From 3af3502488cd60502494e8e5a71aca2aef5e35d7 Mon Sep 17 00:00:00 2001 From: Danila Date: Wed, 16 Oct 2024 03:51:21 -0700 Subject: [PATCH 15/27] Fix filter_obs and filter_var to work with the latest anndata --- muon/_core/preproc.py | 96 +++++++++++++++++++++++++++----------- pyproject.toml | 1 + 
tests/test_filter.py | 13 ------ tests/test_muon_preproc.py | 20 ++++++++ 4 files changed, 89 insertions(+), 41 deletions(-) delete mode 100644 tests/test_filter.py diff --git a/muon/_core/preproc.py b/muon/_core/preproc.py index 3b6a603..d6b3219 100644 --- a/muon/_core/preproc.py +++ b/muon/_core/preproc.py @@ -720,19 +720,27 @@ def func(x): else: raise ValueError("When providing obs_names directly, func has to be None.") - # Subset .obs - data._obs = data.obs[obs_subset] - data._n_obs = data.obs.shape[0] - - # Subset .obsm - for k, v in data.obsm.items(): - data.obsm[k] = v[obs_subset] - - # Subset .obsp - for k, v in data.obsp.items(): - data.obsp[k] = v[obs_subset][:, obs_subset] - if isinstance(data, AnnData): + # Collect elements to subset + # NOTE: accessing them after subsetting .obs + # will fail due to _validate_value() + obsm = dict(data.obsm) + obsp = dict(data.obsp) + + # Subset .obs + data._obs = data.obs[obs_subset] + data._n_obs = data.obs.shape[0] + + # Subset .obsm + for k, v in obsm.items(): + obsm[k] = v[obs_subset] + data.obsm = obsm + + # Subset .obsp + for k, v in obsp.items(): + obsp[k] = v[obs_subset][:, obs_subset] + data.obsp = obsp + # Subset .X if data._X is not None: try: @@ -756,17 +764,28 @@ def func(x): data.raw._n_obs = data.raw.X.shape[0] else: + # Subset .obs + data._obs = data.obs[obs_subset] + data._n_obs = data.obs.shape[0] + + # Subset .obsm + for k, v in data.obsm.items(): + data.obsm[k] = v[obs_subset] + + # Subset .obsp + for k, v in data.obsp.items(): + data.obsp[k] = v[obs_subset][:, obs_subset] + # filter_obs() for each modality - for modality_key in data.mod.keys(): - obsmap = data.obsmap[modality_key][obs_subset] + for m, mod in data.mod.items(): + obsmap = data.obsmap[m][obs_subset] obsidx = obsmap > 0 - modality = data.mod[modality_key] - data.mod[modality_key]._obs = modality.loc[modality.obs_names[obsmap[obsidx] - 1]] + filter_obs(mod, mod.obs_names[obsmap[obsidx] - 1]) maporder = np.argsort(obsmap[obsidx]) 
nobsmap = np.empty(maporder.size) nobsmap[maporder] = np.arange(1, maporder.size + 1) obsmap[obsidx] = nobsmap - data.obsmap[modality_key] = obsmap + data.obsmap[m] = obsmap return @@ -831,19 +850,28 @@ def func(x): else: raise ValueError("When providing var_names directly, func has to be None.") - # Subset .var - data._var = data.var[var_subset] - data._n_vars = data.var.shape[0] - - # Subset .varm - for k, v in data.varm.items(): - data.varm[k] = v[var_subset] - - # Subset .varp - for k, v in data.varp.items(): - data.varp[k] = v[var_subset][:, var_subset] if isinstance(data, AnnData): + # Collect elements to subset + # NOTE: accessing them after subsetting .var + # will fail due to _validate_value() + varm = dict(data.varm) + varp = dict(data.varp) + + # Subset .obs + data._var = data.var[var_subset] + data._n_vars = data.var.shape[0] + + # Subset .obsm + for k, v in varm.items(): + varm[k] = v[var_subset] + data.varm = varm + + # Subset .obsp + for k, v in varp.items(): + varp[k] = v[var_subset][:, var_subset] + data.varp = varp + # Subset .X try: data._X = data.X[:, var_subset] @@ -862,6 +890,18 @@ def func(x): # NOTE: .raw is not subsetted else: + # Subset .var + data._var = data.var[var_subset] + data._n_vars = data.var.shape[0] + + # Subset .varm + for k, v in data.varm.items(): + data.varm[k] = v[var_subset] + + # Subset .varp + for k, v in data.varp.items(): + data.varp[k] = v[var_subset][:, var_subset] + # filter_var() for each modality for m, mod in data.mod.items(): varmap = data.varmap[m][var_subset] diff --git a/pyproject.toml b/pyproject.toml index cbc8d63..0449d0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,7 @@ atac = [ test = [ "pytest", "flake8", + "pytest", ] [tool.flit.metadata.urls] diff --git a/tests/test_filter.py b/tests/test_filter.py deleted file mode 100644 index a3ee119..0000000 --- a/tests/test_filter.py +++ /dev/null @@ -1,13 +0,0 @@ -import muon as mu - - -class TestFilter: - def test_filter_obs_simple(self, 
pbmc3k_processed): - A = pbmc3k_processed[:500,].copy() - B = pbmc3k_processed[500:,].copy() - mdata = mu.MuData({"A": A, "B": B}) - mu.pp.filter_obs(mdata, "A:louvain", lambda x: x == "B cells") - assert mdata["B"].n_obs == 0 - assert mdata["A"].obs["louvain"].unique() == "B cells" - assert B.n_obs == 0 - assert A.obs["louvain"].unique() == "B cells" diff --git a/tests/test_muon_preproc.py b/tests/test_muon_preproc.py index b4e388c..586668b 100644 --- a/tests/test_muon_preproc.py +++ b/tests/test_muon_preproc.py @@ -83,6 +83,16 @@ def test_filter_obs_adata_view(self, mdata, filepath_h5mu): sub = np.random.binomial(1, 0.5, view.n_obs).astype(bool) mu.pp.filter_obs(view, sub) + def test_filter_obs_with_obsm_obsp(self, pbmc3k_processed): + A = pbmc3k_processed[:500,].copy() + B = pbmc3k_processed[500:,].copy() + mdata = mu.MuData({"A": A, "B": B}) + mu.pp.filter_obs(mdata, "A:louvain", lambda x: x == "B cells") + assert mdata["B"].n_obs == 0 + assert mdata["A"].obs["louvain"].unique() == "B cells" + assert B.n_obs == 0 + assert A.obs["louvain"].unique() == "B cells" + # Variables def test_filter_var_adata(self, mdata, filepath_h5mu): @@ -132,6 +142,16 @@ def test_filter_var_adata_view(self, mdata, filepath_h5mu): sub = np.random.binomial(1, 0.5, view.n_vars).astype(bool) mu.pp.filter_var(view, sub) + def test_filter_var_with_varm_varp(self, pbmc3k_processed): + A = pbmc3k_processed[:500,].copy() + B = pbmc3k_processed[500:,].copy() + mdata = mu.MuData({"A": A, "B": B}) + np.random.seed(42) + var_sel = np.random.choice(np.array([0, 1]), size=mdata.n_vars, replace=True) + mdata.var["sel"] = var_sel + mu.pp.filter_var(mdata, "sel", lambda y: y == 1) + assert mdata.shape[1] == int(np.sum(var_sel)) + @pytest.mark.usefixtures("filepath_h5mu") class TestIntersectObs: From 9a4c931890ff3ad0b391aad102338ee2f3513dea Mon Sep 17 00:00:00 2001 From: Danila Date: Wed, 16 Oct 2024 03:53:12 -0700 Subject: [PATCH 16/27] Black formatting --- muon/_core/preproc.py | 3 +-- 1 file 
changed, 1 insertion(+), 2 deletions(-) diff --git a/muon/_core/preproc.py b/muon/_core/preproc.py index d6b3219..2584649 100644 --- a/muon/_core/preproc.py +++ b/muon/_core/preproc.py @@ -850,7 +850,6 @@ def func(x): else: raise ValueError("When providing var_names directly, func has to be None.") - if isinstance(data, AnnData): # Collect elements to subset # NOTE: accessing them after subsetting .var @@ -901,7 +900,7 @@ def func(x): # Subset .varp for k, v in data.varp.items(): data.varp[k] = v[var_subset][:, var_subset] - + # filter_var() for each modality for m, mod in data.mod.items(): varmap = data.varmap[m][var_subset] From 7eaf611c298322ce8413ae298458d58309329f4c Mon Sep 17 00:00:00 2001 From: ilan-gold Date: Wed, 16 Oct 2024 12:55:50 +0200 Subject: [PATCH 17/27] (fix): add full object test --- tests/test_filter.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_filter.py b/tests/test_filter.py index a3ee119..10c2277 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -1,13 +1,18 @@ import muon as mu +from anndata.tests.helpers import assert_equal class TestFilter: def test_filter_obs_simple(self, pbmc3k_processed): A = pbmc3k_processed[:500,].copy() + A_subset = A[A.obs["louvain"] == "B cells"].copy() B = pbmc3k_processed[500:,].copy() + B_subset = B[B.obs["louvain"] == "NOT HERE"].copy() mdata = mu.MuData({"A": A, "B": B}) mu.pp.filter_obs(mdata, "A:louvain", lambda x: x == "B cells") assert mdata["B"].n_obs == 0 assert mdata["A"].obs["louvain"].unique() == "B cells" assert B.n_obs == 0 assert A.obs["louvain"].unique() == "B cells" + assert_equal(mdata["A"], A_subset) + assert_equal(mdata["B"], B_subset) From 957c6184af9ec93cd432579ce597d9898cd36ae7 Mon Sep 17 00:00:00 2001 From: Danila <32863903+gtca@users.noreply.github.com> Date: Wed, 16 Oct 2024 13:05:35 +0200 Subject: [PATCH 18/27] Drop Python 3.8 from CI --- .github/workflows/pythonpackage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 5f82d4f..937e62f 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.12] + python-version: [3.10, 3.11, 3.12] steps: - uses: actions/checkout@v4 From 2aab65cee21daba24ee779fc3207a978a469a1c3 Mon Sep 17 00:00:00 2001 From: Danila Date: Wed, 16 Oct 2024 04:11:35 -0700 Subject: [PATCH 19/27] Remove unused categories after filtering --- muon/_core/preproc.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/muon/_core/preproc.py b/muon/_core/preproc.py index 2584649..685bb1c 100644 --- a/muon/_core/preproc.py +++ b/muon/_core/preproc.py @@ -780,7 +780,9 @@ def func(x): for m, mod in data.mod.items(): obsmap = data.obsmap[m][obs_subset] obsidx = obsmap > 0 + orig_obs = mod.obs.copy() filter_obs(mod, mod.obs_names[obsmap[obsidx] - 1]) + data.mod[m]._remove_unused_categories(orig_obs, mod.obs, mod.uns) maporder = np.argsort(obsmap[obsidx]) nobsmap = np.empty(maporder.size) nobsmap[maporder] = np.arange(1, maporder.size + 1) @@ -905,7 +907,9 @@ def func(x): for m, mod in data.mod.items(): varmap = data.varmap[m][var_subset] varidx = varmap > 0 + orig_var = mod.var.copy() filter_var(mod, mod.var_names[varmap[varidx] - 1]) + data.mod[m]._remove_unused_categories(orig_var, mod.var, mod.uns) maporder = np.argsort(varmap[varidx]) nvarmap = np.empty(maporder.size) nvarmap[maporder] = np.arange(1, maporder.size + 1) From 172223796ed96705e53dffa049bb8db8a29a3436 Mon Sep 17 00:00:00 2001 From: Danila Date: Wed, 16 Oct 2024 04:20:43 -0700 Subject: [PATCH 20/27] Improve tests for in-place filtering --- tests/test_muon_preproc.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/tests/test_muon_preproc.py b/tests/test_muon_preproc.py index 428fec0..3465f39 100644 --- a/tests/test_muon_preproc.py +++ b/tests/test_muon_preproc.py @@ 
-83,15 +83,15 @@ def test_filter_obs_adata_view(self, mdata, filepath_h5mu): mu.pp.filter_obs(view, sub) def test_filter_obs_with_obsm_obsp(self, pbmc3k_processed): - A = pbmc3k_processed[:500,].copy() + A = pbmc3k_processed[:, :500].copy() + B = pbmc3k_processed[:, 500:].copy() A_subset = A[A.obs["louvain"] == "B cells"].copy() - B = pbmc3k_processed[500:,].copy() - B_subset = B[B.obs["louvain"] == "NOT HERE"].copy() + B_subset = B[B.obs["louvain"] == "B cells"].copy() mdata = mu.MuData({"A": A, "B": B}) mu.pp.filter_obs(mdata, "A:louvain", lambda x: x == "B cells") - assert mdata["B"].n_obs == 0 + assert mdata["B"].n_obs == B_subset.n_obs assert mdata["A"].obs["louvain"].unique() == "B cells" - assert B.n_obs == 0 + assert B.n_obs == B_subset.n_obs assert A.obs["louvain"].unique() == "B cells" assert_equal(mdata["A"], A_subset) assert_equal(mdata["B"], B_subset) @@ -146,14 +146,21 @@ def test_filter_var_adata_view(self, mdata, filepath_h5mu): mu.pp.filter_var(view, sub) def test_filter_var_with_varm_varp(self, pbmc3k_processed): - A = pbmc3k_processed[:500,].copy() - B = pbmc3k_processed[500:,].copy() - mdata = mu.MuData({"A": A, "B": B}) + A = pbmc3k_processed[:, :500].copy() + B = pbmc3k_processed[:, 500:].copy() np.random.seed(42) - var_sel = np.random.choice(np.array([0, 1]), size=mdata.n_vars, replace=True) - mdata.var["sel"] = var_sel + A_var_sel = np.random.choice(np.array([0, 1]), size=A.n_vars, replace=True) + B_var_sel = np.random.choice(np.array([0, 1]), size=B.n_vars, replace=True) + A.var["sel"] = A_var_sel + B.var["sel"] = B_var_sel + A_subset = A[:, A_var_sel == 1].copy() + B_subset = B[:, B_var_sel == 1].copy() + mdata = mu.MuData({"A": A, "B": B}) + mdata.pull_var("sel") mu.pp.filter_var(mdata, "sel", lambda y: y == 1) - assert mdata.shape[1] == int(np.sum(var_sel)) + assert mdata.shape[1] == int(np.sum(A_var_sel) + np.sum(B_var_sel)) + assert_equal(mdata["A"], A_subset) + assert_equal(mdata["B"], B_subset) 
@pytest.mark.usefixtures("filepath_h5mu") From 04489bf5f6138038f0c24468b5d6b0e11fc14b2d Mon Sep 17 00:00:00 2001 From: Danila Date: Wed, 16 Oct 2024 16:03:07 -0700 Subject: [PATCH 21/27] Make tests for filtering more sensible --- tests/test_muon_preproc.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/test_muon_preproc.py b/tests/test_muon_preproc.py index 3465f39..8a4ed99 100644 --- a/tests/test_muon_preproc.py +++ b/tests/test_muon_preproc.py @@ -83,12 +83,13 @@ def test_filter_obs_adata_view(self, mdata, filepath_h5mu): mu.pp.filter_obs(view, sub) def test_filter_obs_with_obsm_obsp(self, pbmc3k_processed): - A = pbmc3k_processed[:, :500].copy() - B = pbmc3k_processed[:, 500:].copy() + A = pbmc3k_processed[:500,].copy() + B = pbmc3k_processed[500:,].copy() A_subset = A[A.obs["louvain"] == "B cells"].copy() B_subset = B[B.obs["louvain"] == "B cells"].copy() - mdata = mu.MuData({"A": A, "B": B}) - mu.pp.filter_obs(mdata, "A:louvain", lambda x: x == "B cells") + mdata = mu.MuData({"A": A, "B": B}, axis=1) + mdata.pull_obs("louvain") + mu.pp.filter_obs(mdata, "louvain", lambda x: x == "B cells") assert mdata["B"].n_obs == B_subset.n_obs assert mdata["A"].obs["louvain"].unique() == "B cells" assert B.n_obs == B_subset.n_obs From 1909f5190e32741799b684474563233198e7b06f Mon Sep 17 00:00:00 2001 From: Danila Date: Wed, 16 Oct 2024 16:03:46 -0700 Subject: [PATCH 22/27] Deduplicate filter_obs/filter_var code --- muon/_core/preproc.py | 258 +++++++++++++++--------------------------- 1 file changed, 90 insertions(+), 168 deletions(-) diff --git a/muon/_core/preproc.py b/muon/_core/preproc.py index 685bb1c..b108797 100644 --- a/muon/_core/preproc.py +++ b/muon/_core/preproc.py @@ -657,23 +657,24 @@ def intersect_obs(mdata: MuData): return -# Utility functions: filtering observations +# Utility functions: filtering observations or variables -def filter_obs( - data: Union[AnnData, MuData], var: Union[str, Sequence[str]], func: 
Optional[Callable] = None +def _filter_attr( + data: Union[AnnData, MuData], + attr: Literal["obs", "var"], + key: Union[str, Sequence[str]], + func: Optional[Callable] = None, ) -> None: """ - Filter observations (samples or cells) in-place - using any column in .obs or in .X. + Filter observations or variables in-place. Parameters ---------- data: AnnData or MuData AnnData or MuData object - var: str or Sequence[str] - Column name in .obs or in .X to be used for filtering. - Alternatively, obs_names can be provided directly. + key: str or Sequence[str] + Names or key to filter func Function to apply to the variable used for filtering. If the variable is of type boolean and func is an identity function, @@ -694,59 +695,76 @@ def filter_obs( "MuData object is backed. The requested subset of the .X matrices of its modalities will be read into memory, and the object will not be backed anymore." ) - if isinstance(var, str): - if var in data.obs.columns: + assert attr in ("obs", "var"), "Attribute has to be either 'obs' or 'var'." 
+ + df = getattr(data, attr) + names = getattr(data, f"{attr}_names") + other = "obs" if attr == "var" else "var" + other_names = getattr(data, f"{other}_names") + attrm = getattr(data, f"{attr}m") + attrp = getattr(data, f"{attr}p") + + if isinstance(key, str): + if key in df.columns: if func is None: - if data.obs[var].dtypes.name == "bool": + if df[key].dtypes.name == "bool": def func(x): return x else: - raise ValueError(f"Function has to be provided since {var} is not boolean") - obs_subset = func(data.obs[var].values) - elif var in data.var_names: - obs_subset = func(data.X[:, np.where(data.var_names == var)[0]].reshape(-1)) + raise ValueError(f"Function has to be provided since {key} is not boolean") + subset = func(df[key].values) + elif key in other_names: + if attr == "obs": + subset = func(data.X[:, np.where(other_names == key)[0]].reshape(-1)) + else: + subset = func(data.X[np.where(other_names == key)[0], :].reshape(-1)) else: raise ValueError( - f"Column name from .obs or one of the var_names was expected but got {var}." + f"Column name from .{attr} or one of the {other}_names was expected but got {key}." 
) else: if func is None: - if np.array(var).dtype == bool: - obs_subset = np.array(var) + if np.array(key).dtype == bool: + subset = np.array(key) else: - obs_subset = data.obs_names.isin(var) + subset = names.isin(key) else: - raise ValueError("When providing obs_names directly, func has to be None.") + raise ValueError(f"When providing {attr}_names directly, func has to be None.") if isinstance(data, AnnData): # Collect elements to subset - # NOTE: accessing them after subsetting .obs + # NOTE: accessing them after subsetting .obs/.var # will fail due to _validate_value() - obsm = dict(data.obsm) - obsp = dict(data.obsp) + attrm = dict(attrm) + attrp = dict(attrp) - # Subset .obs - data._obs = data.obs[obs_subset] - data._n_obs = data.obs.shape[0] + # Subset .obs/.var + setattr(data, f"_{attr}", df[subset]) - # Subset .obsm - for k, v in obsm.items(): - obsm[k] = v[obs_subset] - data.obsm = obsm + # Subset .obsm/.varm + for k, v in attrm.items(): + attrm[k] = v[subset] + setattr(data, f"{attr}m", attrm) - # Subset .obsp - for k, v in obsp.items(): - obsp[k] = v[obs_subset][:, obs_subset] - data.obsp = obsp + # Subset .obsp/.obsp + for k, v in attrp.items(): + attrp[k] = v[subset][:, subset] + setattr(data, f"{attr}p", attrp) # Subset .X if data._X is not None: try: - data._X = data.X[obs_subset, :] + if attr == "obs": + data._X = data.X[subset, :] + else: + data._X = data.X[:, subset] except TypeError: - data._X = data.X[np.where(obs_subset)[0], :] + if attr == "obs": + data._X = data.X[np.where(subset)[0], :] + else: + data._X = data.X[:, np.where(subset)[0]] # For some h5py versions, indexing arrays must have integer dtypes # https://github.com/h5py/h5py/issues/1847 @@ -756,165 +774,69 @@ def func(x): # Subset layers for layer in data.layers: - data.layers[layer] = data.layers[layer][obs_subset, :] + if attr == "obs": + data.layers[layer] = data.layers[layer][subset, :] + else: + data.layers[layer] = data.layers[layer][:, subset] - # Subset raw - if data.raw is 
not None: - data.raw._X = data.raw.X[obs_subset, :] - data.raw._n_obs = data.raw.X.shape[0] + # Subset raw - only when subsetting obs + if attr == "obs" and data.raw is not None: + data.raw._X = data.raw.X[subset, :] else: - # Subset .obs - data._obs = data.obs[obs_subset] - data._n_obs = data.obs.shape[0] + attrmap = getattr(data, f"{attr}map") + + # Subset .obs/.var + setattr(data, f"_{attr}", df[subset]) - # Subset .obsm - for k, v in data.obsm.items(): - data.obsm[k] = v[obs_subset] + # Subset .obsm/.varm + for k, v in attrm.items(): + attrm[k] = v[subset] + setattr(data, f"{attr}m", attrm) - # Subset .obsp - for k, v in data.obsp.items(): - data.obsp[k] = v[obs_subset][:, obs_subset] + # Subset .obsp/.varp + for k, v in attrp.items(): + attrp[k] = v[subset][:, subset] + setattr(data, f"{attr}p", attrp) - # filter_obs() for each modality + # _filter_attr() for each modality for m, mod in data.mod.items(): - obsmap = data.obsmap[m][obs_subset] - obsidx = obsmap > 0 - orig_obs = mod.obs.copy() - filter_obs(mod, mod.obs_names[obsmap[obsidx] - 1]) - data.mod[m]._remove_unused_categories(orig_obs, mod.obs, mod.uns) - maporder = np.argsort(obsmap[obsidx]) + map_subset = attrmap[m][subset] + attridx = map_subset > 0 + orig_attr = getattr(mod, attr).copy() + mod_names = getattr(mod, f"{attr}_names") + _filter_attr(mod, attr, mod_names[map_subset[attridx] - 1]) + data.mod[m]._remove_unused_categories(orig_attr, getattr(mod, attr), mod.uns) + maporder = np.argsort(map_subset[attridx]) nobsmap = np.empty(maporder.size) nobsmap[maporder] = np.arange(1, maporder.size + 1) - obsmap[obsidx] = nobsmap - data.obsmap[m] = obsmap + map_subset[attridx] = nobsmap + getattr(data, f"{attr}map")[m] = map_subset return -# Utility functions: filtering variables - - -def filter_var( +def filter_obs( data: Union[AnnData, MuData], var: Union[str, Sequence[str]], func: Optional[Callable] = None -): +) -> None: """ - Filter variables (features, e.g. 
genes) in-place - using any column in .var or row in .X. + Filter observations (samples or cells) in-place + using any column in .obs or in .X. Parameters ---------- data: AnnData or MuData AnnData or MuData object var: str or Sequence[str] - Column name in .var or row name in .X to be used for filtering. - Alternatively, var_names can be provided directly. + Column name in .obs or in .X to be used for filtering. + Alternatively, obs_names can be provided directly. func Function to apply to the variable used for filtering. If the variable is of type boolean and func is an identity function, the func argument can be omitted. """ - if data.is_view: - raise ValueError( - "The provided adata is a view. In-place filtering does not operate on views." - ) - if data.isbacked: - if isinstance(data, AnnData): - warnings.warn( - "AnnData object is backed. The requested subset of the matrix .X will be read into memory, and the object will not be backed anymore." - ) - else: - warnings.warn( - "MuData object is backed. The requested subset of the .X matrices of its modalities will be read into memory, and the object will not be backed anymore." - ) - - if isinstance(var, str): - if var in data.var.columns: - if func is None: - if data.var[var].dtypes.name == "bool": - - def func(x): - return x - - else: - raise ValueError(f"Function has to be provided since {var} is not boolean") - var_subset = func(data.var[var].values) - elif var in data.obs_names: - var_subset = func(data.X[:, np.where(data.obs_names == var)[0]].reshape(-1)) - else: - raise ValueError( - f"Column name from .var or one of the obs_names was expected but got {var}." 
- ) - else: - if func is None: - var_subset = var if np.array(var).dtype == bool else data.var_names.isin(var) - else: - raise ValueError("When providing var_names directly, func has to be None.") - - if isinstance(data, AnnData): - # Collect elements to subset - # NOTE: accessing them after subsetting .var - # will fail due to _validate_value() - varm = dict(data.varm) - varp = dict(data.varp) - - # Subset .obs - data._var = data.var[var_subset] - data._n_vars = data.var.shape[0] - - # Subset .obsm - for k, v in varm.items(): - varm[k] = v[var_subset] - data.varm = varm - - # Subset .obsp - for k, v in varp.items(): - varp[k] = v[var_subset][:, var_subset] - data.varp = varp - - # Subset .X - try: - data._X = data.X[:, var_subset] - except TypeError: - data._X = data.X[:, np.where(var_subset)[0]] - # For some h5py versions, indexing arrays must have integer dtypes - # https://github.com/h5py/h5py/issues/1847 - if data.isbacked: - data.file.close() - data.filename = None - - # Subset layers - for layer in data.layers: - data.layers[layer] = data.layers[layer][:, var_subset] - - # NOTE: .raw is not subsetted - - else: - # Subset .var - data._var = data.var[var_subset] - data._n_vars = data.var.shape[0] - - # Subset .varm - for k, v in data.varm.items(): - data.varm[k] = v[var_subset] - - # Subset .varp - for k, v in data.varp.items(): - data.varp[k] = v[var_subset][:, var_subset] - - # filter_var() for each modality - for m, mod in data.mod.items(): - varmap = data.varmap[m][var_subset] - varidx = varmap > 0 - orig_var = mod.var.copy() - filter_var(mod, mod.var_names[varmap[varidx] - 1]) - data.mod[m]._remove_unused_categories(orig_var, mod.var, mod.uns) - maporder = np.argsort(varmap[varidx]) - nvarmap = np.empty(maporder.size) - nvarmap[maporder] = np.arange(1, maporder.size + 1) - varmap[varidx] = nvarmap - data.varmap[m] = varmap + _filter_attr(data, "obs", var, func) return From 8d38a0307715b8e2db42b2e1f741d6de6eaf1b7b Mon Sep 17 00:00:00 2001 From: Danila 
<32863903+gtca@users.noreply.github.com> Date: Thu, 17 Oct 2024 01:05:56 +0200 Subject: [PATCH 23/27] Update pythonpackage.yml Remember to put 3.10 in quotes --- .github/workflows/pythonpackage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 937e62f..bf7659d 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.10, 3.11, 3.12] + python-version: ["3.10", 3.11, 3.12] steps: - uses: actions/checkout@v4 From 10060b5a8959241e68c16168d110af3feb3eece2 Mon Sep 17 00:00:00 2001 From: Danila Date: Wed, 16 Oct 2024 16:18:59 -0700 Subject: [PATCH 24/27] Fix missing filter_var() --- muon/_core/preproc.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/muon/_core/preproc.py b/muon/_core/preproc.py index b108797..04c0735 100644 --- a/muon/_core/preproc.py +++ b/muon/_core/preproc.py @@ -841,6 +841,31 @@ def filter_obs( return +def filter_var( + data: Union[AnnData, MuData], var: Union[str, Sequence[str]], func: Optional[Callable] = None +): + """ + Filter variables (features, e.g. genes) in-place + using any column in .var or row in .X. + + Parameters + ---------- + data: AnnData or MuData + AnnData or MuData object + var: str or Sequence[str] + Column name in .var or row name in .X to be used for filtering. + Alternatively, var_names can be provided directly. + func + Function to apply to the variable used for filtering. + If the variable is of type boolean and func is an identity function, + the func argument can be omitted. 
+ """ + + _filter_attr(data, "var", var, func) + + return + + # Subsampling observations From 52f9e1d8d53cb1cc0d01edb66f45fb7b38ecb18b Mon Sep 17 00:00:00 2001 From: Danila Date: Wed, 16 Oct 2024 16:36:56 -0700 Subject: [PATCH 25/27] Use sc.pl.scatter in mu.pl.scatter --- muon/_core/plot.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/muon/_core/plot.py b/muon/_core/plot.py index 7d72f3e..752518c 100644 --- a/muon/_core/plot.py +++ b/muon/_core/plot.py @@ -53,9 +53,7 @@ def scatter( No layer is used by default. A single layer value will be expanded to [layer, layer, layer]. """ if isinstance(data, AnnData): - return sc.pl.embedding( - data, x=x, y=y, color=color, use_raw=use_raw, layers=layers, **kwargs - ) + return sc.pl.scatter(data, x=x, y=y, color=color, use_raw=use_raw, layers=layers, **kwargs) if isinstance(layers, str) or layers is None: layers = [layers, layers, layers] @@ -74,7 +72,6 @@ def scatter( color_obs = pd.DataFrame({color: color_obs}) else: raise TypeError("Expected color to be a string.") - color_obs.index = data.obs_names obs = pd.concat([obs, color_obs], axis=1, ignore_index=False) @@ -93,7 +90,6 @@ def scatter( return retval - # # Embedding # From 34c9de2b8af15bf735613566abf758f2e240d669 Mon Sep 17 00:00:00 2001 From: Danila Date: Wed, 16 Oct 2024 16:51:36 -0700 Subject: [PATCH 26/27] Add a simple test for mu.pl.scatter() --- tests/test_muon_plot.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tests/test_muon_plot.py diff --git a/tests/test_muon_plot.py b/tests/test_muon_plot.py new file mode 100644 index 0000000..d1642cf --- /dev/null +++ b/tests/test_muon_plot.py @@ -0,0 +1,31 @@ +import pytest + +import numpy as np +from scipy import sparse +import pandas as pd +from anndata import AnnData +import muon as mu +from muon import MuData +import matplotlib + +matplotlib.use("Agg") + + +@pytest.fixture() +def mdata(): + mdata = MuData( + { + "mod1": AnnData(np.arange(0, 100, 
0.1).reshape(-1, 10)), + "mod2": AnnData(np.arange(101, 2101, 1).reshape(-1, 20)), + } + ) + mdata.var_names_make_unique() + yield mdata + + +class TestScatter: + def test_pl_scatter(self, mdata): + mdata = mdata.copy() + np.random.seed(42) + mdata.obs["condition"] = np.random.choice(["a", "b"], mdata.n_obs) + mu.pl.scatter(mdata, x="mod1:0", y="mod2:0", color="condition") From 9b98f5192458605d0bd2d86e3a7a029c0bbbf484 Mon Sep 17 00:00:00 2001 From: Danila Date: Wed, 16 Oct 2024 17:10:28 -0700 Subject: [PATCH 27/27] Propagate error to scanpy according to its promised functionality --- muon/_core/plot.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/muon/_core/plot.py b/muon/_core/plot.py index 752518c..52be000 100644 --- a/muon/_core/plot.py +++ b/muon/_core/plot.py @@ -1,4 +1,4 @@ -from typing import Union, List, Optional, Iterable, Sequence, Dict +from typing import Dict, Iterable, List, Optional, Sequence, Union import warnings from matplotlib.axes import Axes @@ -22,7 +22,7 @@ def scatter( data: Union[AnnData, MuData], x: Optional[str] = None, y: Optional[str] = None, - color: Optional[str] = None, + color: Optional[Union[str, Sequence[str]]] = None, use_raw: Optional[bool] = None, layers: Optional[Union[str, Sequence[str]]] = None, **kwargs, @@ -42,8 +42,8 @@ def scatter( x coordinate y : Optional[str] y coordinate - color : Optional[str], optional (default: None) - Key for variables or annotations of observations (.obs columns), + color : Optional[Union[str, Sequence[str]]], optional (default: None) + Keys or a single key for variables or annotations of observations (.obs columns), or a hex colour specification. use_raw : Optional[bool], optional (default: None) Use `.raw` attribute of the modality where a feature (from `color`) is derived from. 
@@ -71,7 +71,7 @@ def scatter( color_obs = _get_values(data, color, use_raw=use_raw, layer=layers[2]) color_obs = pd.DataFrame({color: color_obs}) else: - raise TypeError("Expected color to be a string.") + color_obs = _get_values(data, color, use_raw=use_raw, layer=layers[2]) color_obs.index = data.obs_names obs = pd.concat([obs, color_obs], axis=1, ignore_index=False)