From eadfdd80cd843e6aacb05012cb19ae778877ef71 Mon Sep 17 00:00:00 2001 From: jameschapman19 Date: Fri, 18 Aug 2023 13:13:21 +0000 Subject: [PATCH] Format code with black --- cca_zoo/_base.py | 93 ++++++++++++++++++--------------- cca_zoo/linear/_pls.py | 1 - test/test_explained_variance.py | 35 ++++++++++--- test/test_nonparametric.py | 8 +-- 4 files changed, 79 insertions(+), 58 deletions(-) diff --git a/cca_zoo/_base.py b/cca_zoo/_base.py index 3fed6c9c..ff8736e8 100644 --- a/cca_zoo/_base.py +++ b/cca_zoo/_base.py @@ -129,7 +129,9 @@ def fit_transform(self, views: Iterable[np.ndarray], **kwargs) -> List[np.ndarra """ return self.fit(views, **kwargs).transform(views, **kwargs) - def pairwise_correlations(self, views: Iterable[np.ndarray], **kwargs) -> np.ndarray: + def pairwise_correlations( + self, views: Iterable[np.ndarray], **kwargs + ) -> np.ndarray: """ Returns the pairwise correlations between the views in each dimension @@ -161,7 +163,9 @@ def pairwise_correlations(self, views: Iterable[np.ndarray], **kwargs) -> np.nda print() return all_corrs - def score(self, views: Iterable[np.ndarray], y: Optional[Any] = None, **kwargs) -> float: + def score( + self, views: Iterable[np.ndarray], y: Optional[Any] = None, **kwargs + ) -> float: """ Returns the average pairwise correlation between the views @@ -186,7 +190,9 @@ def score(self, views: Iterable[np.ndarray], y: Optional[Any] = None, **kwargs) dim_corrs = dim_corrs / (2 * num_pairs) return dim_corrs - def canonical_loadings(self, views: Iterable[np.ndarray], normalize: bool = True, **kwargs) -> List[np.ndarray]: + def canonical_loadings( + self, views: Iterable[np.ndarray], normalize: bool = True, **kwargs + ) -> List[np.ndarray]: """ Calculate canonical loadings for each view. @@ -234,15 +240,15 @@ def canonical_loadings(self, views: Iterable[np.ndarray], normalize: bool = True def loadings(self) -> List[np.ndarray]: """ Compute and return loadings for each view. These are cached for performance optimization. - + In the context of the cca-zoo models, loadings are the normalized weights. Due to the structure of these models, weight vectors are normalized such that w'X'Xw = 1, as opposed to w'w = 1, which is commonly used in PCA. As a result, when computing the loadings, the weights are normalized to have unit norm, ensuring that the loadings range between -1 and 1. - + It's essential to differentiate between these loadings and canonical loadings. The latter are correlations between the original variables and their corresponding canonical variates. - + Returns ------- List[np.ndarray] @@ -259,28 +265,29 @@ def loadings(self) -> List[np.ndarray]: def explained_variance(self, views: Iterable[np.ndarray]) -> List[np.ndarray]: """ Calculates the variance captured by each latent dimension for each view. - + Returns ------- transformed_vars : list of numpy arrays """ check_is_fitted(self, attributes=["weights"]) - + # Transform the views using the loadings - transformed_views = [view @ loading for view, loading in zip(views, self.loadings)] - + transformed_views = [ + view @ loading for view, loading in zip(views, self.loadings) + ] + # Calculate the variance of each latent dimension in the transformed views transformed_vars = [ np.var(transformed, axis=0) for transformed in transformed_views ] - + return transformed_vars - - + def explained_variance_ratio(self, views: Iterable[np.ndarray]) -> List[np.ndarray]: """ Calculates the ratio of the variance captured by each latent dimension to the total variance for each view. - + Returns ------- explained_variance_ratios : list of numpy arrays @@ -290,106 +297,106 @@ def explained_variance_ratio(self, views: Iterable[np.ndarray]) -> List[np.ndarr for view in views for _, s, _ in [svd(view)] ] - + transformed_vars = self.explained_variance(views) - + # Calculate the explained variance ratio for each latent dimension for each view explained_variance_ratios = [ transformed_var / total_var for transformed_var, total_var in zip(transformed_vars, total_vars) ] - + return explained_variance_ratios - - + def explained_variance_cumulative( self, views: Iterable[np.ndarray] ) -> List[np.ndarray]: """ Calculates the cumulative explained variance ratio for each latent dimension for each view. - + Returns ------- cumulative_ratios : list of numpy arrays """ ratios = self.explained_variance_ratio(views) cumulative_ratios = [np.cumsum(ratio) for ratio in ratios] - + return cumulative_ratios def _compute_covariance(self, views: Iterable[np.ndarray]) -> np.ndarray: """ Computes the covariance matrix for the given views. - + Parameters ---------- views : list/tuple of numpy arrays or array likes with the same number of rows (samples) - + Returns ------- cov : numpy array Computed covariance matrix. """ cov = np.cov(np.hstack(views), rowvar=False) - cov -= block_diag(*[np.cov(view,rowvar=False) for view in views]) + cov -= block_diag(*[np.cov(view, rowvar=False) for view in views]) return cov - def explained_covariance(self, views: Iterable[np.ndarray]) -> np.ndarray: """ Calculates the covariance matrix of the transformed components for each view. - + Parameters ---------- views : list/tuple of numpy arrays or array likes with the same number of rows (samples) - + Returns ------- explained_covariances : list of numpy arrays Covariance matrices for the transformed components of each view. """ check_is_fitted(self, attributes=["weights"]) - + # Transform the views using the loadings - transformed_views = [view @ loading for view, loading in zip(views, self.loadings)] + transformed_views = [ + view @ loading for view, loading in zip(views, self.loadings) + ] - k=transformed_views[0].shape[1] + k = transformed_views[0].shape[1] explained_covariances = np.zeros(k) # just take the kth column of each transformed view and _compute_covariance for i in range(k): - transformed_views_k = [view[:,i][:,None] for view in transformed_views] + transformed_views_k = [view[:, i][:, None] for view in transformed_views] cov_ = self._compute_covariance(transformed_views_k) - _,s_,_=svd(cov_) + _, s_, _ = svd(cov_) explained_covariances[i] = s_[0] - + return explained_covariances - - + def explained_covariance_ratio(self, views: Iterable[np.ndarray]) -> np.ndarray: minimum_dimension = min([view.shape[1] for view in views]) - + cov = self._compute_covariance(views) _, S, _ = svd(cov) # select every other element starting from the first until the minimum dimension total_explained_covariance = S[::2][:minimum_dimension].sum() - + explained_covariances = self.explained_covariance(views) explained_covariance_ratios = explained_covariances / total_explained_covariance - + return explained_covariance_ratios - - - def explained_covariance_cumulative(self, views: Iterable[np.ndarray]) -> np.ndarray: + + def explained_covariance_cumulative( + self, views: Iterable[np.ndarray] + ) -> np.ndarray: """ Calculates the cumulative explained covariance ratio for each latent dimension for each view. - + Returns ------- cumulative_ratios : list of numpy arrays """ ratios = self.explained_covariance_ratio(views) cumulative_ratios = [np.cumsum(ratio) for ratio in ratios] - + return cumulative_ratios diff --git a/cca_zoo/linear/_pls.py b/cca_zoo/linear/_pls.py index 41346fe3..130fa3c0 100644 --- a/cca_zoo/linear/_pls.py +++ b/cca_zoo/linear/_pls.py @@ -11,7 +11,6 @@ def reduce_dims(x): class PLSMixin: - def _more_tags(self): # Indicate that this class is for multiview data return {"pls": True} diff --git a/test/test_explained_variance.py b/test/test_explained_variance.py index a11da240..2026726f 100644 --- a/test/test_explained_variance.py +++ b/test/test_explained_variance.py @@ -13,7 +13,7 @@ def rng(): @pytest.fixture def toy_model(rng): model = BaseModel() - model.weights = [rng.randn(10, 3), rng.randn(8, 3), rng.randn(5,3)] + model.weights = [rng.randn(10, 3), rng.randn(8, 3), rng.randn(5, 3)] return model @@ -29,13 +29,16 @@ def synthetic_views(rng): view3 -= view3.mean(axis=0) return [view1, view2, view3] + def test_explained_variance_ratio(toy_model, synthetic_views): explained_variance_ratios = toy_model.explained_variance_ratio(synthetic_views) # Verify if the ratios are between 0 and 1 for each latent dimension in each view for ratios in explained_variance_ratios: for ratio in ratios: - assert 0 <= ratio <= 1, f"Explained variance ratio should be between 0 and 1, but got {ratio}" + assert ( + 0 <= ratio <= 1 + ), f"Explained variance ratio should be between 0 and 1, but got {ratio}" def test_transformed_covariance_ratio(toy_model, synthetic_views): @@ -43,39 +46,55 @@ def test_transformed_covariance_ratio(toy_model, synthetic_views): pls = MPLS(latent_dimensions=maximum_dimension).fit(synthetic_views) pls_cov_ratios = pls.explained_covariance_ratio(synthetic_views) # sum of these should be 1 within a small tolerance - assert np.isclose(np.sum(pls_cov_ratios), 1, atol=1e-2), "Expected sum of ratios to be 1" + assert np.isclose( + np.sum(pls_cov_ratios), 1, atol=1e-2 + ), "Expected sum of ratios to be 1" cov_ratios = toy_model.explained_covariance_ratio(synthetic_views) # Verify if the ratios are between 0 and 1 for each latent dimension in each view for ratio in cov_ratios: - assert 0 <= ratio <= 1, f"Explained covariance ratio should be between 0 and 1, but got {ratio}" + assert ( + 0 <= ratio <= 1 + ), f"Explained covariance ratio should be between 0 and 1, but got {ratio}" def test_explained_variance(toy_model, synthetic_views): explained_vars = toy_model.explained_variance(synthetic_views) - assert all(isinstance(var, np.ndarray) for var in explained_vars), "Expected numpy arrays" + assert all( + isinstance(var, np.ndarray) for var in explained_vars + ), "Expected numpy arrays" assert all(var.ndim == 1 for var in explained_vars), "Expected 1-dimensional arrays" + def test_explained_variance_cumulative(toy_model, synthetic_views): cumulative_ratios = toy_model.explained_variance_cumulative(synthetic_views) # Verifying if the ratios are increasing for each latent dimension in each view for ratios in cumulative_ratios: - assert np.all(np.diff(ratios) >= 0), "Expected cumulative ratios to be non-decreasing" + assert np.all( + np.diff(ratios) >= 0 + ), "Expected cumulative ratios to be non-decreasing" + def test_explained_covariance(toy_model, synthetic_views): explained_covariances = toy_model.explained_covariance(synthetic_views) assert isinstance(explained_covariances, np.ndarray), "Expected a numpy array" assert explained_covariances.ndim == 1, "Expected 1-dimensional array" + def test_explained_covariance_ratio(toy_model, synthetic_views): explained_covariance_ratios = toy_model.explained_covariance_ratio(synthetic_views) # Verifying if the ratios are between 0 and 1 for each latent dimension in each view for ratio in explained_covariance_ratios: - assert 0 <= ratio <= 1, f"Explained covariance ratio should be between 0 and 1, but got {ratio}" + assert ( + 0 <= ratio <= 1 + ), f"Explained covariance ratio should be between 0 and 1, but got {ratio}" + def test_explained_covariance_cumulative(toy_model, synthetic_views): cumulative_ratios = toy_model.explained_covariance_cumulative(synthetic_views) # Verifying if the ratios are increasing for each latent dimension in each view for ratios in cumulative_ratios: - assert np.all(np.diff(ratios) >= 0), "Expected cumulative ratios to be non-decreasing" + assert np.all( + np.diff(ratios) >= 0 + ), "Expected cumulative ratios to be non-decreasing" diff --git a/test/test_nonparametric.py b/test/test_nonparametric.py index e6feb195..c5b96f5b 100644 --- a/test/test_nonparametric.py +++ b/test/test_nonparametric.py @@ -19,11 +19,7 @@ def data(): def test_equivalence_with_linear_kernel(data): X, Y, Z = data - kernel_tests = [ - (MCCA, KCCA), - (GCCA, KGCCA), - (TCCA, KTCCA) - ] + kernel_tests = [(MCCA, KCCA), (GCCA, KGCCA), (TCCA, KTCCA)] for model1, model2 in kernel_tests: instance1 = model1(latent_dimensions=2).fit([X, Y, Z]) @@ -33,7 +29,7 @@ def test_equivalence_with_linear_kernel(data): assert np.allclose(score1, score2), f"Scores differ for {model1} and {model2}" -@pytest.mark.parametrize('kernel', ['rbf', 'poly', 'sigmoid', 'cosine']) +@pytest.mark.parametrize("kernel", ["rbf", "poly", "sigmoid", "cosine"]) def test_kernel_types(kernel, data): X, Y, Z = data models = [KCCA, KGCCA, KTCCA]