diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b2fef54..d46b3f2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,15 +21,15 @@ repos: - id: nbstripout - repo: https://github.com/pre-commit/mirrors-prettier - rev: v3.0.0-alpha.6 + rev: v3.0.3 hooks: - id: prettier additional_dependencies: - - prettier@2.3.2 + - prettier@3.0.3 - "prettier-plugin-toml" - repo: https://github.com/python-jsonschema/check-jsonschema - rev: 0.22.0 + rev: 0.23.2 hooks: - id: check-github-workflows - id: check-readthedocs diff --git a/CHANGELOG.md b/CHANGELOG.md index 467d65b..a1483e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,13 @@ All notable changes to the `pbiotools` package will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/), and this project adheres to [Semantic Versioning](http://semver.org/). -## [Unreleased] - started 2023-02 +## [Unreleased] - started 2023-09 + +## [4.0.2] - 2023-09 + +### Removed + +- `math_utils.get_kth_fold` (more_itertools) ## [4.0.1] - 2023-02 diff --git a/environment.yml b/environment.yml index 36f22e8..56a88d5 100644 --- a/environment.yml +++ b/environment.yml @@ -11,7 +11,6 @@ dependencies: - joblib - matplotlib-base - matplotlib-venn # ? - - more-itertools - mygene # ? - numpy - openpyxl # ? diff --git a/pyproject.toml b/pyproject.toml index 1cf6ff3..b34b9f0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,6 @@ dependencies = [ "joblib", "matplotlib", "matplotlib_venn", - "more_itertools", "mygene", "numpy", "openpyxl", diff --git a/src/pbiotools/__init__.py b/src/pbiotools/__init__.py index 0945de0..5ad8a5f 100644 --- a/src/pbiotools/__init__.py +++ b/src/pbiotools/__init__.py @@ -1,2 +1,2 @@ -__version_info__ = ("4", "0", "1") +__version_info__ = ("4", "0", "2") __version__ = ".".join(__version_info__) diff --git a/src/pbiotools/misc/math_utils.py b/src/pbiotools/misc/math_utils.py index 2ae9ca8..874feca 100644 --- a/src/pbiotools/misc/math_utils.py +++ b/src/pbiotools/misc/math_utils.py @@ -17,7 +17,6 @@ import itertools from enum import Enum -import more_itertools import numpy as np import pandas as pd import scipy.stats @@ -1268,52 +1267,3 @@ def calc_provost_and_domingos_auc(y_true, y_score): fold_tuple_fields = ["X_train", "y_train", "X_test", "y_test"] fold_tuple = collections.namedtuple("fold", " ".join(fold_tuple_fields)) - - -def get_kth_fold(X, y, fold, num_folds=10, random_seed=8675309): - """Select the kth cross-validation fold using stratified CV - - In partcular, this function uses `sklearn.model_selection.StratifiedKFold` - to split the data. It then selects the training and testing splits - from the k^th fold. - - N.B. If `y` is None, the simple `KFold` is used instead. - - Parameters - ---------- - X, y: sklearn-formated data matrices - - fold: int - The cv fold - - num_folds: int - The total number of folds - - random_seed: int or random state - The value used a the random seed for the k-fold split - """ - - check_range(fold, 0, num_folds, max_inclusive=False, variable_name="fold") - - if y is None: - cv = sklearn.model_selection.KFold(n_splits=num_folds, random_state=random_seed) - else: - cv = sklearn.model_selection.StratifiedKFold( - n_splits=num_folds, random_state=random_seed - ) - - splits = cv.split(X, y) - train, test = more_itertools.nth(splits, fold) - - X_train = X[train] - X_test = X[test] - - if y is None: - y_train = None - y_test = None - else: - y_train = y[train] - y_test = y[test] - - ret = fold_tuple(X_train, y_train, X_test, y_test) - return ret