From d52830a818dba413c5c14e0438b81ddb70349f1c Mon Sep 17 00:00:00 2001 From: Hsiang-Cheng Yang Date: Fri, 24 Mar 2023 07:15:54 +0800 Subject: [PATCH 01/15] Update arrays.rs (#253) fix a typo --- src/linalg/basic/arrays.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/linalg/basic/arrays.rs b/src/linalg/basic/arrays.rs index e11bb6cd..7d0c77a6 100644 --- a/src/linalg/basic/arrays.rs +++ b/src/linalg/basic/arrays.rs @@ -1570,7 +1570,7 @@ pub trait Array2: MutArrayView2 + Sized + mean } - /// copy coumn as a vector + /// copy column as a vector fn copy_col_as_vec(&self, col: usize, result: &mut Vec) { for (r, result_r) in result.iter_mut().enumerate().take(self.shape().0) { *result_r = *self.get((r, col)); From 9cd7348403c7548bb7315ea75995943f9f317fbe Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Mon, 10 Apr 2023 15:13:27 +0100 Subject: [PATCH 02/15] Update CONTRIBUTING.md --- .github/CONTRIBUTING.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 15b39063..895db0f5 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -37,6 +37,8 @@ $ rust-code-analysis-cli -p src/algorithm/neighbour/fastpair.rs --ls 22 --le 213 ``` * find more information about what happens in your binary with [`twiggy`](https://rustwasm.github.io/twiggy/install.html). This need a compiled binary so create a brief `main {}` function using `smartcore` and then point `twiggy` to that file. +* Please take a look at the output of a profiler to spot the most evident performance problems; see [this guide about using a profiler](http://www.codeofview.com/fix-rs/2017/01/24/how-to-optimize-rust-programs-on-linux/). + ## Issue Report Process 1. Go to the project's issues. From 8939ed93b938434fd9982bf1140b8a73abfe3309 Mon Sep 17 00:00:00 2001 From: morenol <22335041+morenol@users.noreply.github.com> Date: Tue, 25 Apr 2023 12:35:58 -0400 Subject: [PATCH 03/15] chore: fix clippy warnings from Rust release 1.69 (#263) * chore: fix clippy warnings from Rust release 1.69 * chore: run `cargo fmt` * refactor: remove unused type parameter --------- Co-authored-by: Luis Moreno --- src/linalg/basic/matrix.rs | 12 ++++++------ src/model_selection/kfold.rs | 8 ++------ src/readers/csv.rs | 12 +++--------- 3 files changed, 11 insertions(+), 21 deletions(-) diff --git a/src/linalg/basic/matrix.rs b/src/linalg/basic/matrix.rs index f21e04fe..e108cea4 100644 --- a/src/linalg/basic/matrix.rs +++ b/src/linalg/basic/matrix.rs @@ -431,9 +431,9 @@ impl SVDDecomposable for DenseMatrix {} impl<'a, T: Debug + Display + Copy + Sized> Array for DenseMatrixView<'a, T> { fn get(&self, pos: (usize, usize)) -> &T { if self.column_major { - &self.values[(pos.0 + pos.1 * self.stride)] + &self.values[pos.0 + pos.1 * self.stride] } else { - &self.values[(pos.0 * self.stride + pos.1)] + &self.values[pos.0 * self.stride + pos.1] } } @@ -495,9 +495,9 @@ impl<'a, T: Debug + Display + Copy + Sized> ArrayView1 for DenseMatrixView<'a impl<'a, T: Debug + Display + Copy + Sized> Array for DenseMatrixMutView<'a, T> { fn get(&self, pos: (usize, usize)) -> &T { if self.column_major { - &self.values[(pos.0 + pos.1 * self.stride)] + &self.values[pos.0 + pos.1 * self.stride] } else { - &self.values[(pos.0 * self.stride + pos.1)] + &self.values[pos.0 * self.stride + pos.1] } } @@ -519,9 +519,9 @@ impl<'a, T: Debug + Display + Copy + Sized> MutArray { fn set(&mut self, pos: (usize, usize), x: T) { if self.column_major { - self.values[(pos.0 + pos.1 * self.stride)] = x; + self.values[pos.0 +
pos.1 * self.stride] = x; } else { - self.values[(pos.0 * self.stride + pos.1)] = x; + self.values[pos.0 * self.stride + pos.1] = x; } } diff --git a/src/model_selection/kfold.rs b/src/model_selection/kfold.rs index 760881b7..d7ad22d2 100644 --- a/src/model_selection/kfold.rs +++ b/src/model_selection/kfold.rs @@ -283,9 +283,7 @@ mod tests { (vec![0, 1, 2, 3, 7, 8, 9], vec![4, 5, 6]), (vec![0, 1, 2, 3, 4, 5, 6], vec![7, 8, 9]), ]; - for ((train, test), (expected_train, expected_test)) in - k.split(&x).into_iter().zip(expected) - { + for ((train, test), (expected_train, expected_test)) in k.split(&x).zip(expected) { assert_eq!(test, expected_test); assert_eq!(train, expected_train); } @@ -307,9 +305,7 @@ mod tests { (vec![0, 1, 2, 3, 7, 8, 9], vec![4, 5, 6]), (vec![0, 1, 2, 3, 4, 5, 6], vec![7, 8, 9]), ]; - for ((train, test), (expected_train, expected_test)) in - k.split(&x).into_iter().zip(expected) - { + for ((train, test), (expected_train, expected_test)) in k.split(&x).zip(expected) { assert_eq!(test.len(), expected_test.len()); assert_eq!(train.len(), expected_train.len()); } diff --git a/src/readers/csv.rs b/src/readers/csv.rs index 730f2932..d67d4b5f 100644 --- a/src/readers/csv.rs +++ b/src/readers/csv.rs @@ -83,7 +83,7 @@ where Matrix: Array2, { let csv_text = read_string_from_source(source)?; - let rows: Vec> = extract_row_vectors_from_csv_text::( + let rows: Vec> = extract_row_vectors_from_csv_text( &csv_text, &definition, detect_row_format(&csv_text, &definition)?, @@ -103,12 +103,7 @@ where /// Given a string containing the contents of a csv file, extract its value /// into row-vectors. -fn extract_row_vectors_from_csv_text< - 'a, - T: Number + RealNumber + std::str::FromStr, - RowVector: Array1, - Matrix: Array2, ->( +fn extract_row_vectors_from_csv_text<'a, T: Number + RealNumber + std::str::FromStr>( csv_text: &'a str, definition: &'a CSVDefinition<'_>, row_format: CSVRowFormat<'_>, @@ -305,12 +300,11 @@ mod tests { } mod extract_row_vectors_from_csv_text { use super::super::{extract_row_vectors_from_csv_text, CSVDefinition, CSVRowFormat}; - use crate::linalg::basic::matrix::DenseMatrix; #[test] fn read_default_csv() { assert_eq!( - extract_row_vectors_from_csv_text::, DenseMatrix<_>>( + extract_row_vectors_from_csv_text::( "column 1, column 2, column3\n1.0,2.0,3.0\n4.0,5.0,6.0", &CSVDefinition::default(), CSVRowFormat { From 545ed6ce2b19f73c31a445ac3fafc09551adc19c Mon Sep 17 00:00:00 2001 From: Ruben De Smet Date: Wed, 26 Apr 2023 15:46:26 +0200 Subject: [PATCH 04/15] Remove some allocations (#262) * Remove some allocations * Remove some more allocations --- src/linalg/basic/vector.rs | 20 +++++++ src/svm/svc.rs | 119 +++++++++++++++++-------------------- src/svm/svr.rs | 16 +++-- 3 files changed, 82 insertions(+), 73 deletions(-) diff --git a/src/linalg/basic/vector.rs b/src/linalg/basic/vector.rs index 99da9819..5d79ab22 100644 --- a/src/linalg/basic/vector.rs +++ b/src/linalg/basic/vector.rs @@ -15,6 +15,25 @@ pub struct VecView<'a, T: Debug + Display + Copy + Sized> { ptr: &'a [T], } +impl Array for &[T] { + fn get(&self, i: usize) -> &T { + &self[i] + } + + fn shape(&self) -> usize { + self.len() + } + + fn is_empty(&self) -> bool { + self.len() == 0 + } + + fn iterator<'b>(&'b self, axis: u8) -> Box + 'b> { + assert!(axis == 0, "For one dimensional array `axis` should == 0"); + Box::new(self.iter()) + } +} + impl Array for Vec { fn get(&self, i: usize) -> &T { &self[i] @@ -46,6 +65,7 @@ impl MutArray for Vec { } impl ArrayView1 for Vec {} +impl ArrayView1 for &[T] {}
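The `&[T]` implementations added above are what let the rest of this patch drop per-row allocations: once a plain slice satisfies `Array`/`ArrayView1`, `predict_for_row` in `svc.rs` and `svr.rs` can borrow a `&[TX]` instead of consuming an owned `Vec`, and callers can refill a single scratch buffer inside the prediction loop. A minimal standalone sketch of that pattern, with hypothetical `kernel` and `predict_for_row` stand-ins rather than smartcore's actual signatures:

```rust
// Linear kernel stand-in: a plain dot product.
fn kernel(a: &[f64], b: &[f64]) -> f64 {
    a.iter().zip(b).map(|(x, y)| x * y).sum()
}

// Before: taking `Vec<f64>` forces the caller to allocate for every row.
fn predict_for_row_owned(w: &[f64], sv: &[Vec<f64>], x: Vec<f64>) -> f64 {
    w.iter().zip(sv).map(|(wi, v)| wi * kernel(&x, v)).sum()
}

// After: borrowing a slice lets the caller reuse one buffer.
fn predict_for_row(w: &[f64], sv: &[Vec<f64>], x: &[f64]) -> f64 {
    w.iter().zip(sv).map(|(wi, v)| wi * kernel(x, v)).sum()
}

fn main() {
    let w = vec![0.5, -0.25];
    let sv = vec![vec![1.0, 2.0], vec![3.0, 4.0]];
    let rows = vec![vec![1.0, 1.0], vec![2.0, 0.5]];

    // One scratch buffer, refilled with clear()/extend() per iteration
    // instead of a fresh Vec per row.
    let mut scratch: Vec<f64> = Vec::with_capacity(2);
    for row in &rows {
        scratch.clear();
        scratch.extend(row.iter().copied());
        println!("{}", predict_for_row(&w, &sv, &scratch));
    }

    // Same result, but with a fresh allocation on every call.
    let _ = predict_for_row_owned(&w, &sv, rows[0].clone());
}
```

Reusing `scratch` keeps the per-row cost to a copy into already-reserved capacity, which is exactly the effect of the `row.clear(); row.extend(...)` pairs in the `svc.rs` and `svr.rs` hunks that follow.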
impl MutArrayView1 for Vec {} diff --git a/src/svm/svc.rs b/src/svm/svc.rs index 131f44c2..252d43af 100644 --- a/src/svm/svc.rs +++ b/src/svm/svc.rs @@ -322,19 +322,26 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2 + 'a, Y: Array let (n, _) = x.shape(); let mut y_hat: Vec = Array1::zeros(n); + let mut row = Vec::with_capacity(n); for i in 0..n { - let row_pred: TX = - self.predict_for_row(Vec::from_iterator(x.get_row(i).iterator(0).copied(), n)); + row.clear(); + row.extend(x.get_row(i).iterator(0).copied()); + let row_pred: TX = self.predict_for_row(&row); y_hat.set(i, row_pred); } Ok(y_hat) } - fn predict_for_row(&self, x: Vec) -> TX { + fn predict_for_row(&self, x: &[TX]) -> TX { let mut f = self.b.unwrap(); + let xi: Vec<_> = x.iter().map(|e| e.to_f64().unwrap()).collect(); for i in 0..self.instances.as_ref().unwrap().len() { + let xj: Vec<_> = self.instances.as_ref().unwrap()[i] + .iter() + .map(|e| e.to_f64().unwrap()) + .collect(); f += self.w.as_ref().unwrap()[i] * TX::from( self.parameters @@ -343,13 +350,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2 + 'a, Y: Array .kernel .as_ref() .unwrap() - .apply( - &x.iter().map(|e| e.to_f64().unwrap()).collect(), - &self.instances.as_ref().unwrap()[i] - .iter() - .map(|e| e.to_f64().unwrap()) - .collect(), - ) + .apply(&xi, &xj) .unwrap(), ) .unwrap(); @@ -472,14 +473,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 let tol = self.parameters.tol; let good_enough = TX::from_i32(1000).unwrap(); + let mut x = Vec::with_capacity(n); for _ in 0..self.parameters.epoch { for i in self.permutate(n) { - self.process( - i, - Vec::from_iterator(self.x.get_row(i).iterator(0).copied(), n), - *self.y.get(i), - &mut cache, - ); + x.clear(); + x.extend(self.x.get_row(i).iterator(0).take(n).copied()); + self.process(i, &x, *self.y.get(i), &mut cache); loop { self.reprocess(tol, &mut cache); self.find_min_max_gradient(); @@ -511,24 +510,17 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 let mut cp = 0; let mut cn = 0; + let mut x = Vec::with_capacity(n); for i in self.permutate(n) { + x.clear(); + x.extend(self.x.get_row(i).iterator(0).take(n).copied()); if *self.y.get(i) == TY::one() && cp < few { - if self.process( - i, - Vec::from_iterator(self.x.get_row(i).iterator(0).copied(), n), - *self.y.get(i), - cache, - ) { + if self.process(i, &x, *self.y.get(i), cache) { cp += 1; } } else if *self.y.get(i) == TY::from(-1).unwrap() && cn < few - && self.process( - i, - Vec::from_iterator(self.x.get_row(i).iterator(0).copied(), n), - *self.y.get(i), - cache, - ) + && self.process(i, &x, *self.y.get(i), cache) { cn += 1; } @@ -539,7 +531,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 } } - fn process(&mut self, i: usize, x: Vec, y: TY, cache: &mut Cache) -> bool { + fn process(&mut self, i: usize, x: &[TX], y: TY, cache: &mut Cache) -> bool { for j in 0..self.sv.len() { if self.sv[j].index == i { return true; @@ -551,15 +543,14 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 let mut cache_values: Vec<((usize, usize), TX)> = Vec::new(); for v in self.sv.iter() { + let xi: Vec<_> = v.x.iter().map(|e| e.to_f64().unwrap()).collect(); + let xj: Vec<_> = x.iter().map(|e| e.to_f64().unwrap()).collect(); let k = self .parameters .kernel .as_ref() .unwrap() - .apply( - &v.x.iter().map(|e| e.to_f64().unwrap()).collect(), - &x.iter().map(|e| e.to_f64().unwrap()).collect(), - ) + .apply(&xi, &xj) .unwrap(); 
cache_values.push(((i, v.index), TX::from(k).unwrap())); g -= v.alpha * k; @@ -578,7 +569,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 cache.insert(v.0, v.1.to_f64().unwrap()); } - let x_f64 = x.iter().map(|e| e.to_f64().unwrap()).collect(); + let x_f64: Vec<_> = x.iter().map(|e| e.to_f64().unwrap()).collect(); let k_v = self .parameters .kernel @@ -701,8 +692,10 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 let km = sv1.k; let gm = sv1.grad; let mut best = 0f64; + let xi: Vec<_> = sv1.x.iter().map(|e| e.to_f64().unwrap()).collect(); for i in 0..self.sv.len() { let v = &self.sv[i]; + let xj: Vec<_> = v.x.iter().map(|e| e.to_f64().unwrap()).collect(); let z = v.grad - gm; let k = cache.get( sv1, @@ -711,10 +704,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 .kernel .as_ref() .unwrap() - .apply( - &sv1.x.iter().map(|e| e.to_f64().unwrap()).collect(), - &v.x.iter().map(|e| e.to_f64().unwrap()).collect(), - ) + .apply(&xi, &xj) .unwrap(), ); let mut curv = km + v.k - 2f64 * k; @@ -732,6 +722,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 } } + let xi: Vec<_> = self.sv[idx_1] + .x + .iter() + .map(|e| e.to_f64().unwrap()) + .collect::>(); + idx_2.map(|idx_2| { ( idx_1, @@ -742,16 +738,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 .as_ref() .unwrap() .apply( - &self.sv[idx_1] - .x - .iter() - .map(|e| e.to_f64().unwrap()) - .collect(), + &xi, &self.sv[idx_2] .x .iter() .map(|e| e.to_f64().unwrap()) - .collect(), + .collect::>(), ) .unwrap() }), @@ -765,8 +757,11 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 let km = sv2.k; let gm = sv2.grad; let mut best = 0f64; + + let xi: Vec<_> = sv2.x.iter().map(|e| e.to_f64().unwrap()).collect(); for i in 0..self.sv.len() { let v = &self.sv[i]; + let xj: Vec<_> = v.x.iter().map(|e| e.to_f64().unwrap()).collect(); let z = gm - v.grad; let k = cache.get( sv2, @@ -775,10 +770,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 .kernel .as_ref() .unwrap() - .apply( - &sv2.x.iter().map(|e| e.to_f64().unwrap()).collect(), - &v.x.iter().map(|e| e.to_f64().unwrap()).collect(), - ) + .apply(&xi, &xj) .unwrap(), ); let mut curv = km + v.k - 2f64 * k; @@ -797,6 +789,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 } } + let xj: Vec<_> = self.sv[idx_2] + .x + .iter() + .map(|e| e.to_f64().unwrap()) + .collect(); + idx_1.map(|idx_1| { ( idx_1, @@ -811,12 +809,8 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 .x .iter() .map(|e| e.to_f64().unwrap()) - .collect(), - &self.sv[idx_2] - .x - .iter() - .map(|e| e.to_f64().unwrap()) - .collect(), + .collect::>(), + &xj, ) .unwrap() }), @@ -835,12 +829,12 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 .x .iter() .map(|e| e.to_f64().unwrap()) - .collect(), + .collect::>(), &self.sv[idx_2] .x .iter() .map(|e| e.to_f64().unwrap()) - .collect(), + .collect::>(), ) .unwrap(), )), @@ -895,7 +889,10 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 self.sv[v1].alpha -= step.to_f64().unwrap(); self.sv[v2].alpha += step.to_f64().unwrap(); + let xi_v1: Vec<_> = self.sv[v1].x.iter().map(|e| e.to_f64().unwrap()).collect(); + let xi_v2: Vec<_> = self.sv[v2].x.iter().map(|e| e.to_f64().unwrap()).collect(); for i in 0..self.sv.len() { + let xj: Vec<_> = self.sv[i].x.iter().map(|e| e.to_f64().unwrap()).collect(); let k2 = 
cache.get( &self.sv[v2], &self.sv[i], @@ -903,10 +900,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 .kernel .as_ref() .unwrap() - .apply( - &self.sv[v2].x.iter().map(|e| e.to_f64().unwrap()).collect(), - &self.sv[i].x.iter().map(|e| e.to_f64().unwrap()).collect(), - ) + .apply(&xi_v2, &xj) .unwrap(), ); let k1 = cache.get( @@ -916,10 +910,7 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1 .kernel .as_ref() .unwrap() - .apply( - &self.sv[v1].x.iter().map(|e| e.to_f64().unwrap()).collect(), - &self.sv[i].x.iter().map(|e| e.to_f64().unwrap()).collect(), - ) + .apply(&xi_v1, &xj) .unwrap(), ); self.sv[i].grad -= step.to_f64().unwrap() * (k2 - k1); diff --git a/src/svm/svr.rs b/src/svm/svr.rs index 6fbd15be..7511aeaf 100644 --- a/src/svm/svr.rs +++ b/src/svm/svr.rs @@ -248,19 +248,20 @@ impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2, Y: Array1> SVR<' let mut y_hat: Vec = Vec::::zeros(n); + let mut x_i = Vec::with_capacity(n); for i in 0..n { - y_hat.set( - i, - self.predict_for_row(Vec::from_iterator(x.get_row(i).iterator(0).copied(), n)), - ); + x_i.clear(); + x_i.extend(x.get_row(i).iterator(0).copied()); + y_hat.set(i, self.predict_for_row(&x_i)); } Ok(y_hat) } - pub(crate) fn predict_for_row(&self, x: Vec) -> T { + pub(crate) fn predict_for_row(&self, x: &[T]) -> T { let mut f = self.b; + let xi: Vec<_> = x.iter().map(|e| e.to_f64().unwrap()).collect(); for i in 0..self.instances.as_ref().unwrap().len() { f += self.w.as_ref().unwrap()[i] * T::from( @@ -270,10 +271,7 @@ impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2, Y: Array1> SVR<' .kernel .as_ref() .unwrap() - .apply( - &x.iter().map(|e| e.to_f64().unwrap()).collect(), - &self.instances.as_ref().unwrap()[i], - ) + .apply(&xi, &self.instances.as_ref().unwrap()[i]) .unwrap(), ) .unwrap() From 2d7c05515444ccca7789c37d893d72d1c4a77fc4 Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Mon, 1 May 2023 13:20:17 +0100 Subject: [PATCH 05/15] Bump version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index a30db160..48d91804 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "smartcore" description = "Machine Learning in Rust." homepage = "https://smartcorelib.org" -version = "0.3.1" +version = "0.3.2" authors = ["smartcore Developers"] edition = "2021" license = "Apache-2.0" From dbdc2b2a77d7b73cc483f238b44382946a785f6a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 22 Jun 2023 17:56:42 +0100 Subject: [PATCH 06/15] Update itertools requirement from 0.10.5 to 0.11.0 (#266) Updates the requirements on [itertools](https://github.com/rust-itertools/itertools) to permit the latest version. - [Changelog](https://github.com/rust-itertools/itertools/blob/master/CHANGELOG.md) - [Commits](https://github.com/rust-itertools/itertools/compare/v0.10.5...v0.11.0) --- updated-dependencies: - dependency-name: itertools dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 48d91804..57445059 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,7 +48,7 @@ getrandom = { version = "0.2.8", optional = true } wasm-bindgen-test = "0.3" [dev-dependencies] -itertools = "0.10.5" +itertools = "0.11.0" serde_json = "1.0" bincode = "1.3.1" From 6f22bbd1500ae56b3e26b676971c8b7935f7c30f Mon Sep 17 00:00:00 2001 From: morenol <22335041+morenol@users.noreply.github.com> Date: Mon, 20 Nov 2023 21:54:09 -0400 Subject: [PATCH 07/15] chore: update clippy lints (#272) * chore: fix clippy lints --------- Co-authored-by: Luis Moreno --- src/cluster/dbscan.rs | 3 +-- src/dataset/diabetes.rs | 2 +- src/dataset/digits.rs | 8 +++----- src/dataset/iris.rs | 4 ++-- src/linalg/basic/arrays.rs | 6 ++---- src/linalg/basic/matrix.rs | 2 +- src/linalg/basic/vector.rs | 2 +- src/linear/bg_solver.rs | 2 +- src/linear/logistic_regression.rs | 6 +----- src/neighbors/knn_regressor.rs | 4 ++-- src/preprocessing/categorical.rs | 2 +- src/svm/mod.rs | 2 +- src/tree/decision_tree_regressor.rs | 4 ++-- 13 files changed, 19 insertions(+), 28 deletions(-) diff --git a/src/cluster/dbscan.rs b/src/cluster/dbscan.rs index 0d84a613..584cdc31 100644 --- a/src/cluster/dbscan.rs +++ b/src/cluster/dbscan.rs @@ -315,8 +315,7 @@ impl, Y: Array1, D: Distance>> } } - while !neighbors.is_empty() { - let neighbor = neighbors.pop().unwrap(); + while let Some(neighbor) = neighbors.pop() { let index = neighbor.0; if y[index] == outlier { diff --git a/src/dataset/diabetes.rs b/src/dataset/diabetes.rs index faf169eb..a95b5116 100644 --- a/src/dataset/diabetes.rs +++ b/src/dataset/diabetes.rs @@ -40,7 +40,7 @@ pub fn load_dataset() -> Dataset { target: y, num_samples, num_features, - feature_names: vec![ + feature_names: [ "Age", "Sex", "BMI", "BP", "S1", "S2", "S3", "S4", "S5", "S6", ] .iter() diff --git a/src/dataset/digits.rs b/src/dataset/digits.rs index b3556e53..c32648cd 100644 --- a/src/dataset/digits.rs +++ b/src/dataset/digits.rs @@ -25,16 +25,14 @@ pub fn load_dataset() -> Dataset { target: y, num_samples, num_features, - feature_names: vec![ - "sepal length (cm)", + feature_names: ["sepal length (cm)", "sepal width (cm)", "petal length (cm)", - "petal width (cm)", - ] + "petal width (cm)"] .iter() .map(|s| s.to_string()) .collect(), - target_names: vec!["setosa", "versicolor", "virginica"] + target_names: ["setosa", "versicolor", "virginica"] .iter() .map(|s| s.to_string()) .collect(), diff --git a/src/dataset/iris.rs b/src/dataset/iris.rs index fe60241a..75c58acc 100644 --- a/src/dataset/iris.rs +++ b/src/dataset/iris.rs @@ -36,7 +36,7 @@ pub fn load_dataset() -> Dataset { target: y, num_samples, num_features, - feature_names: vec![ + feature_names: [ "sepal length (cm)", "sepal width (cm)", "petal length (cm)", @@ -45,7 +45,7 @@ pub fn load_dataset() -> Dataset { .iter() .map(|s| s.to_string()) .collect(), - target_names: vec!["setosa", "versicolor", "virginica"] + target_names: ["setosa", "versicolor", "virginica"] .iter() .map(|s| s.to_string()) .collect(), diff --git a/src/linalg/basic/arrays.rs b/src/linalg/basic/arrays.rs index 7d0c77a6..0df1bf75 100644 --- a/src/linalg/basic/arrays.rs +++ b/src/linalg/basic/arrays.rs @@ -188,8 +188,7 @@ pub trait ArrayView1: Array { _ => max, } }; - self.iterator(0) - .fold(T::min_value(), |max, x| max_f(max, x)) + self.iterator(0).fold(T::min_value(), 
max_f) } /// return min value from the view fn min(&self) -> T @@ -202,8 +201,7 @@ pub trait ArrayView1: Array { _ => min, } }; - self.iterator(0) - .fold(T::max_value(), |max, x| min_f(max, x)) + self.iterator(0).fold(T::max_value(), min_f) } /// return the position of the max value of the view fn argmax(&self) -> usize diff --git a/src/linalg/basic/matrix.rs b/src/linalg/basic/matrix.rs index e108cea4..6b39b55c 100644 --- a/src/linalg/basic/matrix.rs +++ b/src/linalg/basic/matrix.rs @@ -650,7 +650,7 @@ mod tests { #[test] fn test_from_iterator() { - let data = vec![1, 2, 3, 4, 5, 6]; + let data = [1, 2, 3, 4, 5, 6]; let m = DenseMatrix::from_iterator(data.iter(), 2, 3, 0); diff --git a/src/linalg/basic/vector.rs b/src/linalg/basic/vector.rs index 5d79ab22..08ea620b 100644 --- a/src/linalg/basic/vector.rs +++ b/src/linalg/basic/vector.rs @@ -211,7 +211,7 @@ mod tests { #[test] fn test_len() { - let x = vec![1, 2, 3]; + let x = [1, 2, 3]; assert_eq!(3, x.len()); } diff --git a/src/linear/bg_solver.rs b/src/linear/bg_solver.rs index d1ad29f2..5665c763 100644 --- a/src/linear/bg_solver.rs +++ b/src/linear/bg_solver.rs @@ -160,7 +160,7 @@ mod tests { fn bg_solver() { let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]); let b = vec![40., 51., 28.]; - let expected = vec![1.0, 2.0, 3.0]; + let expected = [1.0, 2.0, 3.0]; let mut x = Vec::zeros(3); diff --git a/src/linear/logistic_regression.rs b/src/linear/logistic_regression.rs index 4a4041bc..0defd0fa 100644 --- a/src/linear/logistic_regression.rs +++ b/src/linear/logistic_regression.rs @@ -890,11 +890,7 @@ mod tests { let y_hat = lr.predict(&x).unwrap(); - let error: i32 = y - .into_iter() - .zip(y_hat.into_iter()) - .map(|(a, b)| (a - b).abs()) - .sum(); + let error: i32 = y.into_iter().zip(y_hat).map(|(a, b)| (a - b).abs()).sum(); assert!(error <= 1); diff --git a/src/neighbors/knn_regressor.rs b/src/neighbors/knn_regressor.rs index 914f810e..5798700d 100644 --- a/src/neighbors/knn_regressor.rs +++ b/src/neighbors/knn_regressor.rs @@ -297,7 +297,7 @@ mod tests { let x = DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]); let y: Vec = vec![1., 2., 3., 4., 5.]; - let y_exp = vec![1., 2., 3., 4., 5.]; + let y_exp = [1., 2., 3., 4., 5.]; let knn = KNNRegressor::fit( &x, &y, @@ -324,7 +324,7 @@ mod tests { let x = DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]); let y: Vec = vec![1., 2., 3., 4., 5.]; - let y_exp = vec![2., 2., 3., 4., 4.]; + let y_exp = [2., 2., 3., 4., 4.]; let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap(); let y_hat = knn.predict(&x).unwrap(); assert_eq!(5, Vec::len(&y_hat)); diff --git a/src/preprocessing/categorical.rs b/src/preprocessing/categorical.rs index 933d7c2b..dfa7d239 100644 --- a/src/preprocessing/categorical.rs +++ b/src/preprocessing/categorical.rs @@ -277,7 +277,7 @@ mod tests { )] #[test] fn hash_encode_f64_series() { - let series = vec![3.0, 1.0, 2.0, 1.0]; + let series = [3.0, 1.0, 2.0, 1.0]; let hashable_series: Vec = series.iter().map(|v| v.to_category()).collect(); let enc = CategoryMapper::from_positional_category_vec(hashable_series); diff --git a/src/svm/mod.rs b/src/svm/mod.rs index b2bd79cb..0792fdb8 100644 --- a/src/svm/mod.rs +++ b/src/svm/mod.rs @@ -56,7 +56,7 @@ pub struct Kernels; impl Kernels { /// Return a default linear pub fn linear() -> LinearKernel { - LinearKernel::default() + LinearKernel } /// Return a default RBF pub fn rbf() -> RBFKernel { diff --git 
a/src/tree/decision_tree_regressor.rs b/src/tree/decision_tree_regressor.rs index d21c7490..21832ba4 100644 --- a/src/tree/decision_tree_regressor.rs +++ b/src/tree/decision_tree_regressor.rs @@ -767,7 +767,7 @@ mod tests { assert!((y_hat[i] - y[i]).abs() < 0.1); } - let expected_y = vec![ + let expected_y = [ 87.3, 87.3, 87.3, 87.3, 98.9, 98.9, 98.9, 98.9, 98.9, 107.9, 107.9, 107.9, 114.85, 114.85, 114.85, 114.85, ]; @@ -788,7 +788,7 @@ mod tests { assert!((y_hat[i] - expected_y[i]).abs() < 0.1); } - let expected_y = vec![ + let expected_y = [ 83.0, 88.35, 88.35, 89.5, 97.15, 97.15, 99.5, 99.5, 101.2, 104.6, 109.6, 109.6, 113.4, 113.4, 116.30, 116.30, ]; From 9c07925d8a885bfc9f45f9538287a395a688b1d8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 20 Nov 2023 22:00:34 -0400 Subject: [PATCH 08/15] Update itertools requirement from 0.11.0 to 0.12.0 (#271) Updates the requirements on [itertools](https://github.com/rust-itertools/itertools) to permit the latest version. - [Changelog](https://github.com/rust-itertools/itertools/blob/master/CHANGELOG.md) - [Commits](https://github.com/rust-itertools/itertools/compare/v0.11.0...v0.12.0) --- updated-dependencies: - dependency-name: itertools dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 57445059..c13003b9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,7 +48,7 @@ getrandom = { version = "0.2.8", optional = true } wasm-bindgen-test = "0.3" [dev-dependencies] -itertools = "0.11.0" +itertools = "0.12.0" serde_json = "1.0" bincode = "1.3.1" From 886b5631b7c4a8e2aac2dfa903d77e53195a564a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20Meyer?= Date: Wed, 10 Jan 2024 19:59:10 +0100 Subject: [PATCH 09/15] In Naive Bayes, avoid using `Option::unwrap` and so avoid panicking from NaN values (#274) --- .../hyper_tuning/grid_search.rs | 4 +- src/naive_bayes/mod.rs | 94 +++++++++++++++++-- 2 files changed, 86 insertions(+), 12 deletions(-) diff --git a/src/model_selection/hyper_tuning/grid_search.rs b/src/model_selection/hyper_tuning/grid_search.rs index 3c914e48..74242c60 100644 --- a/src/model_selection/hyper_tuning/grid_search.rs +++ b/src/model_selection/hyper_tuning/grid_search.rs @@ -3,9 +3,9 @@ use crate::{ api::{Predictor, SupervisedEstimator}, error::{Failed, FailedError}, - linalg::basic::arrays::{Array2, Array1}, - numbers::realnum::RealNumber, + linalg::basic::arrays::{Array1, Array2}, numbers::basenum::Number, + numbers::realnum::RealNumber, }; use crate::model_selection::{cross_validate, BaseKFold, CrossValidationResult}; diff --git a/src/naive_bayes/mod.rs b/src/naive_bayes/mod.rs index e7ab7f6d..11614d14 100644 --- a/src/naive_bayes/mod.rs +++ b/src/naive_bayes/mod.rs @@ -40,7 +40,7 @@ use crate::linalg::basic::arrays::{Array1, Array2, ArrayView1}; use crate::numbers::basenum::Number; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; -use std::marker::PhantomData; +use std::{cmp::Ordering, marker::PhantomData}; /// Distribution used in the Naive Bayes classifier. 
pub(crate) trait NBDistribution: Clone { @@ -92,11 +92,10 @@ impl, Y: Array1, D: NBDistribution Result { let y_classes = self.distribution.classes(); - let (rows, _) = x.shape(); - let predictions = (0..rows) - .map(|row_index| { - let row = x.get_row(row_index); - let (prediction, _probability) = y_classes + let predictions = x + .row_iter() + .map(|row| { + y_classes .iter() .enumerate() .map(|(class_index, class)| { @@ -106,11 +105,26 @@ impl, Y: Array1, D: NBDistribution ordering, + None => { + if p1.is_nan() { + Ordering::Less + } else if p2.is_nan() { + Ordering::Greater + } else { + Ordering::Equal + } + } + }) + .map(|(prediction, _probability)| *prediction) + .ok_or_else(|| Failed::predict("Failed to predict, there is no result")) }) - .collect::>(); + .collect::, Failed>>()?; let y_hat = Y::from_vec_slice(&predictions); Ok(y_hat) } @@ -119,3 +133,63 @@ pub mod bernoulli; pub mod categorical; pub mod gaussian; pub mod multinomial; + +#[cfg(test)] +mod tests { + use super::*; + use crate::linalg::basic::arrays::Array; + use crate::linalg::basic::matrix::DenseMatrix; + use num_traits::float::Float; + + type Model<'d> = BaseNaiveBayes, Vec, TestDistribution<'d>>; + + #[derive(Debug, PartialEq, Clone)] + struct TestDistribution<'d>(&'d Vec); + + impl<'d> NBDistribution for TestDistribution<'d> { + fn prior(&self, _class_index: usize) -> f64 { + 1. + } + + fn log_likelihood<'a>( + &'a self, + class_index: usize, + _j: &'a Box + 'a>, + ) -> f64 { + match self.0.get(class_index) { + &v @ 2 | &v @ 10 | &v @ 20 => v as f64, + _ => f64::nan(), + } + } + + fn classes(&self) -> &Vec { + &self.0 + } + } + + #[test] + fn test_predict() { + let matrix = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9]]); + + let val = vec![]; + match Model::fit(TestDistribution(&val)).unwrap().predict(&matrix) { + Ok(_) => panic!("Should return error in case of empty classes"), + Err(err) => assert_eq!( + err.to_string(), + "Predict failed: Failed to predict, there is no result" + ), + } + + let val = vec![1, 2, 3]; + match Model::fit(TestDistribution(&val)).unwrap().predict(&matrix) { + Ok(r) => assert_eq!(r, vec![2, 2, 2]), + Err(_) => panic!("Should success in normal case with NaNs"), + } + + let val = vec![20, 2, 10]; + match Model::fit(TestDistribution(&val)).unwrap().predict(&matrix) { + Ok(r) => assert_eq!(r, vec![20, 20, 20]), + Err(_) => panic!("Should success in normal case without NaNs"), + } + } +} From 4eadd16ce4af6d6e2364d6dac2fcb462ee18b47b Mon Sep 17 00:00:00 2001 From: Tushushu Date: Sun, 25 Feb 2024 12:37:30 +0800 Subject: [PATCH 10/15] Implement the feature importance for Decision Tree Classifier (#275) * store impurity in the node * add number of features * add a TODO * draft feature importance * feat * n_samples of node * compute_feature_importances * unit tests * always calculate impurity * fix bug * fix linter --- src/tree/decision_tree_classifier.rs | 108 +++++++++++++++++++++------ 1 file changed, 85 insertions(+), 23 deletions(-) diff --git a/src/tree/decision_tree_classifier.rs b/src/tree/decision_tree_classifier.rs index 4f36e5b9..3c9deaf7 100644 --- a/src/tree/decision_tree_classifier.rs +++ b/src/tree/decision_tree_classifier.rs @@ -116,6 +116,7 @@ pub struct DecisionTreeClassifier< num_classes: usize, classes: Vec, depth: u16, + num_features: usize, _phantom_tx: PhantomData, _phantom_x: PhantomData, _phantom_y: PhantomData, @@ -159,11 +160,13 @@ pub enum SplitCriterion { #[derive(Debug, Clone)] struct Node { output: usize, + n_node_samples: usize, split_feature: 
usize, split_value: Option, split_score: Option, true_child: Option, false_child: Option, + impurity: Option, } impl, Y: Array1> PartialEq @@ -400,14 +403,16 @@ impl Default for DecisionTreeClassifierSearchParameters { } impl Node { - fn new(output: usize) -> Self { + fn new(output: usize, n_node_samples: usize) -> Self { Node { output, + n_node_samples, split_feature: 0, split_value: Option::None, split_score: Option::None, true_child: Option::None, false_child: Option::None, + impurity: Option::None, } } } @@ -507,6 +512,7 @@ impl, Y: Array1> num_classes: 0usize, classes: vec![], depth: 0u16, + num_features: 0usize, _phantom_tx: PhantomData, _phantom_x: PhantomData, _phantom_y: PhantomData, @@ -578,7 +584,7 @@ impl, Y: Array1> count[yi[i]] += samples[i]; } - let root = Node::new(which_max(&count)); + let root = Node::new(which_max(&count), y_ncols); change_nodes.push(root); let mut order: Vec> = Vec::new(); @@ -593,6 +599,7 @@ impl, Y: Array1> num_classes: k, classes, depth: 0u16, + num_features: num_attributes, _phantom_tx: PhantomData, _phantom_x: PhantomData, _phantom_y: PhantomData, @@ -678,16 +685,7 @@ impl, Y: Array1> } } - if is_pure { - return false; - } - let n = visitor.samples.iter().sum(); - - if n <= self.parameters().min_samples_split { - return false; - } - let mut count = vec![0; self.num_classes]; let mut false_count = vec![0; self.num_classes]; for i in 0..n_rows { @@ -696,7 +694,15 @@ impl, Y: Array1> } } - let parent_impurity = impurity(&self.parameters().criterion, &count, n); + self.nodes[visitor.node].impurity = Some(impurity(&self.parameters().criterion, &count, n)); + + if is_pure { + return false; + } + + if n <= self.parameters().min_samples_split { + return false; + } let mut variables = (0..n_attr).collect::>(); @@ -705,14 +711,7 @@ impl, Y: Array1> } for variable in variables.iter().take(mtry) { - self.find_best_split( - visitor, - n, - &count, - &mut false_count, - parent_impurity, - *variable, - ); + self.find_best_split(visitor, n, &count, &mut false_count, *variable); } self.nodes()[visitor.node].split_score.is_some() @@ -724,7 +723,6 @@ impl, Y: Array1> n: usize, count: &[usize], false_count: &mut [usize], - parent_impurity: f64, j: usize, ) { let mut true_count = vec![0; self.num_classes]; @@ -760,6 +758,7 @@ impl, Y: Array1> let true_label = which_max(&true_count); let false_label = which_max(false_count); + let parent_impurity = self.nodes()[visitor.node].impurity.unwrap(); let gain = parent_impurity - tc as f64 / n as f64 * impurity(&self.parameters().criterion, &true_count, tc) @@ -827,9 +826,9 @@ impl, Y: Array1> let true_child_idx = self.nodes().len(); - self.nodes.push(Node::new(visitor.true_child_output)); + self.nodes.push(Node::new(visitor.true_child_output, tc)); let false_child_idx = self.nodes().len(); - self.nodes.push(Node::new(visitor.false_child_output)); + self.nodes.push(Node::new(visitor.false_child_output, fc)); self.nodes[visitor.node].true_child = Some(true_child_idx); self.nodes[visitor.node].false_child = Some(false_child_idx); @@ -863,6 +862,33 @@ impl, Y: Array1> true } + + /// Compute feature importances for the fitted tree. 
+ pub fn compute_feature_importances(&self, normalize: bool) -> Vec { + let mut importances = vec![0f64; self.num_features]; + + for node in self.nodes().iter() { + if node.true_child.is_none() && node.false_child.is_none() { + continue; + } + let left = &self.nodes()[node.true_child.unwrap()]; + let right = &self.nodes()[node.false_child.unwrap()]; + + importances[node.split_feature] += node.n_node_samples as f64 * node.impurity.unwrap() + - left.n_node_samples as f64 * left.impurity.unwrap() + - right.n_node_samples as f64 * right.impurity.unwrap(); + } + for item in importances.iter_mut() { + *item /= self.nodes()[0].n_node_samples as f64; + } + if normalize { + let sum = importances.iter().sum::(); + for importance in importances.iter_mut() { + *importance /= sum; + } + } + importances + } } #[cfg(test)] @@ -1016,6 +1042,42 @@ mod tests { ); } + #[test] + fn test_compute_feature_importances() { + let x: DenseMatrix = DenseMatrix::from_2d_array(&[ + &[1., 1., 1., 0.], + &[1., 1., 1., 0.], + &[1., 1., 1., 1.], + &[1., 1., 0., 0.], + &[1., 1., 0., 1.], + &[1., 0., 1., 0.], + &[1., 0., 1., 0.], + &[1., 0., 1., 1.], + &[1., 0., 0., 0.], + &[1., 0., 0., 1.], + &[0., 1., 1., 0.], + &[0., 1., 1., 0.], + &[0., 1., 1., 1.], + &[0., 1., 0., 0.], + &[0., 1., 0., 1.], + &[0., 0., 1., 0.], + &[0., 0., 1., 0.], + &[0., 0., 1., 1.], + &[0., 0., 0., 0.], + &[0., 0., 0., 1.], + ]); + let y: Vec = vec![1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0]; + let tree = DecisionTreeClassifier::fit(&x, &y, Default::default()).unwrap(); + assert_eq!( + tree.compute_feature_importances(false), + vec![0., 0., 0.21333333333333332, 0.26666666666666666] + ); + assert_eq!( + tree.compute_feature_importances(true), + vec![0., 0., 0.4444444444444444, 0.5555555555555556] + ); + } + #[cfg_attr( all(target_arch = "wasm32", not(target_os = "wasi")), wasm_bindgen_test::wasm_bindgen_test From 80a93c1a0e9ce2afa25b1651ef6918855d082d4f Mon Sep 17 00:00:00 2001 From: morenol <22335041+morenol@users.noreply.github.com> Date: Sun, 25 Feb 2024 00:17:30 -0500 Subject: [PATCH 11/15] chore: fix clippy (#276) Co-authored-by: Luis Moreno --- src/linalg/basic/matrix.rs | 4 ++-- src/preprocessing/numerical.rs | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/linalg/basic/matrix.rs b/src/linalg/basic/matrix.rs index 6b39b55c..47307605 100644 --- a/src/linalg/basic/matrix.rs +++ b/src/linalg/basic/matrix.rs @@ -193,11 +193,11 @@ impl DenseMatrix { /// New instance of `DenseMatrix` from 2d array. pub fn from_2d_array(values: &[&[T]]) -> Self { - DenseMatrix::from_2d_vec(&values.iter().map(|row| Vec::from(*row)).collect()) + DenseMatrix::from_2d_vec(&values.iter().map(|row| Vec::from(*row)).collect::>()) } /// New instance of `DenseMatrix` from 2d vector. 
- pub fn from_2d_vec(values: &Vec>) -> Self { + pub fn from_2d_vec(values: &[Vec]) -> Self { let nrows = values.len(); let ncols = values .first() diff --git a/src/preprocessing/numerical.rs b/src/preprocessing/numerical.rs index c673731b..bd5f00d1 100644 --- a/src/preprocessing/numerical.rs +++ b/src/preprocessing/numerical.rs @@ -197,11 +197,11 @@ mod tests { fn combine_three_columns() { assert_eq!( build_matrix_from_columns(vec![ - DenseMatrix::from_2d_vec(&vec![vec![1.0], vec![1.0], vec![1.0],]), - DenseMatrix::from_2d_vec(&vec![vec![2.0], vec![2.0], vec![2.0],]), - DenseMatrix::from_2d_vec(&vec![vec![3.0], vec![3.0], vec![3.0],]) + DenseMatrix::from_2d_vec(&[vec![1.0], vec![1.0], vec![1.0]]), + DenseMatrix::from_2d_vec(&[vec![2.0], vec![2.0], vec![2.0]]), + DenseMatrix::from_2d_vec(&[vec![3.0], vec![3.0], vec![3.0]]) ]), - Some(DenseMatrix::from_2d_vec(&vec![ + Some(DenseMatrix::from_2d_vec(&[ vec![1.0, 2.0, 3.0], vec![1.0, 2.0, 3.0], vec![1.0, 2.0, 3.0] @@ -365,7 +365,7 @@ mod tests { ); assert!( - &DenseMatrix::::from_2d_vec(&vec![fitted_scaler.stds]).approximate_eq( + &DenseMatrix::::from_2d_vec(&[fitted_scaler.stds]).approximate_eq( &DenseMatrix::from_2d_array(&[&[ 0.29426447500954, 0.16758497615485, From 239c00428f7448d30b78bf8653923f6bc0e2c29b Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Mon, 4 Mar 2024 13:51:27 +0000 Subject: [PATCH 12/15] Patch to version 0.4.0 (#257) * uncomment test * Add random test for logistic regression * linting * Bump version * Add test for logistic regression * linting * initial commit * final * final-clean * Bump to 0.4.0 * Fix linter * cleanup * Update CHANDELOG with breaking changes * Update CHANDELOG date * Add functional methods to DenseMatrix implementation * linting * add type declaration in test * Fix Wasm tests failing * linting * fix tests * linting * Add type annotations on BBDTree constructor * fix clippy * fix clippy * fix tests * bump version * run fmt. 
fix changelog --------- Co-authored-by: Edmund Cape --- CHANGELOG.md | 6 + Cargo.toml | 2 +- src/algorithm/neighbour/bbd_tree.rs | 7 +- src/algorithm/neighbour/fastpair.rs | 18 +- src/cluster/dbscan.rs | 6 +- src/cluster/kmeans.rs | 12 +- src/decomposition/pca.rs | 20 +- src/decomposition/svd.rs | 10 +- src/ensemble/random_forest_classifier.rs | 11 +- src/ensemble/random_forest_regressor.rs | 11 +- src/error/mod.rs | 19 ++ src/lib.rs | 2 +- src/linalg/basic/arrays.rs | 206 +++++++++------- src/linalg/basic/matrix.rs | 289 ++++++++++++++++------- src/linalg/basic/vector.rs | 1 + src/linalg/traits/cholesky.rs | 18 +- src/linalg/traits/evd.rs | 20 +- src/linalg/traits/high_order.rs | 6 +- src/linalg/traits/lu.rs | 15 +- src/linalg/traits/qr.rs | 19 +- src/linalg/traits/stats.rs | 26 +- src/linalg/traits/svd.rs | 30 ++- src/linear/bg_solver.rs | 6 +- src/linear/elastic_net.rs | 10 +- src/linear/lasso.rs | 3 +- src/linear/linear_regression.rs | 7 +- src/linear/logistic_regression.rs | 124 +++++++--- src/linear/ridge_regression.rs | 7 +- src/metrics/distance/mahalanobis.rs | 5 +- src/metrics/mod.rs | 2 +- src/model_selection/mod.rs | 16 +- src/naive_bayes/bernoulli.rs | 15 +- src/naive_bayes/categorical.rs | 13 +- src/naive_bayes/gaussian.rs | 11 +- src/naive_bayes/mod.rs | 2 +- src/naive_bayes/multinomial.rs | 15 +- src/neighbors/knn_classifier.rs | 16 +- src/neighbors/knn_regressor.rs | 13 +- src/preprocessing/categorical.rs | 17 +- src/preprocessing/numerical.rs | 85 ++++--- src/readers/csv.rs | 5 +- src/svm/svc.rs | 17 +- src/svm/svr.rs | 8 +- src/tree/decision_tree_classifier.rs | 14 +- src/tree/decision_tree_regressor.rs | 8 +- 45 files changed, 763 insertions(+), 410 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d1054327..6df73a6f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.4.0] - 2023-04-05 + +## Added +- WARNING: Breaking changes! +- `DenseMatrix` constructor now returns `Result` to avoid user instantiating inconsistent rows/cols count. Their return values need to be unwrapped with `unwrap()`, see tests + ## [0.3.0] - 2022-11-09 ## Added diff --git a/Cargo.toml b/Cargo.toml index c13003b9..a3fea09b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "smartcore" description = "Machine Learning in Rust." 
homepage = "https://smartcorelib.org" -version = "0.3.2" +version = "0.4.0" authors = ["smartcore Developers"] edition = "2021" license = "Apache-2.0" diff --git a/src/algorithm/neighbour/bbd_tree.rs b/src/algorithm/neighbour/bbd_tree.rs index 44cef506..790f6d39 100644 --- a/src/algorithm/neighbour/bbd_tree.rs +++ b/src/algorithm/neighbour/bbd_tree.rs @@ -40,11 +40,11 @@ impl BBDTreeNode { impl BBDTree { pub fn new>(data: &M) -> BBDTree { - let nodes = Vec::new(); + let nodes: Vec = Vec::new(); let (n, _) = data.shape(); - let index = (0..n).collect::>(); + let index = (0..n).collect::>(); let mut tree = BBDTree { nodes, @@ -343,7 +343,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let tree = BBDTree::new(&data); diff --git a/src/algorithm/neighbour/fastpair.rs b/src/algorithm/neighbour/fastpair.rs index eca73ed6..759d018c 100644 --- a/src/algorithm/neighbour/fastpair.rs +++ b/src/algorithm/neighbour/fastpair.rs @@ -17,7 +17,7 @@ /// &[4.6, 3.1, 1.5, 0.2], /// &[5.0, 3.6, 1.4, 0.2], /// &[5.4, 3.9, 1.7, 0.4], -/// ]); +/// ]).unwrap(); /// let fastpair = FastPair::new(&x); /// let closest_pair: PairwiseDistance = fastpair.unwrap().closest_pair(); /// ``` @@ -271,7 +271,7 @@ mod tests_fastpair { fn dataset_has_at_least_three_points() { // Create a dataset which consists of only two points: // A(0.0, 0.0) and B(1.0, 1.0). - let dataset = DenseMatrix::::from_2d_array(&[&[0.0, 0.0], &[1.0, 1.0]]); + let dataset = DenseMatrix::::from_2d_array(&[&[0.0, 0.0], &[1.0, 1.0]]).unwrap(); // We expect an error when we run `FastPair` on this dataset, // becuase `FastPair` currently only works on a minimum of 3 @@ -288,7 +288,7 @@ mod tests_fastpair { #[test] fn one_dimensional_dataset_minimal() { - let dataset = DenseMatrix::::from_2d_array(&[&[0.0], &[2.0], &[9.0]]); + let dataset = DenseMatrix::::from_2d_array(&[&[0.0], &[2.0], &[9.0]]).unwrap(); let result = FastPair::new(&dataset); assert!(result.is_ok()); @@ -308,7 +308,8 @@ mod tests_fastpair { #[test] fn one_dimensional_dataset_2() { - let dataset = DenseMatrix::::from_2d_array(&[&[27.0], &[0.0], &[9.0], &[2.0]]); + let dataset = + DenseMatrix::::from_2d_array(&[&[27.0], &[0.0], &[9.0], &[2.0]]).unwrap(); let result = FastPair::new(&dataset); assert!(result.is_ok()); @@ -343,7 +344,8 @@ mod tests_fastpair { &[6.9, 3.1, 4.9, 1.5], &[5.5, 2.3, 4.0, 1.3], &[6.5, 2.8, 4.6, 1.5], - ]); + ]) + .unwrap(); let fastpair = FastPair::new(&x); assert!(fastpair.is_ok()); @@ -516,7 +518,8 @@ mod tests_fastpair { &[6.9, 3.1, 4.9, 1.5], &[5.5, 2.3, 4.0, 1.3], &[6.5, 2.8, 4.6, 1.5], - ]); + ]) + .unwrap(); // compute let fastpair = FastPair::new(&x); assert!(fastpair.is_ok()); @@ -564,7 +567,8 @@ mod tests_fastpair { &[6.9, 3.1, 4.9, 1.5], &[5.5, 2.3, 4.0, 1.3], &[6.5, 2.8, 4.6, 1.5], - ]); + ]) + .unwrap(); // compute let fastpair = FastPair::new(&x); assert!(fastpair.is_ok()); diff --git a/src/cluster/dbscan.rs b/src/cluster/dbscan.rs index 584cdc31..2e2aac10 100644 --- a/src/cluster/dbscan.rs +++ b/src/cluster/dbscan.rs @@ -442,7 +442,8 @@ mod tests { &[2.2, 1.2], &[1.8, 0.8], &[3.0, 5.0], - ]); + ]) + .unwrap(); let expected_labels = vec![1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0]; @@ -487,7 +488,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let dbscan = DBSCAN::fit(&x, Default::default()).unwrap(); diff --git a/src/cluster/kmeans.rs b/src/cluster/kmeans.rs index c2470abb..6609ace4 100644 --- a/src/cluster/kmeans.rs +++ b/src/cluster/kmeans.rs 
@@ -41,7 +41,7 @@ //! &[4.9, 2.4, 3.3, 1.0], //! &[6.6, 2.9, 4.6, 1.3], //! &[5.2, 2.7, 3.9, 1.4], -//! ]); +//! ]).unwrap(); //! //! let kmeans = KMeans::fit(&x, KMeansParameters::default().with_k(2)).unwrap(); // Fit to data, 2 clusters //! let y_hat: Vec = kmeans.predict(&x).unwrap(); // use the same points for prediction @@ -249,7 +249,7 @@ impl, Y: Array1> Predictor impl, Y: Array1> KMeans { /// Fit algorithm to _NxM_ matrix where _N_ is number of samples and _M_ is number of features. - /// * `data` - training instances to cluster + /// * `data` - training instances to cluster /// * `parameters` - cluster parameters pub fn fit(data: &X, parameters: KMeansParameters) -> Result, Failed> { let bbd = BBDTree::new(data); @@ -424,7 +424,7 @@ mod tests { )] #[test] fn invalid_k() { - let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); + let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); assert!(KMeans::, Vec>::fit( &x, @@ -492,7 +492,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let kmeans = KMeans::fit(&x, Default::default()).unwrap(); @@ -531,7 +532,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let kmeans: KMeans, Vec> = KMeans::fit(&x, Default::default()).unwrap(); diff --git a/src/decomposition/pca.rs b/src/decomposition/pca.rs index d4116ed5..11853648 100644 --- a/src/decomposition/pca.rs +++ b/src/decomposition/pca.rs @@ -35,7 +35,7 @@ //! &[4.9, 2.4, 3.3, 1.0], //! &[6.6, 2.9, 4.6, 1.3], //! &[5.2, 2.7, 3.9, 1.4], -//! ]); +//! ]).unwrap(); //! //! let pca = PCA::fit(&iris, PCAParameters::default().with_n_components(2)).unwrap(); // Reduce number of features to 2 //! @@ -443,6 +443,7 @@ mod tests { &[2.6, 53.0, 66.0, 10.8], &[6.8, 161.0, 60.0, 15.6], ]) + .unwrap() } #[cfg_attr( all(target_arch = "wasm32", not(target_os = "wasi")), @@ -457,7 +458,8 @@ mod tests { &[0.9952, 0.0588], &[0.0463, 0.9769], &[0.0752, 0.2007], - ]); + ]) + .unwrap(); let pca = PCA::fit(&us_arrests, Default::default()).unwrap(); @@ -500,7 +502,8 @@ mod tests { -0.974080592182491, 0.0723250196376097, ], - ]); + ]) + .unwrap(); let expected_projection = DenseMatrix::from_2d_array(&[ &[-64.8022, -11.448, 2.4949, -2.4079], @@ -553,7 +556,8 @@ mod tests { &[91.5446, -22.9529, 0.402, -0.7369], &[118.1763, 5.5076, 2.7113, -0.205], &[10.4345, -5.9245, 3.7944, 0.5179], - ]); + ]) + .unwrap(); let expected_eigenvalues: Vec = vec![ 343544.6277001563, @@ -616,7 +620,8 @@ mod tests { -0.0881962972508558, -0.0096011588898465, ], - ]); + ]) + .unwrap(); let expected_projection = DenseMatrix::from_2d_array(&[ &[0.9856, -1.1334, 0.4443, -0.1563], @@ -669,7 +674,8 @@ mod tests { &[-2.1086, -1.4248, -0.1048, -0.1319], &[-2.0797, 0.6113, 0.1389, -0.1841], &[-0.6294, -0.321, 0.2407, 0.1667], - ]); + ]) + .unwrap(); let expected_eigenvalues: Vec = vec![ 2.480241579149493, @@ -732,7 +738,7 @@ mod tests { // &[4.9, 2.4, 3.3, 1.0], // &[6.6, 2.9, 4.6, 1.3], // &[5.2, 2.7, 3.9, 1.4], - // ]); + // ]).unwrap(); // let pca = PCA::fit(&iris, Default::default()).unwrap(); diff --git a/src/decomposition/svd.rs b/src/decomposition/svd.rs index a82dfbd0..259bfbc0 100644 --- a/src/decomposition/svd.rs +++ b/src/decomposition/svd.rs @@ -32,7 +32,7 @@ //! &[4.9, 2.4, 3.3, 1.0], //! &[6.6, 2.9, 4.6, 1.3], //! &[5.2, 2.7, 3.9, 1.4], -//! ]); +//! ]).unwrap(); //! //! let svd = SVD::fit(&iris, SVDParameters::default(). //! 
with_n_components(2)).unwrap(); // Reduce number of features to 2 @@ -292,7 +292,8 @@ mod tests { &[5.7, 81.0, 39.0, 9.3], &[2.6, 53.0, 66.0, 10.8], &[6.8, 161.0, 60.0, 15.6], - ]); + ]) + .unwrap(); let expected = DenseMatrix::from_2d_array(&[ &[243.54655757, -18.76673788], @@ -300,7 +301,8 @@ mod tests { &[305.93972467, -15.39087376], &[197.28420365, -11.66808306], &[293.43187394, 1.91163633], - ]); + ]) + .unwrap(); let svd = SVD::fit(&x, Default::default()).unwrap(); let x_transformed = svd.transform(&x).unwrap(); @@ -341,7 +343,7 @@ mod tests { // &[4.9, 2.4, 3.3, 1.0], // &[6.6, 2.9, 4.6, 1.3], // &[5.2, 2.7, 3.9, 1.4], - // ]); + // ]).unwrap(); // let svd = SVD::fit(&iris, Default::default()).unwrap(); diff --git a/src/ensemble/random_forest_classifier.rs b/src/ensemble/random_forest_classifier.rs index 6448b52e..dabb2480 100644 --- a/src/ensemble/random_forest_classifier.rs +++ b/src/ensemble/random_forest_classifier.rs @@ -33,7 +33,7 @@ //! &[4.9, 2.4, 3.3, 1.0], //! &[6.6, 2.9, 4.6, 1.3], //! &[5.2, 2.7, 3.9, 1.4], -//! ]); +//! ]).unwrap(); //! let y = vec![ //! 0, 0, 0, 0, 0, 0, 0, 0, //! 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -660,7 +660,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let y = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; let classifier = RandomForestClassifier::fit( @@ -733,7 +734,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let y = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; let classifier = RandomForestClassifier::fit( @@ -786,7 +788,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let y = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; let forest = RandomForestClassifier::fit(&x, &y, Default::default()).unwrap(); diff --git a/src/ensemble/random_forest_regressor.rs b/src/ensemble/random_forest_regressor.rs index 926327e1..efc63d3d 100644 --- a/src/ensemble/random_forest_regressor.rs +++ b/src/ensemble/random_forest_regressor.rs @@ -29,7 +29,7 @@ //! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], //! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], //! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], -//! ]); +//! ]).unwrap(); //! let y = vec![ //! 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, //! 
104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9 @@ -574,7 +574,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9, @@ -648,7 +649,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9, @@ -702,7 +704,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9, diff --git a/src/error/mod.rs b/src/error/mod.rs index 838df085..b6b1d982 100644 --- a/src/error/mod.rs +++ b/src/error/mod.rs @@ -32,6 +32,8 @@ pub enum FailedError { SolutionFailed, /// Error in input parameters ParametersError, + /// Invalid state error (should never happen) + InvalidStateError, } impl Failed { @@ -64,6 +66,22 @@ impl Failed { } } + /// new instance of `FailedError::ParametersError` + pub fn input(msg: &str) -> Self { + Failed { + err: FailedError::ParametersError, + msg: msg.to_string(), + } + } + + /// new instance of `FailedError::InvalidStateError` + pub fn invalid_state(msg: &str) -> Self { + Failed { + err: FailedError::InvalidStateError, + msg: msg.to_string(), + } + } + /// new instance of `err` pub fn because(err: FailedError, msg: &str) -> Self { Failed { @@ -97,6 +115,7 @@ impl fmt::Display for FailedError { FailedError::DecompositionFailed => "Decomposition failed", FailedError::SolutionFailed => "Can't find solution", FailedError::ParametersError => "Error in input, check parameters", + FailedError::InvalidStateError => "Invalid state, this should never happen", // useful in development phase of lib }; write!(f, "{failed_err_str}") } diff --git a/src/lib.rs b/src/lib.rs index 136584ee..539acf89 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -64,7 +64,7 @@ //! &[3., 4.], //! &[5., 6.], //! &[7., 8.], -//! &[9., 10.]]); +//! &[9., 10.]]).unwrap(); //! // Our classes are defined as a vector //! let y = vec![2, 2, 2, 3, 3]; //! 
diff --git a/src/linalg/basic/arrays.rs b/src/linalg/basic/arrays.rs index 0df1bf75..99df2078 100644 --- a/src/linalg/basic/arrays.rs +++ b/src/linalg/basic/arrays.rs @@ -1775,7 +1775,7 @@ mod tests { #[test] fn test_xa() { - let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); + let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); assert_eq!(vec![7, 8].xa(false, &a), vec![39, 54, 69]); assert_eq!(vec![7, 8, 9].xa(true, &a), vec![50, 122]); } @@ -1783,19 +1783,27 @@ mod tests { #[test] fn test_min_max() { assert_eq!( - DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).max(0), + DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]) + .unwrap() + .max(0), vec!(4, 5, 6) ); assert_eq!( - DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).max(1), + DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]) + .unwrap() + .max(1), vec!(3, 6) ); assert_eq!( - DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]).min(0), + DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]) + .unwrap() + .min(0), vec!(1., 2., 3.) ); assert_eq!( - DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]).min(1), + DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]) + .unwrap() + .min(1), vec!(1., 4.) ); } @@ -1803,11 +1811,15 @@ mod tests { #[test] fn test_argmax() { assert_eq!( - DenseMatrix::from_2d_array(&[&[1, 5, 3], &[4, 2, 6]]).argmax(0), + DenseMatrix::from_2d_array(&[&[1, 5, 3], &[4, 2, 6]]) + .unwrap() + .argmax(0), vec!(1, 0, 1) ); assert_eq!( - DenseMatrix::from_2d_array(&[&[4, 2, 3], &[1, 5, 6]]).argmax(1), + DenseMatrix::from_2d_array(&[&[4, 2, 3], &[1, 5, 6]]) + .unwrap() + .argmax(1), vec!(0, 2) ); } @@ -1815,168 +1827,181 @@ mod tests { #[test] fn test_sum() { assert_eq!( - DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).sum(0), + DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]) + .unwrap() + .sum(0), vec!(5, 7, 9) ); assert_eq!( - DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]).sum(1), + DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]) + .unwrap() + .sum(1), vec!(6., 15.) 
); } #[test] fn test_abs() { - let mut x = DenseMatrix::from_2d_array(&[&[-1, 2, -3], &[4, -5, 6]]); + let mut x = DenseMatrix::from_2d_array(&[&[-1, 2, -3], &[4, -5, 6]]).unwrap(); x.abs_mut(); - assert_eq!(x, DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]])); + assert_eq!( + x, + DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap() + ); } #[test] fn test_neg() { - let mut x = DenseMatrix::from_2d_array(&[&[-1, 2, -3], &[4, -5, 6]]); + let mut x = DenseMatrix::from_2d_array(&[&[-1, 2, -3], &[4, -5, 6]]).unwrap(); x.neg_mut(); - assert_eq!(x, DenseMatrix::from_2d_array(&[&[1, -2, 3], &[-4, 5, -6]])); + assert_eq!( + x, + DenseMatrix::from_2d_array(&[&[1, -2, 3], &[-4, 5, -6]]).unwrap() + ); } #[test] fn test_copy_from() { - let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); + let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); let mut y = DenseMatrix::::zeros(2, 3); y.copy_from(&x); - assert_eq!(y, DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]])); + assert_eq!( + y, + DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap() + ); } #[test] fn test_init() { - let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); + let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); assert_eq!( DenseMatrix::::zeros(2, 2), - DenseMatrix::from_2d_array(&[&[0, 0], &[0, 0]]) + DenseMatrix::from_2d_array(&[&[0, 0], &[0, 0]]).unwrap() ); assert_eq!( DenseMatrix::::ones(2, 2), - DenseMatrix::from_2d_array(&[&[1, 1], &[1, 1]]) + DenseMatrix::from_2d_array(&[&[1, 1], &[1, 1]]).unwrap() ); assert_eq!( DenseMatrix::::eye(3), - DenseMatrix::from_2d_array(&[&[1, 0, 0], &[0, 1, 0], &[0, 0, 1]]) + DenseMatrix::from_2d_array(&[&[1, 0, 0], &[0, 1, 0], &[0, 0, 1]]).unwrap() ); assert_eq!( - DenseMatrix::from_slice(x.slice(0..2, 0..2).as_ref()), - DenseMatrix::from_2d_array(&[&[1, 2], &[4, 5]]) + DenseMatrix::from_slice(x.slice(0..2, 0..2).as_ref()), // internal only? + DenseMatrix::from_2d_array(&[&[1, 2], &[4, 5]]).unwrap() ); assert_eq!( - DenseMatrix::from_row(x.get_row(0).as_ref()), - DenseMatrix::from_2d_array(&[&[1, 2, 3]]) + DenseMatrix::from_row(x.get_row(0).as_ref()), // internal only? + DenseMatrix::from_2d_array(&[&[1, 2, 3]]).unwrap() ); assert_eq!( - DenseMatrix::from_column(x.get_col(0).as_ref()), - DenseMatrix::from_2d_array(&[&[1], &[4]]) + DenseMatrix::from_column(x.get_col(0).as_ref()), // internal only? 
+ DenseMatrix::from_2d_array(&[&[1], &[4]]).unwrap() ); } #[test] fn test_transpose() { - let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); + let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); assert_eq!( x.transpose(), - DenseMatrix::from_2d_array(&[&[1, 4], &[2, 5], &[3, 6]]) + DenseMatrix::from_2d_array(&[&[1, 4], &[2, 5], &[3, 6]]).unwrap() ); } #[test] fn test_reshape() { - let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); + let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); assert_eq!( x.reshape(3, 2, 0), - DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]) + DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]).unwrap() ); assert_eq!( x.reshape(3, 2, 1), - DenseMatrix::from_2d_array(&[&[1, 4], &[2, 5], &[3, 6]]) + DenseMatrix::from_2d_array(&[&[1, 4], &[2, 5], &[3, 6]]).unwrap() ); } #[test] #[should_panic] fn test_failed_reshape() { - let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); + let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); assert_eq!( x.reshape(4, 2, 0), - DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]) + DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]).unwrap() ); } #[test] fn test_matmul() { - let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); - let b = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]); + let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); + let b = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]).unwrap(); assert_eq!( a.matmul(&(*b.slice(0..3, 0..2))), - DenseMatrix::from_2d_array(&[&[22, 28], &[49, 64]]) + DenseMatrix::from_2d_array(&[&[22, 28], &[49, 64]]).unwrap() ); assert_eq!( a.matmul(&b), - DenseMatrix::from_2d_array(&[&[22, 28], &[49, 64]]) + DenseMatrix::from_2d_array(&[&[22, 28], &[49, 64]]).unwrap() ); } #[test] fn test_concat() { - let a = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]); - let b = DenseMatrix::from_2d_array(&[&[5, 6], &[7, 8]]); + let a = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]).unwrap(); + let b = DenseMatrix::from_2d_array(&[&[5, 6], &[7, 8]]).unwrap(); assert_eq!( DenseMatrix::concatenate_1d(&[&vec!(1, 2, 3), &vec!(4, 5, 6)], 0), - DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]) + DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap() ); assert_eq!( DenseMatrix::concatenate_1d(&[&vec!(1, 2), &vec!(3, 4)], 1), - DenseMatrix::from_2d_array(&[&[1, 3], &[2, 4]]) + DenseMatrix::from_2d_array(&[&[1, 3], &[2, 4]]).unwrap() ); assert_eq!( DenseMatrix::concatenate_2d(&[&a, &b], 0), - DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6], &[7, 8]]) + DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6], &[7, 8]]).unwrap() ); assert_eq!( DenseMatrix::concatenate_2d(&[&a, &b], 1), - DenseMatrix::from_2d_array(&[&[1, 2, 5, 6], &[3, 4, 7, 8]]) + DenseMatrix::from_2d_array(&[&[1, 2, 5, 6], &[3, 4, 7, 8]]).unwrap() ); } #[test] fn test_take() { - let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); - let b = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]); + let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); + let b = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]).unwrap(); assert_eq!( a.take(&[0, 2], 1), - DenseMatrix::from_2d_array(&[&[1, 3], &[4, 6]]) + DenseMatrix::from_2d_array(&[&[1, 3], &[4, 6]]).unwrap() ); assert_eq!( b.take(&[0, 2], 0), - DenseMatrix::from_2d_array(&[&[1, 2], &[5, 6]]) + DenseMatrix::from_2d_array(&[&[1, 2], &[5, 6]]).unwrap() ); } #[test] fn test_merge() { - let 
a = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]); + let a = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]).unwrap(); assert_eq!( - DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6], &[7, 8]]), + DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6], &[7, 8]]).unwrap(), a.merge_1d(&[&vec!(5, 6), &vec!(7, 8)], 0, true) ); assert_eq!( - DenseMatrix::from_2d_array(&[&[5, 6], &[7, 8], &[1, 2], &[3, 4]]), + DenseMatrix::from_2d_array(&[&[5, 6], &[7, 8], &[1, 2], &[3, 4]]).unwrap(), a.merge_1d(&[&vec!(5, 6), &vec!(7, 8)], 0, false) ); assert_eq!( - DenseMatrix::from_2d_array(&[&[1, 2, 5, 7], &[3, 4, 6, 8]]), + DenseMatrix::from_2d_array(&[&[1, 2, 5, 7], &[3, 4, 6, 8]]).unwrap(), a.merge_1d(&[&vec!(5, 6), &vec!(7, 8)], 1, true) ); assert_eq!( - DenseMatrix::from_2d_array(&[&[5, 7, 1, 2], &[6, 8, 3, 4]]), + DenseMatrix::from_2d_array(&[&[5, 7, 1, 2], &[6, 8, 3, 4]]).unwrap(), a.merge_1d(&[&vec!(5, 6), &vec!(7, 8)], 1, false) ); } @@ -1984,20 +2009,28 @@ mod tests { #[test] fn test_ops() { assert_eq!( - DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]).mul_scalar(2), - DenseMatrix::from_2d_array(&[&[2, 4], &[6, 8]]) + DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]) + .unwrap() + .mul_scalar(2), + DenseMatrix::from_2d_array(&[&[2, 4], &[6, 8]]).unwrap() ); assert_eq!( - DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]).add_scalar(2), - DenseMatrix::from_2d_array(&[&[3, 4], &[5, 6]]) + DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]) + .unwrap() + .add_scalar(2), + DenseMatrix::from_2d_array(&[&[3, 4], &[5, 6]]).unwrap() ); assert_eq!( - DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]).sub_scalar(1), - DenseMatrix::from_2d_array(&[&[0, 1], &[2, 3]]) + DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]) + .unwrap() + .sub_scalar(1), + DenseMatrix::from_2d_array(&[&[0, 1], &[2, 3]]).unwrap() ); assert_eq!( - DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]).div_scalar(2), - DenseMatrix::from_2d_array(&[&[0, 1], &[1, 2]]) + DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]) + .unwrap() + .div_scalar(2), + DenseMatrix::from_2d_array(&[&[0, 1], &[1, 2]]).unwrap() ); } @@ -2011,42 +2044,45 @@ mod tests { #[test] fn test_vstack() { - let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9]]); - let b = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); + let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9]]).unwrap(); + let b = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); let expected = DenseMatrix::from_2d_array(&[ &[1, 2, 3], &[4, 5, 6], &[7, 8, 9], &[1, 2, 3], &[4, 5, 6], - ]); + ]) + .unwrap(); let result = a.v_stack(&b); assert_eq!(result, expected); } #[test] fn test_hstack() { - let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9]]); - let b = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]); + let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9]]).unwrap(); + let b = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]).unwrap(); let expected = - DenseMatrix::from_2d_array(&[&[1, 2, 3, 1, 2], &[4, 5, 6, 3, 4], &[7, 8, 9, 5, 6]]); + DenseMatrix::from_2d_array(&[&[1, 2, 3, 1, 2], &[4, 5, 6, 3, 4], &[7, 8, 9, 5, 6]]) + .unwrap(); let result = a.h_stack(&b); assert_eq!(result, expected); } #[test] fn test_map() { - let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); - let expected = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0], &[4.0, 5.0, 6.0]]); + let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); + let expected = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0], &[4.0, 
5.0, 6.0]]).unwrap(); let result: DenseMatrix = a.map(|&v| v as f64); assert_eq!(result, expected); } #[test] fn scale() { - let mut m = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]); - let expected_0 = DenseMatrix::from_2d_array(&[&[-1., -1., -1.], &[1., 1., 1.]]); - let expected_1 = DenseMatrix::from_2d_array(&[&[-1.22, 0.0, 1.22], &[-1.22, 0.0, 1.22]]); + let mut m = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]).unwrap(); + let expected_0 = DenseMatrix::from_2d_array(&[&[-1., -1., -1.], &[1., 1., 1.]]).unwrap(); + let expected_1 = + DenseMatrix::from_2d_array(&[&[-1.22, 0.0, 1.22], &[-1.22, 0.0, 1.22]]).unwrap(); { let mut m = m.clone(); @@ -2060,52 +2096,52 @@ mod tests { #[test] fn test_pow_mut() { - let mut a = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0], &[4.0, 5.0, 6.0]]); + let mut a = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0], &[4.0, 5.0, 6.0]]).unwrap(); a.pow_mut(2.0); assert_eq!( a, - DenseMatrix::from_2d_array(&[&[1.0, 4.0, 9.0], &[16.0, 25.0, 36.0]]) + DenseMatrix::from_2d_array(&[&[1.0, 4.0, 9.0], &[16.0, 25.0, 36.0]]).unwrap() ); } #[test] fn test_ab() { - let a = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]); - let b = DenseMatrix::from_2d_array(&[&[5, 6], &[7, 8]]); + let a = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4]]).unwrap(); + let b = DenseMatrix::from_2d_array(&[&[5, 6], &[7, 8]]).unwrap(); assert_eq!( a.ab(false, &b, false), - DenseMatrix::from_2d_array(&[&[19, 22], &[43, 50]]) + DenseMatrix::from_2d_array(&[&[19, 22], &[43, 50]]).unwrap() ); assert_eq!( a.ab(true, &b, false), - DenseMatrix::from_2d_array(&[&[26, 30], &[38, 44]]) + DenseMatrix::from_2d_array(&[&[26, 30], &[38, 44]]).unwrap() ); assert_eq!( a.ab(false, &b, true), - DenseMatrix::from_2d_array(&[&[17, 23], &[39, 53]]) + DenseMatrix::from_2d_array(&[&[17, 23], &[39, 53]]).unwrap() ); assert_eq!( a.ab(true, &b, true), - DenseMatrix::from_2d_array(&[&[23, 31], &[34, 46]]) + DenseMatrix::from_2d_array(&[&[23, 31], &[34, 46]]).unwrap() ); } #[test] fn test_ax() { - let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); + let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); assert_eq!( a.ax(false, &vec![7, 8, 9]).transpose(), - DenseMatrix::from_2d_array(&[&[50, 122]]) + DenseMatrix::from_2d_array(&[&[50, 122]]).unwrap() ); assert_eq!( a.ax(true, &vec![7, 8]).transpose(), - DenseMatrix::from_2d_array(&[&[39, 54, 69]]) + DenseMatrix::from_2d_array(&[&[39, 54, 69]]).unwrap() ); } #[test] fn diag() { - let x = DenseMatrix::from_2d_array(&[&[0, 1, 2], &[3, 4, 5], &[6, 7, 8]]); + let x = DenseMatrix::from_2d_array(&[&[0, 1, 2], &[3, 4, 5], &[6, 7, 8]]).unwrap(); assert_eq!(x.diag(), vec![0, 4, 8]); } @@ -2117,13 +2153,15 @@ mod tests { &[68, 590, 37], &[69, 660, 46], &[73, 600, 55], - ]); + ]) + .unwrap(); let mut result = DenseMatrix::zeros(3, 3); let expected = DenseMatrix::from_2d_array(&[ &[11.5, 50.0, 34.75], &[50.0, 1250.0, 205.0], &[34.75, 205.0, 110.0], - ]); + ]) + .unwrap(); a.cov(&mut result); diff --git a/src/linalg/basic/matrix.rs b/src/linalg/basic/matrix.rs index 47307605..47c5e9d2 100644 --- a/src/linalg/basic/matrix.rs +++ b/src/linalg/basic/matrix.rs @@ -19,6 +19,8 @@ use crate::linalg::traits::svd::SVDDecomposable; use crate::numbers::basenum::Number; use crate::numbers::realnum::RealNumber; +use crate::error::Failed; + /// Dense matrix #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[derive(Debug, Clone)] @@ -50,26 +52,26 @@ pub struct DenseMatrixMutView<'a, T: Debug + Display + Copy + Sized> { } impl<'a, T: 
Debug + Display + Copy + Sized> DenseMatrixView<'a, T> {
-    fn new(m: &'a DenseMatrix<T>, rows: Range<usize>, cols: Range<usize>) -> Self {
-        let (start, end, stride) = if m.column_major {
-            (
-                rows.start + cols.start * m.nrows,
-                rows.end + (cols.end - 1) * m.nrows,
-                m.nrows,
-            )
+    fn new(
+        m: &'a DenseMatrix<T>,
+        vrows: Range<usize>,
+        vcols: Range<usize>,
+    ) -> Result<Self, Failed> {
+        if m.is_valid_view(m.shape().0, m.shape().1, &vrows, &vcols) {
+            Err(Failed::input(
+                "The specified view is outside of the matrix range",
+            ))
         } else {
-            (
-                rows.start * m.ncols + cols.start,
-                (rows.end - 1) * m.ncols + cols.end,
-                m.ncols,
-            )
-        };
-        DenseMatrixView {
-            values: &m.values[start..end],
-            stride,
-            nrows: rows.end - rows.start,
-            ncols: cols.end - cols.start,
-            column_major: m.column_major,
+            let (start, end, stride) =
+                m.stride_range(m.shape().0, m.shape().1, &vrows, &vcols, m.column_major);
+
+            Ok(DenseMatrixView {
+                values: &m.values[start..end],
+                stride,
+                nrows: vrows.end - vrows.start,
+                ncols: vcols.end - vcols.start,
+                column_major: m.column_major,
+            })
         }
     }

@@ -102,26 +104,26 @@ impl<'a, T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrixView<'a, T>
 }

 impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixMutView<'a, T> {
-    fn new(m: &'a mut DenseMatrix<T>, rows: Range<usize>, cols: Range<usize>) -> Self {
-        let (start, end, stride) = if m.column_major {
-            (
-                rows.start + cols.start * m.nrows,
-                rows.end + (cols.end - 1) * m.nrows,
-                m.nrows,
-            )
+    fn new(
+        m: &'a mut DenseMatrix<T>,
+        vrows: Range<usize>,
+        vcols: Range<usize>,
+    ) -> Result<Self, Failed> {
+        if m.is_valid_view(m.shape().0, m.shape().1, &vrows, &vcols) {
+            Err(Failed::input(
+                "The specified view is outside of the matrix range",
+            ))
         } else {
-            (
-                rows.start * m.ncols + cols.start,
-                (rows.end - 1) * m.ncols + cols.end,
-                m.ncols,
-            )
-        };
-        DenseMatrixMutView {
-            values: &mut m.values[start..end],
-            stride,
-            nrows: rows.end - rows.start,
-            ncols: cols.end - cols.start,
-            column_major: m.column_major,
+            let (start, end, stride) =
+                m.stride_range(m.shape().0, m.shape().1, &vrows, &vcols, m.column_major);
+
+            Ok(DenseMatrixMutView {
+                values: &mut m.values[start..end],
+                stride,
+                nrows: vrows.end - vrows.start,
+                ncols: vcols.end - vcols.start,
+                column_major: m.column_major,
+            })
         }
     }

@@ -182,42 +184,102 @@ impl<'a, T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrixMutView<'a, T>
 impl<T: Debug + Display + Copy + Sized> DenseMatrix<T> {
     /// Create new instance of `DenseMatrix` without copying data.
     /// `values` should be in column-major order.
-    pub fn new(nrows: usize, ncols: usize, values: Vec<T>, column_major: bool) -> Self {
-        DenseMatrix {
-            ncols,
-            nrows,
-            values,
-            column_major,
+    pub fn new(
+        nrows: usize,
+        ncols: usize,
+        values: Vec<T>,
+        column_major: bool,
+    ) -> Result<Self, Failed> {
+        let data_len = values.len();
+        if nrows * ncols != values.len() {
+            Err(Failed::input(&format!(
+                "The specified shape: (cols: {ncols}, rows: {nrows}) does not align with data len: {data_len}"
+            )))
+        } else {
+            Ok(DenseMatrix {
+                ncols,
+                nrows,
+                values,
+                column_major,
+            })
         }
     }

     /// New instance of `DenseMatrix` from 2d array.
-    pub fn from_2d_array(values: &[&[T]]) -> Self {
-        DenseMatrix::from_2d_vec(&values.iter().map(|row| Vec::from(*row)).collect::<Vec<_>>())
+    pub fn from_2d_array(values: &[&[T]]) -> Result<Self, Failed> {
+        DenseMatrix::from_2d_vec(&values.iter().map(|row| Vec::from(*row)).collect())
     }

     /// New instance of `DenseMatrix` from 2d vector.
-    pub fn from_2d_vec(values: &[Vec<T>]) -> Self {
-        let nrows = values.len();
-        let ncols = values
-            .first()
-            .unwrap_or_else(|| panic!("Cannot create 2d matrix from an empty vector"))
-            .len();
-        let mut m_values = Vec::with_capacity(nrows * ncols);
-
-        for c in 0..ncols {
-            for r in values.iter().take(nrows) {
-                m_values.push(r[c])
+    #[allow(clippy::ptr_arg)]
+    pub fn from_2d_vec(values: &Vec<Vec<T>>) -> Result<Self, Failed> {
+        if values.is_empty() || values[0].is_empty() {
+            Err(Failed::input(
+                "The 2d vec provided is empty; cannot instantiate the matrix",
+            ))
+        } else {
+            let nrows = values.len();
+            let ncols = values
+                .first()
+                .unwrap_or_else(|| {
+                    panic!("Invalid state: Cannot create 2d matrix from an empty vector")
+                })
+                .len();
+            let mut m_values = Vec::with_capacity(nrows * ncols);
+
+            for c in 0..ncols {
+                for r in values.iter().take(nrows) {
+                    m_values.push(r[c])
+                }
             }
-        }
-        DenseMatrix::new(nrows, ncols, m_values, true)
+            DenseMatrix::new(nrows, ncols, m_values, true)
+        }
     }

     /// Iterate over values of matrix
     pub fn iter(&self) -> Iter<'_, T> {
         self.values.iter()
     }
+
+    /// Check if the size of the requested view is bounded to matrix rows/cols count
+    fn is_valid_view(
+        &self,
+        n_rows: usize,
+        n_cols: usize,
+        vrows: &Range<usize>,
+        vcols: &Range<usize>,
+    ) -> bool {
+        !(vrows.end <= n_rows
+            && vcols.end <= n_cols
+            && vrows.start <= n_rows
+            && vcols.start <= n_cols)
+    }
+
+    /// Compute the range of the requested view: start, end, size of the slice
+    fn stride_range(
+        &self,
+        n_rows: usize,
+        n_cols: usize,
+        vrows: &Range<usize>,
+        vcols: &Range<usize>,
+        column_major: bool,
+    ) -> (usize, usize, usize) {
+        let (start, end, stride) = if column_major {
+            (
+                vrows.start + vcols.start * n_rows,
+                vrows.end + (vcols.end - 1) * n_rows,
+                n_rows,
+            )
+        } else {
+            (
+                vrows.start * n_cols + vcols.start,
+                (vrows.end - 1) * n_cols + vcols.end,
+                n_cols,
+            )
+        };
+        (start, end, stride)
+    }
 }

 impl<T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrix<T> {
@@ -304,6 +366,7 @@ where
 impl<T: Debug + Display + Copy + Sized> Array<T, (usize, usize)> for DenseMatrix<T> {
     fn get(&self, pos: (usize, usize)) -> &T {
         let (row, col) = pos;
+
         if row >= self.nrows || col >= self.ncols {
             panic!(
                 "Invalid index ({},{}) for {}x{} matrix",
@@ -383,15 +446,15 @@ impl<T: Debug + Display + Copy + Sized> MutArrayView2<T> for DenseMatrix<T> {}

 impl<T: Debug + Display + Copy + Sized> Array2<T> for DenseMatrix<T> {
     fn get_row<'a>(&'a self, row: usize) -> Box<dyn ArrayView1<T> + 'a> {
-        Box::new(DenseMatrixView::new(self, row..row + 1, 0..self.ncols))
+        Box::new(DenseMatrixView::new(self, row..row + 1, 0..self.ncols).unwrap())
     }

     fn get_col<'a>(&'a self, col: usize) -> Box<dyn ArrayView1<T> + 'a> {
-        Box::new(DenseMatrixView::new(self, 0..self.nrows, col..col + 1))
+        Box::new(DenseMatrixView::new(self, 0..self.nrows, col..col + 1).unwrap())
     }

     fn slice<'a>(&'a self, rows: Range<usize>, cols: Range<usize>) -> Box<dyn ArrayView2<T> + 'a> {
-        Box::new(DenseMatrixView::new(self, rows, cols))
+        Box::new(DenseMatrixView::new(self, rows, cols).unwrap())
     }

     fn slice_mut<'a>(
@@ -402,15 +465,17 @@ impl<T: Debug + Display + Copy + Sized> Array2<T> for DenseMatrix<T> {
     where
         Self: Sized,
     {
-        Box::new(DenseMatrixMutView::new(self, rows, cols))
+        Box::new(DenseMatrixMutView::new(self, rows, cols).unwrap())
     }

+    // private function so for now assume infallible
     fn fill(nrows: usize, ncols: usize, value: T) -> Self {
-        DenseMatrix::new(nrows, ncols, vec![value; nrows * ncols], true)
+        DenseMatrix::new(nrows, ncols, vec![value; nrows * ncols], true).unwrap()
     }

+    // private function so for now assume infallible
     fn from_iterator<I: Iterator<Item = T>>(iter: I, nrows: usize, ncols: usize, axis: u8) -> Self {
-        DenseMatrix::new(nrows, ncols, iter.collect(), axis != 0)
+        DenseMatrix::new(nrows, ncols, iter.collect(), axis != 0).unwrap()
     }
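
    // NOTE (editor's aside, not part of this patch): despite its name, the new
    // `is_valid_view` helper above returns `true` when the requested view falls
    // *outside* the matrix, because its bounds check is negated. That is why
    // `DenseMatrixView::new` and `DenseMatrixMutView::new` treat `true` as the
    // error case. For a 3x3 matrix:
    //   m.is_valid_view(3, 3, &(0..3), &(0..3)) == false  -> Ok(view)
    //   m.is_valid_view(3, 3, &(3..4), &(0..3)) == true   -> Err(Failed)

     fn transpose(&self) ->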
Self { @@ -544,15 +609,74 @@ mod tests { use approx::relative_eq; #[test] - fn test_display() { + fn test_instantiate_from_2d() { let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]); + assert!(x.is_ok()); + } + #[test] + fn test_instantiate_from_2d_empty() { + let input: &[&[f64]] = &[&[]]; + let x = DenseMatrix::from_2d_array(input); + assert!(x.is_err()); + } + #[test] + fn test_instantiate_from_2d_empty2() { + let input: &[&[f64]] = &[&[], &[]]; + let x = DenseMatrix::from_2d_array(input); + assert!(x.is_err()); + } + #[test] + fn test_instantiate_ok_view1() { + let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap(); + let v = DenseMatrixView::new(&x, 0..2, 0..2); + assert!(v.is_ok()); + } + #[test] + fn test_instantiate_ok_view2() { + let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap(); + let v = DenseMatrixView::new(&x, 0..3, 0..3); + assert!(v.is_ok()); + } + #[test] + fn test_instantiate_ok_view3() { + let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap(); + let v = DenseMatrixView::new(&x, 2..3, 0..3); + assert!(v.is_ok()); + } + #[test] + fn test_instantiate_ok_view4() { + let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap(); + let v = DenseMatrixView::new(&x, 3..3, 0..3); + assert!(v.is_ok()); + } + #[test] + fn test_instantiate_err_view1() { + let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap(); + let v = DenseMatrixView::new(&x, 3..4, 0..3); + assert!(v.is_err()); + } + #[test] + fn test_instantiate_err_view2() { + let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap(); + let v = DenseMatrixView::new(&x, 0..3, 3..4); + assert!(v.is_err()); + } + #[test] + fn test_instantiate_err_view3() { + let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap(); + let v = DenseMatrixView::new(&x, 0..3, 4..3); + assert!(v.is_err()); + } + #[test] + fn test_display() { + let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap(); println!("{}", &x); } #[test] fn test_get_row_col() { - let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]); + let x = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap(); assert_eq!(15.0, x.get_col(1).sum()); assert_eq!(15.0, x.get_row(1).sum()); @@ -561,7 +685,7 @@ mod tests { #[test] fn test_row_major() { - let mut x = DenseMatrix::new(2, 3, vec![1, 2, 3, 4, 5, 6], false); + let mut x = DenseMatrix::new(2, 3, vec![1, 2, 3, 4, 5, 6], false).unwrap(); assert_eq!(5, *x.get_col(1).get(1)); assert_eq!(7, x.get_col(1).sum()); @@ -575,7 +699,8 @@ mod tests { #[test] fn test_get_slice() { - let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9], &[10, 11, 12]]); + let x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9], &[10, 11, 12]]) + .unwrap(); assert_eq!( vec![4, 5, 6], @@ -589,7 +714,7 @@ mod tests { #[test] fn test_iter_mut() { - let mut x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9]]); + let mut x = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9]]).unwrap(); assert_eq!(vec![1, 4, 7, 2, 5, 8, 3, 6, 9], x.values); // add +2 to some elements @@ -625,7 +750,8 @@ mod tests { #[test] fn test_str_array() { let mut x = - DenseMatrix::from_2d_array(&[&["1", "2", "3"], &["4", "5", "6"], &["7", "8", "9"]]); + 
DenseMatrix::from_2d_array(&[&["1", "2", "3"], &["4", "5", "6"], &["7", "8", "9"]]) + .unwrap(); assert_eq!(vec!["1", "4", "7", "2", "5", "8", "3", "6", "9"], x.values); x.iterator_mut(0).for_each(|v| *v = "str"); @@ -637,7 +763,7 @@ mod tests { #[test] fn test_transpose() { - let x = DenseMatrix::<&str>::from_2d_array(&[&["1", "2", "3"], &["4", "5", "6"]]); + let x = DenseMatrix::<&str>::from_2d_array(&[&["1", "2", "3"], &["4", "5", "6"]]).unwrap(); assert_eq!(vec!["1", "4", "2", "5", "3", "6"], x.values); assert!(x.column_major); @@ -664,8 +790,8 @@ mod tests { #[test] fn test_take() { - let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]); - let b = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]); + let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6]]).unwrap(); + let b = DenseMatrix::from_2d_array(&[&[1, 2], &[3, 4], &[5, 6]]).unwrap(); println!("{a}"); // take column 0 and 2 @@ -677,7 +803,7 @@ mod tests { #[test] fn test_mut() { - let a = DenseMatrix::from_2d_array(&[&[1.3, -2.1, 3.4], &[-4., -5.3, 6.1]]); + let a = DenseMatrix::from_2d_array(&[&[1.3, -2.1, 3.4], &[-4., -5.3, 6.1]]).unwrap(); let a = a.abs(); assert_eq!(vec![1.3, 4.0, 2.1, 5.3, 3.4, 6.1], a.values); @@ -688,7 +814,8 @@ mod tests { #[test] fn test_reshape() { - let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9], &[10, 11, 12]]); + let a = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9], &[10, 11, 12]]) + .unwrap(); let a = a.reshape(2, 6, 0); assert_eq!(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], a.values); @@ -701,13 +828,15 @@ mod tests { #[test] fn test_eq() { - let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]); - let b = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]); + let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.]]).unwrap(); + let b = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[4., 5., 6.], &[7., 8., 9.]]).unwrap(); let c = DenseMatrix::from_2d_array(&[ &[1. + f32::EPSILON, 2., 3.], &[4., 5., 6. + f32::EPSILON], - ]); - let d = DenseMatrix::from_2d_array(&[&[1. + 0.5, 2., 3.], &[4., 5., 6. + f32::EPSILON]]); + ]) + .unwrap(); + let d = DenseMatrix::from_2d_array(&[&[1. + 0.5, 2., 3.], &[4., 5., 6. + f32::EPSILON]]) + .unwrap(); assert!(!relative_eq!(a, b)); assert!(!relative_eq!(a, d)); diff --git a/src/linalg/basic/vector.rs b/src/linalg/basic/vector.rs index 08ea620b..05c03756 100644 --- a/src/linalg/basic/vector.rs +++ b/src/linalg/basic/vector.rs @@ -55,6 +55,7 @@ impl Array for Vec { impl MutArray for Vec { fn set(&mut self, i: usize, x: T) { + // NOTE: this panics in case of out of bounds index self[i] = x } diff --git a/src/linalg/traits/cholesky.rs b/src/linalg/traits/cholesky.rs index 1394270f..baec8f87 100644 --- a/src/linalg/traits/cholesky.rs +++ b/src/linalg/traits/cholesky.rs @@ -15,7 +15,7 @@ //! &[25., 15., -5.], //! &[15., 18., 0.], //! &[-5., 0., 11.] -//! ]); +//! ]).unwrap(); //! //! let cholesky = A.cholesky().unwrap(); //! 
let lower_triangular: DenseMatrix = cholesky.L(); @@ -175,11 +175,14 @@ mod tests { )] #[test] fn cholesky_decompose() { - let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]); + let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]) + .unwrap(); let l = - DenseMatrix::from_2d_array(&[&[5.0, 0.0, 0.0], &[3.0, 3.0, 0.0], &[-1.0, 1.0, 3.0]]); + DenseMatrix::from_2d_array(&[&[5.0, 0.0, 0.0], &[3.0, 3.0, 0.0], &[-1.0, 1.0, 3.0]]) + .unwrap(); let u = - DenseMatrix::from_2d_array(&[&[5.0, 3.0, -1.0], &[0.0, 3.0, 1.0], &[0.0, 0.0, 3.0]]); + DenseMatrix::from_2d_array(&[&[5.0, 3.0, -1.0], &[0.0, 3.0, 1.0], &[0.0, 0.0, 3.0]]) + .unwrap(); let cholesky = a.cholesky().unwrap(); assert!(relative_eq!(cholesky.L().abs(), l.abs(), epsilon = 1e-4)); @@ -197,9 +200,10 @@ mod tests { )] #[test] fn cholesky_solve_mut() { - let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]); - let b = DenseMatrix::from_2d_array(&[&[40., 51., 28.]]); - let expected = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0]]); + let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]) + .unwrap(); + let b = DenseMatrix::from_2d_array(&[&[40., 51., 28.]]).unwrap(); + let expected = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0]]).unwrap(); let cholesky = a.cholesky().unwrap(); diff --git a/src/linalg/traits/evd.rs b/src/linalg/traits/evd.rs index ccbdded6..4db766b0 100644 --- a/src/linalg/traits/evd.rs +++ b/src/linalg/traits/evd.rs @@ -19,7 +19,7 @@ //! &[0.9000, 0.4000, 0.7000], //! &[0.4000, 0.5000, 0.3000], //! &[0.7000, 0.3000, 0.8000], -//! ]); +//! ]).unwrap(); //! //! let evd = A.evd(true).unwrap(); //! let eigenvectors: DenseMatrix = evd.V; @@ -820,7 +820,8 @@ mod tests { &[0.9000, 0.4000, 0.7000], &[0.4000, 0.5000, 0.3000], &[0.7000, 0.3000, 0.8000], - ]); + ]) + .unwrap(); let eigen_values: Vec = vec![1.7498382, 0.3165784, 0.1335834]; @@ -828,7 +829,8 @@ mod tests { &[0.6881997, -0.07121225, 0.7220180], &[0.3700456, 0.89044952, -0.2648886], &[0.6240573, -0.44947578, -0.6391588], - ]); + ]) + .unwrap(); let evd = A.evd(true).unwrap(); @@ -852,7 +854,8 @@ mod tests { &[0.9000, 0.4000, 0.7000], &[0.4000, 0.5000, 0.3000], &[0.8000, 0.3000, 0.8000], - ]); + ]) + .unwrap(); let eigen_values: Vec = vec![1.79171122, 0.31908143, 0.08920735]; @@ -860,7 +863,8 @@ mod tests { &[0.7178958, 0.05322098, 0.6812010], &[0.3837711, -0.84702111, -0.1494582], &[0.6952105, 0.43984484, -0.7036135], - ]); + ]) + .unwrap(); let evd = A.evd(false).unwrap(); @@ -885,7 +889,8 @@ mod tests { &[4.0, -1.0, 1.0, 1.0], &[1.0, 1.0, 3.0, -2.0], &[1.0, 1.0, 4.0, -1.0], - ]); + ]) + .unwrap(); let eigen_values_d: Vec = vec![0.0, 2.0, 2.0, 0.0]; let eigen_values_e: Vec = vec![2.2361, 0.9999, -0.9999, -2.2361]; @@ -895,7 +900,8 @@ mod tests { &[-0.6707, 0.1059, 0.901, 0.6289], &[0.9159, -0.1378, 0.3816, 0.0806], &[0.6707, 0.1059, 0.901, -0.6289], - ]); + ]) + .unwrap(); let evd = A.evd(false).unwrap(); diff --git a/src/linalg/traits/high_order.rs b/src/linalg/traits/high_order.rs index f1f86672..d3466e20 100644 --- a/src/linalg/traits/high_order.rs +++ b/src/linalg/traits/high_order.rs @@ -12,9 +12,9 @@ pub trait HighOrderOperations: Array2 { /// use smartcore::linalg::traits::high_order::HighOrderOperations; /// use smartcore::linalg::basic::arrays::Array2; /// - /// let a = DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.]]); - /// let b = DenseMatrix::from_2d_array(&[&[5., 6.], &[7., 8.], &[9., 10.]]); - /// let 
expected = DenseMatrix::from_2d_array(&[&[71., 80.], &[92., 104.]]); + /// let a = DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.]]).unwrap(); + /// let b = DenseMatrix::from_2d_array(&[&[5., 6.], &[7., 8.], &[9., 10.]]).unwrap(); + /// let expected = DenseMatrix::from_2d_array(&[&[71., 80.], &[92., 104.]]).unwrap(); /// /// assert_eq!(a.ab(true, &b, false), expected); /// ``` diff --git a/src/linalg/traits/lu.rs b/src/linalg/traits/lu.rs index 1f0d5f47..7a1d0439 100644 --- a/src/linalg/traits/lu.rs +++ b/src/linalg/traits/lu.rs @@ -18,7 +18,7 @@ //! &[1., 2., 3.], //! &[0., 1., 5.], //! &[5., 6., 0.] -//! ]); +//! ]).unwrap(); //! //! let lu = A.lu().unwrap(); //! let lower: DenseMatrix = lu.L(); @@ -263,13 +263,13 @@ mod tests { )] #[test] fn decompose() { - let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[0., 1., 5.], &[5., 6., 0.]]); + let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[0., 1., 5.], &[5., 6., 0.]]).unwrap(); let expected_L = - DenseMatrix::from_2d_array(&[&[1., 0., 0.], &[0., 1., 0.], &[0.2, 0.8, 1.]]); + DenseMatrix::from_2d_array(&[&[1., 0., 0.], &[0., 1., 0.], &[0.2, 0.8, 1.]]).unwrap(); let expected_U = - DenseMatrix::from_2d_array(&[&[5., 6., 0.], &[0., 1., 5.], &[0., 0., -1.]]); + DenseMatrix::from_2d_array(&[&[5., 6., 0.], &[0., 1., 5.], &[0., 0., -1.]]).unwrap(); let expected_pivot = - DenseMatrix::from_2d_array(&[&[0., 0., 1.], &[0., 1., 0.], &[1., 0., 0.]]); + DenseMatrix::from_2d_array(&[&[0., 0., 1.], &[0., 1., 0.], &[1., 0., 0.]]).unwrap(); let lu = a.lu().unwrap(); assert!(relative_eq!(lu.L(), expected_L, epsilon = 1e-4)); assert!(relative_eq!(lu.U(), expected_U, epsilon = 1e-4)); @@ -281,9 +281,10 @@ mod tests { )] #[test] fn inverse() { - let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[0., 1., 5.], &[5., 6., 0.]]); + let a = DenseMatrix::from_2d_array(&[&[1., 2., 3.], &[0., 1., 5.], &[5., 6., 0.]]).unwrap(); let expected = - DenseMatrix::from_2d_array(&[&[-6.0, 3.6, 1.4], &[5.0, -3.0, -1.0], &[-1.0, 0.8, 0.2]]); + DenseMatrix::from_2d_array(&[&[-6.0, 3.6, 1.4], &[5.0, -3.0, -1.0], &[-1.0, 0.8, 0.2]]) + .unwrap(); let a_inv = a.lu().and_then(|lu| lu.inverse()).unwrap(); assert!(relative_eq!(a_inv, expected, epsilon = 1e-4)); } diff --git a/src/linalg/traits/qr.rs b/src/linalg/traits/qr.rs index eb452e13..2c70efcb 100644 --- a/src/linalg/traits/qr.rs +++ b/src/linalg/traits/qr.rs @@ -13,7 +13,7 @@ //! &[0.9, 0.4, 0.7], //! &[0.4, 0.5, 0.3], //! &[0.7, 0.3, 0.8] -//! ]); +//! ]).unwrap(); //! //! let qr = A.qr().unwrap(); //! 
let orthogonal: DenseMatrix = qr.Q(); @@ -201,17 +201,20 @@ mod tests { )] #[test] fn decompose() { - let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]); + let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]) + .unwrap(); let q = DenseMatrix::from_2d_array(&[ &[-0.7448, 0.2436, 0.6212], &[-0.331, -0.9432, -0.027], &[-0.5793, 0.2257, -0.7832], - ]); + ]) + .unwrap(); let r = DenseMatrix::from_2d_array(&[ &[-1.2083, -0.6373, -1.0842], &[0.0, -0.3064, 0.0682], &[0.0, 0.0, -0.1999], - ]); + ]) + .unwrap(); let qr = a.qr().unwrap(); assert!(relative_eq!(qr.Q().abs(), q.abs(), epsilon = 1e-4)); assert!(relative_eq!(qr.R().abs(), r.abs(), epsilon = 1e-4)); @@ -223,13 +226,15 @@ mod tests { )] #[test] fn qr_solve_mut() { - let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]); - let b = DenseMatrix::from_2d_array(&[&[0.5, 0.2], &[0.5, 0.8], &[0.5, 0.3]]); + let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]) + .unwrap(); + let b = DenseMatrix::from_2d_array(&[&[0.5, 0.2], &[0.5, 0.8], &[0.5, 0.3]]).unwrap(); let expected_w = DenseMatrix::from_2d_array(&[ &[-0.2027027, -1.2837838], &[0.8783784, 2.2297297], &[0.4729730, 0.6621622], - ]); + ]) + .unwrap(); let w = a.qr_solve_mut(b).unwrap(); assert!(relative_eq!(w, expected_w, epsilon = 1e-2)); } diff --git a/src/linalg/traits/stats.rs b/src/linalg/traits/stats.rs index 052da476..43c23dce 100644 --- a/src/linalg/traits/stats.rs +++ b/src/linalg/traits/stats.rs @@ -136,8 +136,8 @@ pub trait MatrixPreprocessing: MutArrayView2 + Clone { /// ```rust /// use smartcore::linalg::basic::matrix::DenseMatrix; /// use smartcore::linalg::traits::stats::MatrixPreprocessing; - /// let mut a = DenseMatrix::from_2d_array(&[&[0., 2., 3.], &[-5., -6., -7.]]); - /// let expected = DenseMatrix::from_2d_array(&[&[0., 1., 1.],&[0., 0., 0.]]); + /// let mut a = DenseMatrix::from_2d_array(&[&[0., 2., 3.], &[-5., -6., -7.]]).unwrap(); + /// let expected = DenseMatrix::from_2d_array(&[&[0., 1., 1.],&[0., 0., 0.]]).unwrap(); /// a.binarize_mut(0.); /// /// assert_eq!(a, expected); @@ -159,8 +159,8 @@ pub trait MatrixPreprocessing: MutArrayView2 + Clone { /// ```rust /// use smartcore::linalg::basic::matrix::DenseMatrix; /// use smartcore::linalg::traits::stats::MatrixPreprocessing; - /// let a = DenseMatrix::from_2d_array(&[&[0., 2., 3.], &[-5., -6., -7.]]); - /// let expected = DenseMatrix::from_2d_array(&[&[0., 1., 1.],&[0., 0., 0.]]); + /// let a = DenseMatrix::from_2d_array(&[&[0., 2., 3.], &[-5., -6., -7.]]).unwrap(); + /// let expected = DenseMatrix::from_2d_array(&[&[0., 1., 1.],&[0., 0., 0.]]).unwrap(); /// /// assert_eq!(a.binarize(0.), expected); /// ``` @@ -186,7 +186,8 @@ mod tests { &[1., 2., 3., 1., 2.], &[4., 5., 6., 3., 4.], &[7., 8., 9., 5., 6.], - ]); + ]) + .unwrap(); let expected_0 = vec![4., 5., 6., 3., 4.]; let expected_1 = vec![1.8, 4.4, 7.]; @@ -196,7 +197,7 @@ mod tests { #[test] fn test_var() { - let m = DenseMatrix::from_2d_array(&[&[1., 2., 3., 4.], &[5., 6., 7., 8.]]); + let m = DenseMatrix::from_2d_array(&[&[1., 2., 3., 4.], &[5., 6., 7., 8.]]).unwrap(); let expected_0 = vec![4., 4., 4., 4.]; let expected_1 = vec![1.25, 1.25]; @@ -211,7 +212,8 @@ mod tests { let m = DenseMatrix::from_2d_array(&[ &[0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25], &[0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25], - ]); + ]) + .unwrap(); let expected_0 = vec![0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]; let expected_1 
= vec![1.25, 1.25]; @@ -230,7 +232,8 @@ mod tests { &[1., 2., 3., 1., 2.], &[4., 5., 6., 3., 4.], &[7., 8., 9., 5., 6.], - ]); + ]) + .unwrap(); let expected_0 = vec![ 2.449489742783178, 2.449489742783178, @@ -251,10 +254,10 @@ mod tests { #[test] fn test_scale() { let m: DenseMatrix = - DenseMatrix::from_2d_array(&[&[1., 2., 3., 4.], &[5., 6., 7., 8.]]); + DenseMatrix::from_2d_array(&[&[1., 2., 3., 4.], &[5., 6., 7., 8.]]).unwrap(); let expected_0: DenseMatrix = - DenseMatrix::from_2d_array(&[&[-1., -1., -1., -1.], &[1., 1., 1., 1.]]); + DenseMatrix::from_2d_array(&[&[-1., -1., -1., -1.], &[1., 1., 1., 1.]]).unwrap(); let expected_1: DenseMatrix = DenseMatrix::from_2d_array(&[ &[ -1.3416407864998738, @@ -268,7 +271,8 @@ mod tests { 0.4472135954999579, 1.3416407864998738, ], - ]); + ]) + .unwrap(); assert_eq!(m.mean(0), vec![3.0, 4.0, 5.0, 6.0]); assert_eq!(m.mean(1), vec![2.5, 6.5]); diff --git a/src/linalg/traits/svd.rs b/src/linalg/traits/svd.rs index 8608942d..75c303ae 100644 --- a/src/linalg/traits/svd.rs +++ b/src/linalg/traits/svd.rs @@ -17,7 +17,7 @@ //! &[0.9, 0.4, 0.7], //! &[0.4, 0.5, 0.3], //! &[0.7, 0.3, 0.8] -//! ]); +//! ]).unwrap(); //! //! let svd = A.svd().unwrap(); //! let u: DenseMatrix = svd.U; @@ -489,7 +489,8 @@ mod tests { &[0.9000, 0.4000, 0.7000], &[0.4000, 0.5000, 0.3000], &[0.7000, 0.3000, 0.8000], - ]); + ]) + .unwrap(); let s: Vec = vec![1.7498382, 0.3165784, 0.1335834]; @@ -497,13 +498,15 @@ mod tests { &[0.6881997, -0.07121225, 0.7220180], &[0.3700456, 0.89044952, -0.2648886], &[0.6240573, -0.44947578, -0.639158], - ]); + ]) + .unwrap(); let V = DenseMatrix::from_2d_array(&[ &[0.6881997, -0.07121225, 0.7220180], &[0.3700456, 0.89044952, -0.2648886], &[0.6240573, -0.44947578, -0.6391588], - ]); + ]) + .unwrap(); let svd = A.svd().unwrap(); @@ -577,7 +580,8 @@ mod tests { -0.2158704, -0.27529472, ], - ]); + ]) + .unwrap(); let s: Vec = vec![ 3.8589375, 3.4396766, 2.6487176, 2.2317399, 1.5165054, 0.8109055, 0.2706515, @@ -647,7 +651,8 @@ mod tests { 0.73034065, -0.43965505, ], - ]); + ]) + .unwrap(); let V = DenseMatrix::from_2d_array(&[ &[ @@ -707,7 +712,8 @@ mod tests { 0.1654796, -0.32346758, ], - ]); + ]) + .unwrap(); let svd = A.svd().unwrap(); @@ -723,10 +729,11 @@ mod tests { )] #[test] fn solve() { - let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]); - let b = DenseMatrix::from_2d_array(&[&[0.5, 0.2], &[0.5, 0.8], &[0.5, 0.3]]); + let a = DenseMatrix::from_2d_array(&[&[0.9, 0.4, 0.7], &[0.4, 0.5, 0.3], &[0.7, 0.3, 0.8]]) + .unwrap(); + let b = DenseMatrix::from_2d_array(&[&[0.5, 0.2], &[0.5, 0.8], &[0.5, 0.3]]).unwrap(); let expected_w = - DenseMatrix::from_2d_array(&[&[-0.20, -1.28], &[0.87, 2.22], &[0.47, 0.66]]); + DenseMatrix::from_2d_array(&[&[-0.20, -1.28], &[0.87, 2.22], &[0.47, 0.66]]).unwrap(); let w = a.svd_solve_mut(b).unwrap(); assert!(relative_eq!(w, expected_w, epsilon = 1e-2)); } @@ -737,7 +744,8 @@ mod tests { )] #[test] fn decompose_restore() { - let a = DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0, 4.0], &[5.0, 6.0, 7.0, 8.0]]); + let a = + DenseMatrix::from_2d_array(&[&[1.0, 2.0, 3.0, 4.0], &[5.0, 6.0, 7.0, 8.0]]).unwrap(); let svd = a.svd().unwrap(); let u: &DenseMatrix = &svd.U; //U let v: &DenseMatrix = &svd.V; // V diff --git a/src/linear/bg_solver.rs b/src/linear/bg_solver.rs index 5665c763..6ee4f0ec 100644 --- a/src/linear/bg_solver.rs +++ b/src/linear/bg_solver.rs @@ -12,7 +12,8 @@ //! pub struct BGSolver {} //! 
impl<'a, T: FloatNumber, X: Array2> BiconjugateGradientSolver<'a, T, X> for BGSolver {} //! -//! let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]); +//! let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., +//! 11.]]).unwrap(); //! let b = vec![40., 51., 28.]; //! let expected = vec![1.0, 2.0, 3.0]; //! let mut x = Vec::zeros(3); @@ -158,7 +159,8 @@ mod tests { #[test] fn bg_solver() { - let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]); + let a = DenseMatrix::from_2d_array(&[&[25., 15., -5.], &[15., 18., 0.], &[-5., 0., 11.]]) + .unwrap(); let b = vec![40., 51., 28.]; let expected = [1.0, 2.0, 3.0]; diff --git a/src/linear/elastic_net.rs b/src/linear/elastic_net.rs index 87deddca..643ab14e 100644 --- a/src/linear/elastic_net.rs +++ b/src/linear/elastic_net.rs @@ -38,7 +38,7 @@ //! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], //! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], //! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], -//! ]); +//! ]).unwrap(); //! //! let y: Vec = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, //! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9]; @@ -511,7 +511,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y: Vec = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, @@ -562,7 +563,8 @@ mod tests { &[17.0, 1918.0, 1.4054969025700674], &[18.0, 1929.0, 1.3271699396384906], &[19.0, 1915.0, 1.1373332337674806], - ]); + ]) + .unwrap(); let y: Vec = vec![ 1.48, 2.72, 4.52, 5.72, 5.25, 4.07, 3.75, 4.75, 6.77, 4.72, 6.78, 6.79, 8.3, 7.42, @@ -627,7 +629,7 @@ mod tests { // &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], // &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], // &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - // ]); + // ]).unwrap(); // let y = vec![ // 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, diff --git a/src/linear/lasso.rs b/src/linear/lasso.rs index 8de391fc..2919b025 100644 --- a/src/linear/lasso.rs +++ b/src/linear/lasso.rs @@ -418,7 +418,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y: Vec = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, diff --git a/src/linear/linear_regression.rs b/src/linear/linear_regression.rs index a5c76999..43410bbb 100644 --- a/src/linear/linear_regression.rs +++ b/src/linear/linear_regression.rs @@ -40,7 +40,7 @@ //! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], //! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], //! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], -//! ]); +//! ]).unwrap(); //! //! let y: Vec = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, //! 
100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9]; @@ -341,7 +341,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y: Vec = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, @@ -393,7 +394,7 @@ mod tests { // &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], // &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], // &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - // ]); + // ]).unwrap(); // let y = vec![ // 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, diff --git a/src/linear/logistic_regression.rs b/src/linear/logistic_regression.rs index 0defd0fa..12ecf8d8 100644 --- a/src/linear/logistic_regression.rs +++ b/src/linear/logistic_regression.rs @@ -35,7 +35,7 @@ //! &[4.9, 2.4, 3.3, 1.0], //! &[6.6, 2.9, 4.6, 1.3], //! &[5.2, 2.7, 3.9, 1.4], -//! ]); +//! ]).unwrap(); //! let y: Vec = vec![ //! 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //! ]; @@ -416,7 +416,7 @@ impl, Y: /// Fits Logistic Regression to your data. /// * `x` - _NxM_ matrix with _N_ observations and _M_ features in each observation. /// * `y` - target class values - /// * `parameters` - other parameters, use `Default::default()` to set parameters to default values. + /// * `parameters` - other parameters, use `Default::default()` to set parameters to default values. pub fn fit( x: &X, y: &Y, @@ -611,7 +611,8 @@ mod tests { &[10., -2.], &[8., 2.], &[9., 0.], - ]); + ]) + .unwrap(); let y = vec![0, 0, 1, 1, 2, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1]; @@ -671,7 +672,8 @@ mod tests { &[10., -2.], &[8., 2.], &[9., 0.], - ]); + ]) + .unwrap(); let y = vec![0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1]; @@ -733,7 +735,8 @@ mod tests { &[10., -2.], &[8., 2.], &[9., 0.], - ]); + ]) + .unwrap(); let y: Vec = vec![0, 0, 1, 1, 2, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1]; let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap(); @@ -818,37 +821,41 @@ mod tests { assert!(reg_coeff_sum < coeff); } - // TODO: serialization for the new DenseMatrix needs to be implemented - // #[cfg_attr(all(target_arch = "wasm32", not(target_os = "wasi")), wasm_bindgen_test::wasm_bindgen_test)] - // #[test] - // #[cfg(feature = "serde")] - // fn serde() { - // let x = DenseMatrix::from_2d_array(&[ - // &[1., -5.], - // &[2., 5.], - // &[3., -2.], - // &[1., 2.], - // &[2., 0.], - // &[6., -5.], - // &[7., 5.], - // &[6., -2.], - // &[7., 2.], - // &[6., 0.], - // &[8., -5.], - // &[9., 5.], - // &[10., -2.], - // &[8., 2.], - // &[9., 0.], - // ]); - // let y: Vec = vec![0, 0, 1, 1, 2, 1, 1, 0, 0, 2, 1, 1, 0, 0, 1]; - - // let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap(); - - // let deserialized_lr: LogisticRegression, Vec> = - // serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap(); - - // assert_eq!(lr, deserialized_lr); - // } + //TODO: serialization for the new DenseMatrix needs to be implemented + #[cfg_attr( + all(target_arch = "wasm32", not(target_os = "wasi")), + wasm_bindgen_test::wasm_bindgen_test + )] + #[test] + #[cfg(feature = "serde")] + fn serde() { + let x: DenseMatrix = DenseMatrix::from_2d_array(&[ + &[1., -5.], + &[2., 5.], + &[3., -2.], + &[1., 2.], + &[2., 0.], + &[6., -5.], + &[7., 5.], + &[6., -2.], + &[7., 2.], + &[6., 0.], + &[8., -5.], + &[9., 5.], + &[10., -2.], + &[8., 2.], + &[9., 0.], + ]) + .unwrap(); + let y: Vec = vec![0, 0, 1, 1, 2, 1, 1, 0, 0, 2, 1, 1, 0, 
0, 1]; + + let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap(); + + let deserialized_lr: LogisticRegression, Vec> = + serde_json::from_str(&serde_json::to_string(&lr).unwrap()).unwrap(); + + assert_eq!(lr, deserialized_lr); + } #[cfg_attr( all(target_arch = "wasm32", not(target_os = "wasi")), @@ -877,7 +884,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let y: Vec = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap(); @@ -899,4 +907,46 @@ mod tests { assert!(reg_coeff_sum < coeff); } + #[cfg_attr( + all(target_arch = "wasm32", not(target_os = "wasi")), + wasm_bindgen_test::wasm_bindgen_test + )] + #[test] + fn lr_fit_predict_random() { + let x: DenseMatrix = DenseMatrix::rand(52181, 94); + let y1: Vec = vec![1; 2181]; + let y2: Vec = vec![0; 50000]; + let y: Vec = y1.into_iter().chain(y2.into_iter()).collect(); + + let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap(); + let lr_reg = LogisticRegression::fit( + &x, + &y, + LogisticRegressionParameters::default().with_alpha(1.0), + ) + .unwrap(); + + let y_hat = lr.predict(&x).unwrap(); + let y_hat_reg = lr_reg.predict(&x).unwrap(); + + assert_eq!(y.len(), y_hat.len()); + assert_eq!(y.len(), y_hat_reg.len()); + } + + #[test] + fn test_logit() { + let x: &DenseMatrix = &DenseMatrix::rand(52181, 94); + let y1: Vec = vec![1; 2181]; + let y2: Vec = vec![0; 50000]; + let y: &Vec = &(y1.into_iter().chain(y2.into_iter()).collect()); + println!("y vec height: {:?}", y.len()); + println!("x matrix shape: {:?}", x.shape()); + + let lr = LogisticRegression::fit(x, y, Default::default()).unwrap(); + let y_hat = lr.predict(&x).unwrap(); + + println!("y_hat shape: {:?}", y_hat.shape()); + + assert_eq!(y_hat.shape(), 52181); + } } diff --git a/src/linear/ridge_regression.rs b/src/linear/ridge_regression.rs index 2c354299..be2f3d41 100644 --- a/src/linear/ridge_regression.rs +++ b/src/linear/ridge_regression.rs @@ -40,7 +40,7 @@ //! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], //! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], //! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], -//! ]); +//! ]).unwrap(); //! //! let y: Vec = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, //! 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9]; @@ -455,7 +455,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y: Vec = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, @@ -513,7 +514,7 @@ mod tests { // &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], // &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], // &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - // ]); + // ]).unwrap(); // let y = vec![ // 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, diff --git a/src/metrics/distance/mahalanobis.rs b/src/metrics/distance/mahalanobis.rs index e526c20e..a9347a58 100644 --- a/src/metrics/distance/mahalanobis.rs +++ b/src/metrics/distance/mahalanobis.rs @@ -25,7 +25,7 @@ //! &[68., 590., 37.], //! &[69., 660., 46.], //! &[73., 600., 55.], -//! ]); +//! ]).unwrap(); //! //! let a = data.mean_by(0); //! 
let b = vec![66., 640., 44.]; @@ -151,7 +151,8 @@ mod tests { &[68., 590., 37.], &[69., 660., 46.], &[73., 600., 55.], - ]); + ]) + .unwrap(); let a = data.mean_by(0); let b = vec![66., 640., 44.]; diff --git a/src/metrics/mod.rs b/src/metrics/mod.rs index c7e1be3d..a7184293 100644 --- a/src/metrics/mod.rs +++ b/src/metrics/mod.rs @@ -37,7 +37,7 @@ //! &[4.9, 2.4, 3.3, 1.0], //! &[6.6, 2.9, 4.6, 1.3], //! &[5.2, 2.7, 3.9, 1.4], -//! ]); +//! ]).unwrap(); //! let y: Vec = vec![ //! 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //! ]; diff --git a/src/model_selection/mod.rs b/src/model_selection/mod.rs index 27571fdb..e72787b7 100644 --- a/src/model_selection/mod.rs +++ b/src/model_selection/mod.rs @@ -36,7 +36,7 @@ //! &[4.9, 2.4, 3.3, 1.0], //! &[6.6, 2.9, 4.6, 1.3], //! &[5.2, 2.7, 3.9, 1.4], -//! ]); +//! ]).unwrap(); //! let y: Vec = vec![ //! 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., //! ]; @@ -84,7 +84,7 @@ //! &[4.9, 2.4, 3.3, 1.0], //! &[6.6, 2.9, 4.6, 1.3], //! &[5.2, 2.7, 3.9, 1.4], -//! ]); +//! ]).unwrap(); //! let y: Vec = vec![ //! 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //! ]; @@ -396,7 +396,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let y: Vec = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; let cv = KFold { @@ -441,7 +442,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9, @@ -489,7 +491,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y: Vec = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9, @@ -539,7 +542,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let y: Vec = vec![0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; let cv = KFold::default().with_n_splits(3); diff --git a/src/naive_bayes/bernoulli.rs b/src/naive_bayes/bernoulli.rs index 52a3c52e..33f00bd4 100644 --- a/src/naive_bayes/bernoulli.rs +++ b/src/naive_bayes/bernoulli.rs @@ -19,14 +19,14 @@ //! &[0, 1, 0, 0, 1, 0], //! &[0, 1, 0, 1, 0, 0], //! &[0, 1, 1, 0, 0, 1], -//! ]); +//! ]).unwrap(); //! let y: Vec = vec![0, 0, 0, 1]; //! //! let nb = BernoulliNB::fit(&x, &y, Default::default()).unwrap(); //! //! // Testing data point is: //! // Chinese Chinese Chinese Tokyo Japan -//! let x_test = DenseMatrix::from_2d_array(&[&[0, 1, 1, 0, 0, 1]]); +//! let x_test = DenseMatrix::from_2d_array(&[&[0, 1, 1, 0, 0, 1]]).unwrap(); //! let y_hat = nb.predict(&x_test).unwrap(); //! ``` //! 
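
Editor's sketch (not part of the patch): the call sites above all opt into
`.unwrap()`, which suits tests and doc examples. Library code can instead
propagate the new `Result<Self, Failed>`; a minimal sketch, assuming only the
public API exercised in this patch:

    use smartcore::error::Failed;
    use smartcore::linalg::basic::matrix::DenseMatrix;

    fn build() -> Result<DenseMatrix<u32>, Failed> {
        // `?` forwards the `Failed` error that `from_2d_array` now returns
        // (for example, when given an empty input)
        let x = DenseMatrix::from_2d_array(&[&[0, 1, 1, 0, 0, 1]])?;
        Ok(x)
    }
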
@@ -527,7 +527,8 @@ mod tests { &[0.0, 1.0, 0.0, 0.0, 1.0, 0.0], &[0.0, 1.0, 0.0, 1.0, 0.0, 0.0], &[0.0, 1.0, 1.0, 0.0, 0.0, 1.0], - ]); + ]) + .unwrap(); let y: Vec = vec![0, 0, 0, 1]; let bnb = BernoulliNB::fit(&x, &y, Default::default()).unwrap(); @@ -558,7 +559,7 @@ mod tests { // Testing data point is: // Chinese Chinese Chinese Tokyo Japan - let x_test = DenseMatrix::from_2d_array(&[&[0.0, 1.0, 1.0, 0.0, 0.0, 1.0]]); + let x_test = DenseMatrix::from_2d_array(&[&[0.0, 1.0, 1.0, 0.0, 0.0, 1.0]]).unwrap(); let y_hat = bnb.predict(&x_test).unwrap(); assert_eq!(y_hat, &[1]); @@ -586,7 +587,8 @@ mod tests { &[2, 0, 3, 3, 1, 2, 0, 2, 4, 1], &[2, 4, 0, 4, 2, 4, 1, 3, 1, 4], &[0, 2, 2, 3, 4, 0, 4, 4, 4, 4], - ]); + ]) + .unwrap(); let y: Vec = vec![2, 2, 0, 0, 0, 2, 1, 1, 0, 1, 0, 0, 2, 0, 2]; let bnb = BernoulliNB::fit(&x, &y, Default::default()).unwrap(); @@ -643,7 +645,8 @@ mod tests { &[0, 1, 0, 0, 1, 0], &[0, 1, 0, 1, 0, 0], &[0, 1, 1, 0, 0, 1], - ]); + ]) + .unwrap(); let y: Vec = vec![0, 0, 0, 1]; let bnb = BernoulliNB::fit(&x, &y, Default::default()).unwrap(); diff --git a/src/naive_bayes/categorical.rs b/src/naive_bayes/categorical.rs index 2c0c7a4a..71a7487a 100644 --- a/src/naive_bayes/categorical.rs +++ b/src/naive_bayes/categorical.rs @@ -24,7 +24,7 @@ //! &[3, 4, 2, 4], //! &[0, 3, 1, 2], //! &[0, 4, 1, 2], -//! ]); +//! ]).unwrap(); //! let y: Vec = vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0]; //! //! let nb = CategoricalNB::fit(&x, &y, Default::default()).unwrap(); @@ -455,7 +455,8 @@ mod tests { &[1, 1, 1, 1], &[1, 2, 0, 0], &[2, 1, 1, 1], - ]); + ]) + .unwrap(); let y: Vec = vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0]; let cnb = CategoricalNB::fit(&x, &y, Default::default()).unwrap(); @@ -513,7 +514,7 @@ mod tests { ] ); - let x_test = DenseMatrix::from_2d_array(&[&[0, 2, 1, 0], &[2, 2, 0, 0]]); + let x_test = DenseMatrix::from_2d_array(&[&[0, 2, 1, 0], &[2, 2, 0, 0]]).unwrap(); let y_hat = cnb.predict(&x_test).unwrap(); assert_eq!(y_hat, vec![0, 1]); } @@ -539,7 +540,8 @@ mod tests { &[3, 4, 2, 4], &[0, 3, 1, 2], &[0, 4, 1, 2], - ]); + ]) + .unwrap(); let y: Vec = vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0]; let cnb = CategoricalNB::fit(&x, &y, Default::default()).unwrap(); @@ -571,7 +573,8 @@ mod tests { &[3, 4, 2, 4], &[0, 3, 1, 2], &[0, 4, 1, 2], - ]); + ]) + .unwrap(); let y: Vec = vec![0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0]; let cnb = CategoricalNB::fit(&x, &y, Default::default()).unwrap(); diff --git a/src/naive_bayes/gaussian.rs b/src/naive_bayes/gaussian.rs index d72d27de..aff996be 100644 --- a/src/naive_bayes/gaussian.rs +++ b/src/naive_bayes/gaussian.rs @@ -16,7 +16,7 @@ //! &[ 1., 1.], //! &[ 2., 1.], //! &[ 3., 2.], -//! ]); +//! ]).unwrap(); //! let y: Vec = vec![1, 1, 1, 2, 2, 2]; //! //! 
let nb = GaussianNB::fit(&x, &y, Default::default()).unwrap(); @@ -395,7 +395,8 @@ mod tests { &[1., 1.], &[2., 1.], &[3., 2.], - ]); + ]) + .unwrap(); let y: Vec = vec![1, 1, 1, 2, 2, 2]; let gnb = GaussianNB::fit(&x, &y, Default::default()).unwrap(); @@ -435,7 +436,8 @@ mod tests { &[1., 1.], &[2., 1.], &[3., 2.], - ]); + ]) + .unwrap(); let y: Vec = vec![1, 1, 1, 2, 2, 2]; let priors = vec![0.3, 0.7]; @@ -462,7 +464,8 @@ mod tests { &[1., 1.], &[2., 1.], &[3., 2.], - ]); + ]) + .unwrap(); let y: Vec = vec![1, 1, 1, 2, 2, 2]; let gnb = GaussianNB::fit(&x, &y, Default::default()).unwrap(); diff --git a/src/naive_bayes/mod.rs b/src/naive_bayes/mod.rs index 11614d14..1d74a315 100644 --- a/src/naive_bayes/mod.rs +++ b/src/naive_bayes/mod.rs @@ -169,7 +169,7 @@ mod tests { #[test] fn test_predict() { - let matrix = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9]]); + let matrix = DenseMatrix::from_2d_array(&[&[1, 2, 3], &[4, 5, 6], &[7, 8, 9]]).unwrap(); let val = vec![]; match Model::fit(TestDistribution(&val)).unwrap().predict(&matrix) { diff --git a/src/naive_bayes/multinomial.rs b/src/naive_bayes/multinomial.rs index a340c40d..2d6c437c 100644 --- a/src/naive_bayes/multinomial.rs +++ b/src/naive_bayes/multinomial.rs @@ -20,13 +20,13 @@ //! &[0, 2, 0, 0, 1, 0], //! &[0, 1, 0, 1, 0, 0], //! &[0, 1, 1, 0, 0, 1], -//! ]); +//! ]).unwrap(); //! let y: Vec = vec![0, 0, 0, 1]; //! let nb = MultinomialNB::fit(&x, &y, Default::default()).unwrap(); //! //! // Testing data point is: //! // Chinese Chinese Chinese Tokyo Japan -//! let x_test = DenseMatrix::from_2d_array(&[&[0, 3, 1, 0, 0, 1]]); +//! let x_test = DenseMatrix::from_2d_array(&[&[0, 3, 1, 0, 0, 1]]).unwrap(); //! let y_hat = nb.predict(&x_test).unwrap(); //! ``` //! @@ -433,7 +433,8 @@ mod tests { &[0, 2, 0, 0, 1, 0], &[0, 1, 0, 1, 0, 0], &[0, 1, 1, 0, 0, 1], - ]); + ]) + .unwrap(); let y: Vec = vec![0, 0, 0, 1]; let mnb = MultinomialNB::fit(&x, &y, Default::default()).unwrap(); @@ -467,7 +468,7 @@ mod tests { // Testing data point is: // Chinese Chinese Chinese Tokyo Japan - let x_test = DenseMatrix::::from_2d_array(&[&[0, 3, 1, 0, 0, 1]]); + let x_test = DenseMatrix::::from_2d_array(&[&[0, 3, 1, 0, 0, 1]]).unwrap(); let y_hat = mnb.predict(&x_test).unwrap(); assert_eq!(y_hat, &[0]); @@ -495,7 +496,8 @@ mod tests { &[2, 0, 3, 3, 1, 2, 0, 2, 4, 1], &[2, 4, 0, 4, 2, 4, 1, 3, 1, 4], &[0, 2, 2, 3, 4, 0, 4, 4, 4, 4], - ]); + ]) + .unwrap(); let y: Vec = vec![2, 2, 0, 0, 0, 2, 1, 1, 0, 1, 0, 0, 2, 0, 2]; let nb = MultinomialNB::fit(&x, &y, Default::default()).unwrap(); @@ -554,7 +556,8 @@ mod tests { &[0, 1, 0, 0, 1, 0], &[0, 1, 0, 1, 0, 0], &[0, 1, 1, 0, 0, 1], - ]); + ]) + .unwrap(); let y = vec![0, 0, 0, 1]; let mnb = MultinomialNB::fit(&x, &y, Default::default()).unwrap(); diff --git a/src/neighbors/knn_classifier.rs b/src/neighbors/knn_classifier.rs index a70a12ec..d18620c9 100644 --- a/src/neighbors/knn_classifier.rs +++ b/src/neighbors/knn_classifier.rs @@ -22,7 +22,7 @@ //! &[3., 4.], //! &[5., 6.], //! &[7., 8.], -//! &[9., 10.]]); +//! &[9., 10.]]).unwrap(); //! let y = vec![2, 2, 2, 3, 3]; //your class labels //! //! let knn = KNNClassifier::fit(&x, &y, Default::default()).unwrap(); @@ -211,7 +211,7 @@ impl, Y: Array1, D: Distance, Y: Array1, D: Distance>> { /// Fits KNN regressor to a NxM matrix where N is number of samples and M is number of features. 
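    ///
    /// (Editor's sketch, not in the original docs: a minimal call with default
    /// parameters, mirroring the tests below; paths are the crate's public modules.)
    /// ```
    /// use smartcore::linalg::basic::matrix::DenseMatrix;
    /// use smartcore::neighbors::knn_regressor::KNNRegressor;
    /// let x = DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.]]).unwrap();
    /// let y: Vec<f64> = vec![1., 2., 3.];
    /// let y_hat = KNNRegressor::fit(&x, &y, Default::default())
    ///     .and_then(|knn| knn.predict(&x))
    ///     .unwrap();
    /// ```
    ///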
/// * `x` - training data - /// * `y` - vector with real values + /// * `y` - vector with real values /// * `parameters` - additional parameters like search algorithm and k pub fn fit( x: &X, @@ -295,7 +295,8 @@ mod tests { #[test] fn knn_fit_predict_weighted() { let x = - DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]); + DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]) + .unwrap(); let y: Vec = vec![1., 2., 3., 4., 5.]; let y_exp = [1., 2., 3., 4., 5.]; let knn = KNNRegressor::fit( @@ -322,7 +323,8 @@ mod tests { #[test] fn knn_fit_predict_uniform() { let x = - DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]); + DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]) + .unwrap(); let y: Vec = vec![1., 2., 3., 4., 5.]; let y_exp = [2., 2., 3., 4., 4.]; let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap(); @@ -341,7 +343,8 @@ mod tests { #[cfg(feature = "serde")] fn serde() { let x = - DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]); + DenseMatrix::from_2d_array(&[&[1., 2.], &[3., 4.], &[5., 6.], &[7., 8.], &[9., 10.]]) + .unwrap(); let y = vec![1., 2., 3., 4., 5.]; let knn = KNNRegressor::fit(&x, &y, Default::default()).unwrap(); diff --git a/src/preprocessing/categorical.rs b/src/preprocessing/categorical.rs index dfa7d239..e17dea78 100644 --- a/src/preprocessing/categorical.rs +++ b/src/preprocessing/categorical.rs @@ -12,7 +12,7 @@ //! &[1.5, 2.0, 1.5, 4.0], //! &[1.5, 1.0, 1.5, 5.0], //! &[1.5, 2.0, 1.5, 6.0], -//! ]); +//! ]).unwrap(); //! let encoder_params = OneHotEncoderParams::from_cat_idx(&[1, 3]); //! // Infer number of categories from data and return a reusable encoder //! let encoder = OneHotEncoder::fit(&data, encoder_params).unwrap(); @@ -240,14 +240,16 @@ mod tests { &[2.0, 1.5, 4.0], &[1.0, 1.5, 5.0], &[2.0, 1.5, 6.0], - ]); + ]) + .unwrap(); let oh_enc = DenseMatrix::from_2d_array(&[ &[1.0, 0.0, 1.5, 1.0, 0.0, 0.0, 0.0], &[0.0, 1.0, 1.5, 0.0, 1.0, 0.0, 0.0], &[1.0, 0.0, 1.5, 0.0, 0.0, 1.0, 0.0], &[0.0, 1.0, 1.5, 0.0, 0.0, 0.0, 1.0], - ]); + ]) + .unwrap(); (orig, oh_enc) } @@ -259,14 +261,16 @@ mod tests { &[1.5, 2.0, 1.5, 4.0], &[1.5, 1.0, 1.5, 5.0], &[1.5, 2.0, 1.5, 6.0], - ]); + ]) + .unwrap(); let oh_enc = DenseMatrix::from_2d_array(&[ &[1.5, 1.0, 0.0, 1.5, 1.0, 0.0, 0.0, 0.0], &[1.5, 0.0, 1.0, 1.5, 0.0, 1.0, 0.0, 0.0], &[1.5, 1.0, 0.0, 1.5, 0.0, 0.0, 1.0, 0.0], &[1.5, 0.0, 1.0, 1.5, 0.0, 0.0, 0.0, 1.0], - ]); + ]) + .unwrap(); (orig, oh_enc) } @@ -334,7 +338,8 @@ mod tests { &[2.0, 1.5, 4.0], &[1.0, 1.5, 5.0], &[2.0, 1.5, 6.0], - ]); + ]) + .unwrap(); let params = OneHotEncoderParams::from_cat_idx(&[1]); let result = OneHotEncoder::fit(&m, params); diff --git a/src/preprocessing/numerical.rs b/src/preprocessing/numerical.rs index bd5f00d1..ddb74a45 100644 --- a/src/preprocessing/numerical.rs +++ b/src/preprocessing/numerical.rs @@ -11,7 +11,7 @@ //! vec![0.0, 0.0], //! vec![1.0, 1.0], //! vec![1.0, 1.0], -//! ]); +//! ]).unwrap(); //! //! let standard_scaler = //! numerical::StandardScaler::fit(&data, numerical::StandardScalerParameters::default()) @@ -24,7 +24,7 @@ //! vec![-1.0, -1.0], //! vec![1.0, 1.0], //! vec![1.0, 1.0], -//! ]) +//! ]).unwrap() //! ); //! 
``` use std::marker::PhantomData; @@ -197,15 +197,18 @@ mod tests { fn combine_three_columns() { assert_eq!( build_matrix_from_columns(vec![ - DenseMatrix::from_2d_vec(&[vec![1.0], vec![1.0], vec![1.0]]), - DenseMatrix::from_2d_vec(&[vec![2.0], vec![2.0], vec![2.0]]), - DenseMatrix::from_2d_vec(&[vec![3.0], vec![3.0], vec![3.0]]) + DenseMatrix::from_2d_vec(&vec![vec![1.0], vec![1.0], vec![1.0],]).unwrap(), + DenseMatrix::from_2d_vec(&vec![vec![2.0], vec![2.0], vec![2.0],]).unwrap(), + DenseMatrix::from_2d_vec(&vec![vec![3.0], vec![3.0], vec![3.0],]).unwrap() ]), - Some(DenseMatrix::from_2d_vec(&[ - vec![1.0, 2.0, 3.0], - vec![1.0, 2.0, 3.0], - vec![1.0, 2.0, 3.0] - ])) + Some( + DenseMatrix::from_2d_vec(&vec![ + vec![1.0, 2.0, 3.0], + vec![1.0, 2.0, 3.0], + vec![1.0, 2.0, 3.0] + ]) + .unwrap() + ) ) } @@ -287,13 +290,15 @@ mod tests { /// sklearn. #[test] fn fit_transform_random_values() { - let transformed_values = - fit_transform_with_default_standard_scaler(&DenseMatrix::from_2d_array(&[ + let transformed_values = fit_transform_with_default_standard_scaler( + &DenseMatrix::from_2d_array(&[ &[0.1004222429, 0.2194113576, 0.9310663354, 0.3313593793], &[0.2045493861, 0.1683865411, 0.5071506765, 0.7257355264], &[0.5708488802, 0.1846414616, 0.9590802982, 0.5591871046], &[0.8387612750, 0.5754861361, 0.5537109852, 0.1077646442], - ])); + ]) + .unwrap(), + ); println!("{transformed_values}"); assert!(transformed_values.approximate_eq( &DenseMatrix::from_2d_array(&[ @@ -301,7 +306,8 @@ mod tests { &[-0.7615464283, -0.7076698384, -1.1075452562, 1.2632979631], &[0.4832504303, -0.6106747444, 1.0630075435, 0.5494084257], &[1.3936980634, 1.7215431158, -0.8839228078, -1.3855590021], - ]), + ]) + .unwrap(), 1.0 )) } @@ -310,13 +316,10 @@ mod tests { #[test] fn fit_transform_with_zero_variance() { assert_eq!( - fit_transform_with_default_standard_scaler(&DenseMatrix::from_2d_array(&[ - &[1.0], - &[1.0], - &[1.0], - &[1.0] - ])), - DenseMatrix::from_2d_array(&[&[0.0], &[0.0], &[0.0], &[0.0]]), + fit_transform_with_default_standard_scaler( + &DenseMatrix::from_2d_array(&[&[1.0], &[1.0], &[1.0], &[1.0]]).unwrap() + ), + DenseMatrix::from_2d_array(&[&[0.0], &[0.0], &[0.0], &[0.0]]).unwrap(), "When scaling values with zero variance, zero is expected as return value" ) } @@ -331,7 +334,8 @@ mod tests { &[1.0, 2.0, 5.0], &[1.0, 1.0, 1.0], &[1.0, 2.0, 5.0] - ]), + ]) + .unwrap(), StandardScalerParameters::default(), ), Ok(StandardScaler { @@ -354,7 +358,8 @@ mod tests { &[0.2045493861, 0.1683865411, 0.5071506765, 0.7257355264], &[0.5708488802, 0.1846414616, 0.9590802982, 0.5591871046], &[0.8387612750, 0.5754861361, 0.5537109852, 0.1077646442], - ]), + ]) + .unwrap(), StandardScalerParameters::default(), ) .unwrap(); @@ -364,17 +369,18 @@ mod tests { vec![0.42864544605, 0.2869813741, 0.737752073825, 0.431011663625], ); - assert!( - &DenseMatrix::::from_2d_vec(&[fitted_scaler.stds]).approximate_eq( + assert!(&DenseMatrix::::from_2d_vec(&vec![fitted_scaler.stds]) + .unwrap() + .approximate_eq( &DenseMatrix::from_2d_array(&[&[ 0.29426447500954, 0.16758497615485, 0.20820945786863, 0.23329718831165 - ],]), + ],]) + .unwrap(), 0.00000000000001 - ) - ) + )) } /// If `with_std` is set to `false` the values should not be @@ -392,8 +398,9 @@ mod tests { }; assert_eq!( - standard_scaler.transform(&DenseMatrix::from_2d_array(&[&[0.0, 2.0], &[2.0, 4.0]])), - Ok(DenseMatrix::from_2d_array(&[&[-1.0, -1.0], &[1.0, 1.0]])) + standard_scaler + .transform(&DenseMatrix::from_2d_array(&[&[0.0, 2.0], &[2.0, 4.0]]).unwrap()), + 
Ok(DenseMatrix::from_2d_array(&[&[-1.0, -1.0], &[1.0, 1.0]]).unwrap()) ) } @@ -413,8 +420,8 @@ mod tests { assert_eq!( standard_scaler - .transform(&DenseMatrix::from_2d_array(&[&[0.0, 9.0], &[4.0, 12.0]])), - Ok(DenseMatrix::from_2d_array(&[&[0.0, 3.0], &[2.0, 4.0]])) + .transform(&DenseMatrix::from_2d_array(&[&[0.0, 9.0], &[4.0, 12.0]]).unwrap()), + Ok(DenseMatrix::from_2d_array(&[&[0.0, 3.0], &[2.0, 4.0]]).unwrap()) ) } @@ -433,7 +440,8 @@ mod tests { &[0.2045493861, 0.1683865411, 0.5071506765, 0.7257355264], &[0.5708488802, 0.1846414616, 0.9590802982, 0.5591871046], &[0.8387612750, 0.5754861361, 0.5537109852, 0.1077646442], - ]), + ]) + .unwrap(), StandardScalerParameters::default(), ) .unwrap(); @@ -446,17 +454,18 @@ mod tests { vec![0.42864544605, 0.2869813741, 0.737752073825, 0.431011663625], ); - assert!( - &DenseMatrix::from_2d_vec(&vec![deserialized_scaler.stds]).approximate_eq( + assert!(&DenseMatrix::from_2d_vec(&vec![deserialized_scaler.stds]) + .unwrap() + .approximate_eq( &DenseMatrix::from_2d_array(&[&[ 0.29426447500954, 0.16758497615485, 0.20820945786863, 0.23329718831165 - ],]), + ],]) + .unwrap(), 0.00000000000001 - ) - ) + )) } } } diff --git a/src/readers/csv.rs b/src/readers/csv.rs index d67d4b5f..f8a03ebd 100644 --- a/src/readers/csv.rs +++ b/src/readers/csv.rs @@ -238,7 +238,8 @@ mod tests { &[5.1, 3.5, 1.4, 0.2], &[4.9, 3.0, 1.4, 0.2], &[4.7, 3.2, 1.3, 0.2], - ])) + ]) + .unwrap()) ) } #[test] @@ -261,7 +262,7 @@ mod tests { &[5.1, 3.5, 1.4, 0.2], &[4.9, 3.0, 1.4, 0.2], &[4.7, 3.2, 1.3, 0.2], - ])) + ]).unwrap()) ) } #[test] diff --git a/src/svm/svc.rs b/src/svm/svc.rs index 252d43af..6477778b 100644 --- a/src/svm/svc.rs +++ b/src/svm/svc.rs @@ -53,7 +53,7 @@ //! &[4.9, 2.4, 3.3, 1.0], //! &[6.6, 2.9, 4.6, 1.3], //! &[5.2, 2.7, 3.9, 1.4], -//! ]); +//! ]).unwrap(); //! let y = vec![ -1, -1, -1, -1, -1, -1, -1, -1, //! 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; //! @@ -957,7 +957,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let y: Vec = vec![ -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -983,7 +984,8 @@ mod tests { )] #[test] fn svc_fit_decision_function() { - let x = DenseMatrix::from_2d_array(&[&[4.0, 0.0], &[0.0, 4.0], &[8.0, 0.0], &[0.0, 8.0]]); + let x = DenseMatrix::from_2d_array(&[&[4.0, 0.0], &[0.0, 4.0], &[8.0, 0.0], &[0.0, 8.0]]) + .unwrap(); let x2 = DenseMatrix::from_2d_array(&[ &[3.0, 3.0], @@ -992,7 +994,8 @@ mod tests { &[10.0, 10.0], &[1.0, 1.0], &[0.0, 0.0], - ]); + ]) + .unwrap(); let y: Vec = vec![-1, -1, 1, 1]; @@ -1045,7 +1048,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let y: Vec = vec![ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -1094,7 +1098,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let y: Vec = vec![ -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, diff --git a/src/svm/svr.rs b/src/svm/svr.rs index 7511aeaf..e68ebf85 100644 --- a/src/svm/svr.rs +++ b/src/svm/svr.rs @@ -44,7 +44,7 @@ //! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], //! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], //! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], -//! ]); +//! ]).unwrap(); //! //! let y: Vec = vec![83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, //! 
100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9]; @@ -640,7 +640,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y: Vec = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, @@ -688,7 +689,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y: Vec = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, diff --git a/src/tree/decision_tree_classifier.rs b/src/tree/decision_tree_classifier.rs index 3c9deaf7..4da9f443 100644 --- a/src/tree/decision_tree_classifier.rs +++ b/src/tree/decision_tree_classifier.rs @@ -48,7 +48,7 @@ //! &[4.9, 2.4, 3.3, 1.0], //! &[6.6, 2.9, 4.6, 1.3], //! &[5.2, 2.7, 3.9, 1.4], -//! ]); +//! ]).unwrap(); //! let y = vec![ 0, 0, 0, 0, 0, 0, 0, 0, //! 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; //! @@ -964,7 +964,8 @@ mod tests { &[4.9, 2.4, 3.3, 1.0], &[6.6, 2.9, 4.6, 1.3], &[5.2, 2.7, 3.9, 1.4], - ]); + ]) + .unwrap(); let y: Vec = vec![0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; assert_eq!( @@ -1031,7 +1032,8 @@ mod tests { &[0., 0., 1., 1.], &[0., 0., 0., 0.], &[0., 0., 0., 1.], - ]); + ]) + .unwrap(); let y: Vec = vec![1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0]; assert_eq!( @@ -1065,7 +1067,8 @@ mod tests { &[0., 0., 1., 1.], &[0., 0., 0., 0.], &[0., 0., 0., 1.], - ]); + ]) + .unwrap(); let y: Vec = vec![1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0]; let tree = DecisionTreeClassifier::fit(&x, &y, Default::default()).unwrap(); assert_eq!( @@ -1106,7 +1109,8 @@ mod tests { &[0., 0., 1., 1.], &[0., 0., 0., 0.], &[0., 0., 0., 1.], - ]); + ]) + .unwrap(); let y = vec![1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0]; let tree = DecisionTreeClassifier::fit(&x, &y, Default::default()).unwrap(); diff --git a/src/tree/decision_tree_regressor.rs b/src/tree/decision_tree_regressor.rs index 21832ba4..1569af2e 100644 --- a/src/tree/decision_tree_regressor.rs +++ b/src/tree/decision_tree_regressor.rs @@ -39,7 +39,7 @@ //! &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], //! &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], //! &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], -//! ]); +//! ]).unwrap(); //! let y: Vec = vec![ //! 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, //! 
101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9, @@ -753,7 +753,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y: Vec = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9, @@ -834,7 +835,8 @@ mod tests { &[502.601, 393.1, 251.4, 125.368, 1960., 69.564], &[518.173, 480.6, 257.2, 127.852, 1961., 69.331], &[554.894, 400.7, 282.7, 130.081, 1962., 70.551], - ]); + ]) + .unwrap(); let y: Vec = vec![ 83.0, 88.5, 88.2, 89.5, 96.2, 98.1, 99.0, 100.0, 101.2, 104.6, 108.4, 110.8, 112.6, 114.2, 115.7, 116.9, From ba75f9ffada069445c7f0b510d3aba770cd5eb60 Mon Sep 17 00:00:00 2001 From: morenol <22335041+morenol@users.noreply.github.com> Date: Mon, 25 Nov 2024 10:34:29 -0500 Subject: [PATCH 13/15] chore: fix clippy (#283) * chore: fix clippy Co-authored-by: Luis Moreno --- .github/workflows/ci.yml | 2 +- src/algorithm/neighbour/cover_tree.rs | 8 +- src/algorithm/neighbour/fastpair.rs | 10 +- src/algorithm/neighbour/linear_search.rs | 4 +- src/algorithm/sort/heap_select.rs | 4 +- src/algorithm/sort/quick_sort.rs | 1 + src/cluster/kmeans.rs | 8 +- src/linalg/basic/arrays.rs | 152 +++++++++--------- src/linalg/traits/evd.rs | 4 +- src/linalg/traits/stats.rs | 4 +- src/linalg/traits/svd.rs | 4 +- src/linear/bg_solver.rs | 8 +- src/linear/lasso_optimizer.rs | 14 +- src/linear/logistic_regression.rs | 21 ++- src/naive_bayes/bernoulli.rs | 7 +- src/naive_bayes/categorical.rs | 5 +- src/naive_bayes/gaussian.rs | 5 +- src/naive_bayes/mod.rs | 3 +- src/naive_bayes/multinomial.rs | 7 +- src/neighbors/knn_classifier.rs | 1 + src/neighbors/knn_regressor.rs | 7 +- .../first_order/gradient_descent.rs | 17 +- src/optimization/first_order/lbfgs.rs | 39 ++--- src/optimization/first_order/mod.rs | 16 +- src/optimization/line_search.rs | 29 ++-- src/optimization/mod.rs | 16 +- src/svm/mod.rs | 2 +- src/tree/decision_tree_classifier.rs | 18 +-- src/tree/decision_tree_regressor.rs | 14 +- 29 files changed, 194 insertions(+), 236 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 89b3b37e..d7942c8f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,7 +36,7 @@ jobs: - name: Install Rust toolchain uses: actions-rs/toolchain@v1 with: - toolchain: stable + toolchain: 1.81 # 1.82 seems to break wasm32 tests https://github.com/rustwasm/wasm-bindgen/issues/4274 target: ${{ matrix.platform.target }} profile: minimal default: true diff --git a/src/algorithm/neighbour/cover_tree.rs b/src/algorithm/neighbour/cover_tree.rs index 011a9cc0..9989ae24 100644 --- a/src/algorithm/neighbour/cover_tree.rs +++ b/src/algorithm/neighbour/cover_tree.rs @@ -124,7 +124,7 @@ impl> CoverTree { current_cover_set.push((d, &self.root)); let mut heap = HeapSelection::with_capacity(k); - heap.add(std::f64::MAX); + heap.add(f64::MAX); let mut empty_heap = true; if !self.identical_excluded || self.get_data_value(self.root.idx) != p { @@ -145,7 +145,7 @@ impl> CoverTree { } let upper_bound = if empty_heap { - std::f64::INFINITY + f64::INFINITY } else { *heap.peek() }; @@ -291,7 +291,7 @@ impl> CoverTree { } else { let max_dist = self.max(point_set); let next_scale = (max_scale - 1).min(self.get_scale(max_dist)); - if next_scale == std::i64::MIN { + if next_scale == i64::MIN { let mut children: Vec = Vec::new(); let mut leaf = self.new_leaf(p); children.push(leaf); @@ -435,7 +435,7 @@ impl> 
CoverTree { fn get_scale(&self, d: f64) -> i64 { if d == 0f64 { - std::i64::MIN + i64::MIN } else { (self.inv_log_base * d.ln()).ceil() as i64 } diff --git a/src/algorithm/neighbour/fastpair.rs b/src/algorithm/neighbour/fastpair.rs index 759d018c..9f663f67 100644 --- a/src/algorithm/neighbour/fastpair.rs +++ b/src/algorithm/neighbour/fastpair.rs @@ -52,10 +52,8 @@ pub struct FastPair<'a, T: RealNumber + FloatNumber, M: Array2> { } impl<'a, T: RealNumber + FloatNumber, M: Array2> FastPair<'a, T, M> { - /// /// Constructor - /// Instantiate and inizialise the algorithm - /// + /// Instantiate and initialize the algorithm pub fn new(m: &'a M) -> Result { if m.shape().0 < 3 { return Err(Failed::because( @@ -74,10 +72,8 @@ impl<'a, T: RealNumber + FloatNumber, M: Array2> FastPair<'a, T, M> { Ok(init) } - /// /// Initialise `FastPair` by passing a `Array2`. /// Build a FastPairs data-structure from a set of (new) points. - /// fn init(&mut self) { // basic measures let len = self.samples.shape().0; @@ -158,9 +154,7 @@ impl<'a, T: RealNumber + FloatNumber, M: Array2> FastPair<'a, T, M> { self.neighbours = neighbours; } - /// /// Find closest pair by scanning list of nearest neighbors. - /// #[allow(dead_code)] pub fn closest_pair(&self) -> PairwiseDistance { let mut a = self.neighbours[0]; // Start with first point @@ -217,9 +211,7 @@ mod tests_fastpair { use super::*; use crate::linalg::basic::{arrays::Array, matrix::DenseMatrix}; - /// /// Brute force algorithm, used only for comparison and testing - /// pub fn closest_pair_brute(fastpair: &FastPair>) -> PairwiseDistance { use itertools::Itertools; let m = fastpair.samples.shape().0; diff --git a/src/algorithm/neighbour/linear_search.rs b/src/algorithm/neighbour/linear_search.rs index b1ce7270..6bc82176 100644 --- a/src/algorithm/neighbour/linear_search.rs +++ b/src/algorithm/neighbour/linear_search.rs @@ -61,7 +61,7 @@ impl> LinearKNNSearch { for _ in 0..k { heap.add(KNNPoint { - distance: std::f64::INFINITY, + distance: f64::INFINITY, index: None, }); } @@ -215,7 +215,7 @@ mod tests { }; let point_inf = KNNPoint { - distance: std::f64::INFINITY, + distance: f64::INFINITY, index: Some(3), }; diff --git a/src/algorithm/sort/heap_select.rs b/src/algorithm/sort/heap_select.rs index 23d2704a..8a4ef78a 100644 --- a/src/algorithm/sort/heap_select.rs +++ b/src/algorithm/sort/heap_select.rs @@ -133,7 +133,7 @@ mod tests { #[test] fn test_add1() { let mut heap = HeapSelection::with_capacity(3); - heap.add(std::f64::INFINITY); + heap.add(f64::INFINITY); heap.add(-5f64); heap.add(4f64); heap.add(-1f64); @@ -151,7 +151,7 @@ mod tests { #[test] fn test_add2() { let mut heap = HeapSelection::with_capacity(3); - heap.add(std::f64::INFINITY); + heap.add(f64::INFINITY); heap.add(0.0); heap.add(8.4852); heap.add(5.6568); diff --git a/src/algorithm/sort/quick_sort.rs b/src/algorithm/sort/quick_sort.rs index 97d34e7c..e64c4243 100644 --- a/src/algorithm/sort/quick_sort.rs +++ b/src/algorithm/sort/quick_sort.rs @@ -3,6 +3,7 @@ use num_traits::Num; pub trait QuickArgSort { fn quick_argsort_mut(&mut self) -> Vec; + #[allow(dead_code)] fn quick_argsort(&self) -> Vec; } diff --git a/src/cluster/kmeans.rs b/src/cluster/kmeans.rs index 6609ace4..2fade68f 100644 --- a/src/cluster/kmeans.rs +++ b/src/cluster/kmeans.rs @@ -96,7 +96,7 @@ impl, Y: Array1> PartialEq for KMeans< return false; } for j in 0..self.centroids[i].len() { - if (self.centroids[i][j] - other.centroids[i][j]).abs() > std::f64::EPSILON { + if (self.centroids[i][j] - other.centroids[i][j]).abs() > 
f64::EPSILON { return false; } } @@ -270,7 +270,7 @@ impl, Y: Array1> KMeans let (n, d) = data.shape(); - let mut distortion = std::f64::MAX; + let mut distortion = f64::MAX; let mut y = KMeans::::kmeans_plus_plus(data, parameters.k, parameters.seed); let mut size = vec![0; parameters.k]; let mut centroids = vec![vec![0f64; d]; parameters.k]; @@ -331,7 +331,7 @@ impl, Y: Array1> KMeans let mut row = vec![0f64; x.shape().1]; for i in 0..n { - let mut min_dist = std::f64::MAX; + let mut min_dist = f64::MAX; let mut best_cluster = 0; for j in 0..self.k { @@ -361,7 +361,7 @@ impl, Y: Array1> KMeans .cloned() .collect(); - let mut d = vec![std::f64::MAX; n]; + let mut d = vec![f64::MAX; n]; let mut row = vec![TX::zero(); data.shape().1]; for j in 1..k { diff --git a/src/linalg/basic/arrays.rs b/src/linalg/basic/arrays.rs index 99df2078..3c889722 100644 --- a/src/linalg/basic/arrays.rs +++ b/src/linalg/basic/arrays.rs @@ -265,11 +265,11 @@ pub trait ArrayView1: Array { if p.is_infinite() && p.is_sign_positive() { self.iterator(0) .map(|x| x.to_f64().unwrap().abs()) - .fold(std::f64::NEG_INFINITY, |a, b| a.max(b)) + .fold(f64::NEG_INFINITY, |a, b| a.max(b)) } else if p.is_infinite() && p.is_sign_negative() { self.iterator(0) .map(|x| x.to_f64().unwrap().abs()) - .fold(std::f64::INFINITY, |a, b| a.min(b)) + .fold(f64::INFINITY, |a, b| a.min(b)) } else { let mut norm = 0f64; @@ -558,11 +558,11 @@ pub trait ArrayView2: Array: pub trait MutArrayView2: MutArray + ArrayView2 { - /// + /// copy values from another array fn copy_from(&mut self, other: &dyn Array) { self.iterator_mut(0) .zip(other.iterator(0)) .for_each(|(s, o)| *s = *o); } - /// + /// update view with absolute values fn abs_mut(&mut self) where T: Number + Signed, { self.iterator_mut(0).for_each(|v| *v = v.abs()); } - /// + /// update view values with opposite sign fn neg_mut(&mut self) where T: Number + Neg, { self.iterator_mut(0).for_each(|v| *v = -*v); } - /// + /// update view values at power `p` fn pow_mut(&mut self, p: T) where T: RealNumber, { self.iterator_mut(0).for_each(|v| *v = v.powf(p)); } - /// + /// scale view values fn scale_mut(&mut self, mean: &[T], std: &[T], axis: u8) where T: Number, @@ -784,27 +784,27 @@ pub trait MutArrayView2: /// Trait for mutable 1D-array view pub trait Array1: MutArrayView1 + Sized + Clone { - /// + /// return a view of the array fn slice<'a>(&'a self, range: Range) -> Box + 'a>; - /// + /// return a mutable view of the array fn slice_mut<'a>(&'a mut self, range: Range) -> Box + 'a>; - /// + /// fill array with a given value fn fill(len: usize, value: T) -> Self where Self: Sized; - /// + /// create array from iterator fn from_iterator>(iter: I, len: usize) -> Self where Self: Sized; - /// + /// create array from vector fn from_vec_slice(slice: &[T]) -> Self where Self: Sized; - /// + /// create array from slice fn from_slice(slice: &'_ dyn ArrayView1) -> Self where Self: Sized; - /// + /// create a zero array fn zeros(len: usize) -> Self where T: Number, @@ -812,7 +812,7 @@ pub trait Array1: MutArrayView1 + Sized + { Self::fill(len, T::zero()) } - /// + /// create an array of ones fn ones(len: usize) -> Self where T: Number, @@ -820,7 +820,7 @@ pub trait Array1: MutArrayView1 + Sized + { Self::fill(len, T::one()) } - /// + /// create an array of random values fn rand(len: usize) -> Self where T: RealNumber, @@ -828,7 +828,7 @@ pub trait Array1: MutArrayView1 + Sized + { Self::from_iterator((0..len).map(|_| T::rand()), len) } - /// + /// add a scalar to the array fn add_scalar(&self, x: T) -> 
Self where T: Number, @@ -838,7 +838,7 @@ pub trait Array1: MutArrayView1 + Sized + result.add_scalar_mut(x); result } - /// + /// subtract a scalar from the array fn sub_scalar(&self, x: T) -> Self where T: Number, @@ -848,7 +848,7 @@ pub trait Array1: MutArrayView1 + Sized + result.sub_scalar_mut(x); result } - /// + /// divide a scalar from the array fn div_scalar(&self, x: T) -> Self where T: Number, @@ -858,7 +858,7 @@ pub trait Array1: MutArrayView1 + Sized + result.div_scalar_mut(x); result } - /// + /// multiply a scalar to the array fn mul_scalar(&self, x: T) -> Self where T: Number, @@ -868,7 +868,7 @@ pub trait Array1: MutArrayView1 + Sized + result.mul_scalar_mut(x); result } - /// + /// sum of two arrays fn add(&self, other: &dyn Array) -> Self where T: Number, @@ -878,7 +878,7 @@ pub trait Array1: MutArrayView1 + Sized + result.add_mut(other); result } - /// + /// subtract two arrays fn sub(&self, other: &impl Array1) -> Self where T: Number, @@ -888,7 +888,7 @@ pub trait Array1: MutArrayView1 + Sized + result.sub_mut(other); result } - /// + /// multiply two arrays fn mul(&self, other: &dyn Array) -> Self where T: Number, @@ -898,7 +898,7 @@ pub trait Array1: MutArrayView1 + Sized + result.mul_mut(other); result } - /// + /// divide two arrays fn div(&self, other: &dyn Array) -> Self where T: Number, @@ -908,7 +908,7 @@ pub trait Array1: MutArrayView1 + Sized + result.div_mut(other); result } - /// + /// replace values with another array fn take(&self, index: &[usize]) -> Self where Self: Sized, @@ -920,7 +920,7 @@ pub trait Array1: MutArrayView1 + Sized + ); Self::from_iterator(index.iter().map(move |&i| *self.get(i)), index.len()) } - /// + /// create a view of the array with absolute values fn abs(&self) -> Self where T: Number + Signed, @@ -930,7 +930,7 @@ pub trait Array1: MutArrayView1 + Sized + result.abs_mut(); result } - /// + /// create a view of the array with opposite sign fn neg(&self) -> Self where T: Number + Neg, @@ -940,7 +940,7 @@ pub trait Array1: MutArrayView1 + Sized + result.neg_mut(); result } - /// + /// create a view of the array with values at power `p` fn pow(&self, p: T) -> Self where T: RealNumber, @@ -950,7 +950,7 @@ pub trait Array1: MutArrayView1 + Sized + result.pow_mut(p); result } - /// + /// apply argsort to the array fn argsort(&self) -> Vec where T: Number + PartialOrd, @@ -958,12 +958,12 @@ pub trait Array1: MutArrayView1 + Sized + let mut v = self.clone(); v.argsort_mut() } - /// + /// map values of the array fn map, F: FnMut(&T) -> O>(self, f: F) -> A { let len = self.shape(); A::from_iterator(self.iterator(0).map(f), len) } - /// + /// apply softmax to the array fn softmax(&self) -> Self where T: RealNumber, @@ -973,7 +973,7 @@ pub trait Array1: MutArrayView1 + Sized + result.softmax_mut(); result } - /// + /// multiply array by matrix fn xa(&self, a_transpose: bool, a: &dyn ArrayView2) -> Self where T: Number, @@ -1003,7 +1003,7 @@ pub trait Array1: MutArrayView1 + Sized + result } - /// + /// check if two arrays are approximately equal fn approximate_eq(&self, other: &Self, error: T) -> bool where T: Number + RealNumber, @@ -1015,13 +1015,13 @@ pub trait Array1: MutArrayView1 + Sized + /// Trait for mutable 2D-array view pub trait Array2: MutArrayView2 + Sized + Clone { - /// + /// fill 2d array with a given value fn fill(nrows: usize, ncols: usize, value: T) -> Self; - /// + /// get a view of the 2d array fn slice<'a>(&'a self, rows: Range, cols: Range) -> Box + 'a> where Self: Sized; - /// + /// get a mutable view of the 2d array 
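// --- Editor's aside: illustrative sketch, not part of the patch --------------
// The doc comments added in this hunk describe a consistent pattern: each
// value-returning op (`add_scalar`, `sub`, `abs`, `pow`, ...) clones `self`
// and then delegates to its in-place `*_mut` twin from the Mut* view traits.
// A minimal usage sketch, assuming the `Vec` trait impls used by the crate's
// own tests:
#[test]
fn editor_sketch_array1_scalar_ops() {
    use crate::linalg::basic::arrays::{Array1, MutArrayView1};

    let v: Vec<f64> = vec![3., -2., 6.];
    let shifted = v.add_scalar(1.); // clones, then calls add_scalar_mut
    assert_eq!(shifted, vec![4., -1., 7.]);

    let mut w: Vec<f64> = vec![3., -2., 6.];
    w.abs_mut(); // in place, no extra allocation
    assert_eq!(w, vec![3., 2., 6.]);
}
// --- end aside ----------------------------------------------------------------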
fn slice_mut<'a>( &'a mut self, rows: Range, @@ -1029,31 +1029,31 @@ pub trait Array2: MutArrayView2 + Sized + ) -> Box + 'a> where Self: Sized; - /// + /// create 2d array from iterator fn from_iterator>(iter: I, nrows: usize, ncols: usize, axis: u8) -> Self; - /// + /// get row from 2d array fn get_row<'a>(&'a self, row: usize) -> Box + 'a> where Self: Sized; - /// + /// get column from 2d array fn get_col<'a>(&'a self, col: usize) -> Box + 'a> where Self: Sized; - /// + /// create a zero 2d array fn zeros(nrows: usize, ncols: usize) -> Self where T: Number, { Self::fill(nrows, ncols, T::zero()) } - /// + /// create a 2d array of ones fn ones(nrows: usize, ncols: usize) -> Self where T: Number, { Self::fill(nrows, ncols, T::one()) } - /// + /// create an identity matrix fn eye(size: usize) -> Self where T: Number, @@ -1066,29 +1066,29 @@ pub trait Array2: MutArrayView2 + Sized + matrix } - /// + /// create a 2d array of random values fn rand(nrows: usize, ncols: usize) -> Self where T: RealNumber, { Self::from_iterator((0..nrows * ncols).map(|_| T::rand()), nrows, ncols, 0) } - /// + /// crate from 2d slice fn from_slice(slice: &dyn ArrayView2) -> Self { let (nrows, ncols) = slice.shape(); Self::from_iterator(slice.iterator(0).cloned(), nrows, ncols, 0) } - /// + /// create from row fn from_row(slice: &dyn ArrayView1) -> Self { let ncols = slice.shape(); Self::from_iterator(slice.iterator(0).cloned(), 1, ncols, 0) } - /// + /// create from column fn from_column(slice: &dyn ArrayView1) -> Self { let nrows = slice.shape(); Self::from_iterator(slice.iterator(0).cloned(), nrows, 1, 0) } - /// + /// transpose 2d array fn transpose(&self) -> Self { let (nrows, ncols) = self.shape(); let mut m = Self::fill(ncols, nrows, *self.get((0, 0))); @@ -1099,7 +1099,7 @@ pub trait Array2: MutArrayView2 + Sized + } m } - /// + /// change shape of 2d array fn reshape(&self, nrows: usize, ncols: usize, axis: u8) -> Self { let (onrows, oncols) = self.shape(); @@ -1110,7 +1110,7 @@ pub trait Array2: MutArrayView2 + Sized + Self::from_iterator(self.iterator(0).cloned(), nrows, ncols, axis) } - /// + /// multiply two 2d arrays fn matmul(&self, other: &dyn ArrayView2) -> Self where T: Number, @@ -1136,7 +1136,7 @@ pub trait Array2: MutArrayView2 + Sized + result } - /// + /// matrix multiplication fn ab(&self, a_transpose: bool, b: &dyn ArrayView2, b_transpose: bool) -> Self where T: Number, @@ -1171,7 +1171,7 @@ pub trait Array2: MutArrayView2 + Sized + result } } - /// + /// matrix vector multiplication fn ax(&self, a_transpose: bool, x: &dyn ArrayView1) -> Self where T: Number, @@ -1199,7 +1199,7 @@ pub trait Array2: MutArrayView2 + Sized + } result } - /// + /// concatenate 1d array fn concatenate_1d<'a>(arrays: &'a [&'a dyn ArrayView1], axis: u8) -> Self { assert!( axis == 1 || axis == 0, @@ -1237,7 +1237,7 @@ pub trait Array2: MutArrayView2 + Sized + ), } } - /// + /// concatenate 2d array fn concatenate_2d<'a>(arrays: &'a [&'a dyn ArrayView2], axis: u8) -> Self { assert!( axis == 1 || axis == 0, @@ -1294,7 +1294,7 @@ pub trait Array2: MutArrayView2 + Sized + } } } - /// + /// merge 1d arrays fn merge_1d<'a>(&'a self, arrays: &'a [&'a dyn ArrayView1], axis: u8, append: bool) -> Self { assert!( axis == 1 || axis == 0, @@ -1362,7 +1362,7 @@ pub trait Array2: MutArrayView2 + Sized + } } } - /// + /// Stack arrays in sequence vertically fn v_stack(&self, other: &dyn ArrayView2) -> Self { let (nrows, ncols) = self.shape(); let (other_nrows, other_ncols) = other.shape(); @@ -1378,7 +1378,7 @@ pub trait Array2: 
MutArrayView2 + Sized + 0, ) } - /// + /// Stack arrays in sequence horizontally fn h_stack(&self, other: &dyn ArrayView2) -> Self { let (nrows, ncols) = self.shape(); let (other_nrows, other_ncols) = other.shape(); @@ -1394,20 +1394,20 @@ pub trait Array2: MutArrayView2 + Sized + 1, ) } - /// + /// map array values fn map, F: FnMut(&T) -> O>(self, f: F) -> A { let (nrows, ncols) = self.shape(); A::from_iterator(self.iterator(0).map(f), nrows, ncols, 0) } - /// + /// iter rows fn row_iter<'a>(&'a self) -> Box + 'a>> + 'a> { Box::new((0..self.shape().0).map(move |r| self.get_row(r))) } - /// + /// iter cols fn col_iter<'a>(&'a self) -> Box + 'a>> + 'a> { Box::new((0..self.shape().1).map(move |r| self.get_col(r))) } - /// + /// take elements from 2d array fn take(&self, index: &[usize], axis: u8) -> Self { let (nrows, ncols) = self.shape(); @@ -1447,7 +1447,7 @@ pub trait Array2: MutArrayView2 + Sized + fn take_column(&self, column_index: usize) -> Self { self.take(&[column_index], 1) } - /// + /// add a scalar to the array fn add_scalar(&self, x: T) -> Self where T: Number, @@ -1456,7 +1456,7 @@ pub trait Array2: MutArrayView2 + Sized + result.add_scalar_mut(x); result } - /// + /// subtract a scalar from the array fn sub_scalar(&self, x: T) -> Self where T: Number, @@ -1465,7 +1465,7 @@ pub trait Array2: MutArrayView2 + Sized + result.sub_scalar_mut(x); result } - /// + /// divide a scalar from the array fn div_scalar(&self, x: T) -> Self where T: Number, @@ -1474,7 +1474,7 @@ pub trait Array2: MutArrayView2 + Sized + result.div_scalar_mut(x); result } - /// + /// multiply a scalar to the array fn mul_scalar(&self, x: T) -> Self where T: Number, @@ -1483,7 +1483,7 @@ pub trait Array2: MutArrayView2 + Sized + result.mul_scalar_mut(x); result } - /// + /// sum of two arrays fn add(&self, other: &dyn Array) -> Self where T: Number, @@ -1492,7 +1492,7 @@ pub trait Array2: MutArrayView2 + Sized + result.add_mut(other); result } - /// + /// subtract two arrays fn sub(&self, other: &dyn Array) -> Self where T: Number, @@ -1501,7 +1501,7 @@ pub trait Array2: MutArrayView2 + Sized + result.sub_mut(other); result } - /// + /// multiply two arrays fn mul(&self, other: &dyn Array) -> Self where T: Number, @@ -1510,7 +1510,7 @@ pub trait Array2: MutArrayView2 + Sized + result.mul_mut(other); result } - /// + /// divide two arrays fn div(&self, other: &dyn Array) -> Self where T: Number, @@ -1519,7 +1519,7 @@ pub trait Array2: MutArrayView2 + Sized + result.div_mut(other); result } - /// + /// absolute values of the array fn abs(&self) -> Self where T: Number + Signed, @@ -1528,7 +1528,7 @@ pub trait Array2: MutArrayView2 + Sized + result.abs_mut(); result } - /// + /// negation of the array fn neg(&self) -> Self where T: Number + Neg, @@ -1537,7 +1537,7 @@ pub trait Array2: MutArrayView2 + Sized + result.neg_mut(); result } - /// + /// values at power `p` fn pow(&self, p: T) -> Self where T: RealNumber, @@ -1575,7 +1575,7 @@ pub trait Array2: MutArrayView2 + Sized + } } - /// appriximate equality of the elements of a matrix according to a given error + /// approximate equality of the elements of a matrix according to a given error fn approximate_eq(&self, other: &Self, error: T) -> bool where T: Number + RealNumber, @@ -1631,8 +1631,8 @@ mod tests { let v = vec![3., -2., 6.]; assert_eq!(v.norm(1.), 11.); assert_eq!(v.norm(2.), 7.); - assert_eq!(v.norm(std::f64::INFINITY), 6.); - assert_eq!(v.norm(std::f64::NEG_INFINITY), 2.); + assert_eq!(v.norm(f64::INFINITY), 6.); + 
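        // Editor's note: for v = [3, -2, 6] these values follow from the p-norm
        // definition ||v||_p = (sum_i |v_i|^p)^(1/p): p = 1 gives 3 + 2 + 6 = 11,
        // p = 2 gives sqrt(9 + 4 + 36) = 7, p -> +inf tends to max|v_i| = 6, and
        // p -> -inf to min|v_i| = 2; the INFINITY/NEG_INFINITY folds in `norm()`
        // above implement exactly those two limits.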
assert_eq!(v.norm(f64::NEG_INFINITY), 2.); } #[test] diff --git a/src/linalg/traits/evd.rs b/src/linalg/traits/evd.rs index 4db766b0..3bb382a0 100644 --- a/src/linalg/traits/evd.rs +++ b/src/linalg/traits/evd.rs @@ -841,7 +841,7 @@ mod tests { )); for (i, eigen_values_i) in eigen_values.iter().enumerate() { assert!((eigen_values_i - evd.d[i]).abs() < 1e-4); - assert!((0f64 - evd.e[i]).abs() < std::f64::EPSILON); + assert!((0f64 - evd.e[i]).abs() < f64::EPSILON); } } #[cfg_attr( @@ -875,7 +875,7 @@ mod tests { )); for (i, eigen_values_i) in eigen_values.iter().enumerate() { assert!((eigen_values_i - evd.d[i]).abs() < 1e-4); - assert!((0f64 - evd.e[i]).abs() < std::f64::EPSILON); + assert!((0f64 - evd.e[i]).abs() < f64::EPSILON); } } #[cfg_attr( diff --git a/src/linalg/traits/stats.rs b/src/linalg/traits/stats.rs index 43c23dce..8702a81a 100644 --- a/src/linalg/traits/stats.rs +++ b/src/linalg/traits/stats.rs @@ -217,8 +217,8 @@ mod tests { let expected_0 = vec![0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]; let expected_1 = vec![1.25, 1.25]; - assert!(m.var(0).approximate_eq(&expected_0, std::f64::EPSILON)); - assert!(m.var(1).approximate_eq(&expected_1, std::f64::EPSILON)); + assert!(m.var(0).approximate_eq(&expected_0, f64::EPSILON)); + assert!(m.var(1).approximate_eq(&expected_1, f64::EPSILON)); assert_eq!( m.mean(0), vec![0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25] diff --git a/src/linalg/traits/svd.rs b/src/linalg/traits/svd.rs index 75c303ae..cee33a0e 100644 --- a/src/linalg/traits/svd.rs +++ b/src/linalg/traits/svd.rs @@ -48,11 +48,9 @@ pub struct SVD> { pub V: M, /// Singular values of the original matrix pub s: Vec, - /// m: usize, - /// n: usize, - /// + /// Tolerance tol: T, } diff --git a/src/linear/bg_solver.rs b/src/linear/bg_solver.rs index 6ee4f0ec..2c466b13 100644 --- a/src/linear/bg_solver.rs +++ b/src/linear/bg_solver.rs @@ -27,9 +27,9 @@ use crate::error::Failed; use crate::linalg::basic::arrays::{Array, Array1, Array2, ArrayView1, MutArrayView1}; use crate::numbers::floatnum::FloatNumber; -/// +/// Trait for Biconjugate Gradient Solver pub trait BiconjugateGradientSolver<'a, T: FloatNumber, X: Array2> { - /// + /// Solve Ax = b fn solve_mut( &self, a: &'a X, @@ -109,7 +109,7 @@ pub trait BiconjugateGradientSolver<'a, T: FloatNumber, X: Array2> { Ok(err) } - /// + /// solve preconditioner fn solve_preconditioner(&self, a: &'a X, b: &[T], x: &mut [T]) { let diag = Self::diag(a); let n = diag.len(); @@ -133,7 +133,7 @@ pub trait BiconjugateGradientSolver<'a, T: FloatNumber, X: Array2> { y.copy_from(&x.xa(true, a)); } - /// + /// Extract the diagonal from a matrix fn diag(a: &X) -> Vec { let (nrows, ncols) = a.shape(); let n = nrows.min(ncols); diff --git a/src/linear/lasso_optimizer.rs b/src/linear/lasso_optimizer.rs index 3f18c030..22119160 100644 --- a/src/linear/lasso_optimizer.rs +++ b/src/linear/lasso_optimizer.rs @@ -16,7 +16,7 @@ use crate::linalg::basic::arrays::{Array1, Array2, ArrayView1, MutArray, MutArra use crate::linear::bg_solver::BiconjugateGradientSolver; use crate::numbers::floatnum::FloatNumber; -/// +/// Interior Point Optimizer pub struct InteriorPointOptimizer> { ata: X, d1: Vec, @@ -25,9 +25,8 @@ pub struct InteriorPointOptimizer> { prs: Vec, } -/// impl> InteriorPointOptimizer { - /// + /// Initialize a new Interior Point Optimizer pub fn new(a: &X, n: usize) -> InteriorPointOptimizer { InteriorPointOptimizer { ata: a.ab(true, a, false), @@ -38,7 +37,7 @@ impl> InteriorPointOptimizer { } } - /// + /// Run the optimization pub fn optimize( &mut self, x: 
&X, @@ -101,7 +100,7 @@ impl> InteriorPointOptimizer { // CALCULATE DUALITY GAP let xnu = nu.xa(false, x); - let max_xnu = xnu.norm(std::f64::INFINITY); + let max_xnu = xnu.norm(f64::INFINITY); if max_xnu > lambda_f64 { let lnu = T::from_f64(lambda_f64 / max_xnu).unwrap(); nu.mul_scalar_mut(lnu); @@ -208,7 +207,6 @@ impl> InteriorPointOptimizer { Ok(w) } - /// fn sumlogneg(f: &X) -> T { let (n, _) = f.shape(); let mut sum = T::zero(); @@ -220,11 +218,9 @@ impl> InteriorPointOptimizer { } } -/// impl<'a, T: FloatNumber, X: Array2> BiconjugateGradientSolver<'a, T, X> for InteriorPointOptimizer { - /// fn solve_preconditioner(&self, a: &'a X, b: &[T], x: &mut [T]) { let (_, p) = a.shape(); @@ -234,7 +230,6 @@ impl<'a, T: FloatNumber, X: Array2> BiconjugateGradientSolver<'a, T, X> } } - /// fn mat_vec_mul(&self, _: &X, x: &Vec, y: &mut Vec) { let (_, p) = self.ata.shape(); let x_slice = Vec::from_slice(x.slice(0..p).as_ref()); @@ -246,7 +241,6 @@ impl<'a, T: FloatNumber, X: Array2> BiconjugateGradientSolver<'a, T, X> } } - /// fn mat_t_vec_mul(&self, a: &X, x: &Vec, y: &mut Vec) { self.mat_vec_mul(a, x, y); } diff --git a/src/linear/logistic_regression.rs b/src/linear/logistic_regression.rs index 12ecf8d8..7e934288 100644 --- a/src/linear/logistic_regression.rs +++ b/src/linear/logistic_regression.rs @@ -183,14 +183,11 @@ pub struct LogisticRegression< } trait ObjectiveFunction> { - /// fn f(&self, w_bias: &[T]) -> T; - /// #[allow(clippy::ptr_arg)] fn df(&self, g: &mut Vec, w_bias: &Vec); - /// #[allow(clippy::ptr_arg)] fn partial_dot(w: &[T], x: &X, v_col: usize, m_row: usize) -> T { let mut sum = T::zero(); @@ -629,11 +626,11 @@ mod tests { objective.df(&mut g, &vec![1., 2., 3., 4., 5., 6., 7., 8., 9.]); objective.df(&mut g, &vec![1., 2., 3., 4., 5., 6., 7., 8., 9.]); - assert!((g[0] + 33.000068218163484).abs() < std::f64::EPSILON); + assert!((g[0] + 33.000068218163484).abs() < f64::EPSILON); let f = objective.f(&[1., 2., 3., 4., 5., 6., 7., 8., 9.]); - assert!((f - 408.0052230582765).abs() < std::f64::EPSILON); + assert!((f - 408.0052230582765).abs() < f64::EPSILON); let objective_reg = MultiClassObjectiveFunction { x: &x, @@ -689,13 +686,13 @@ mod tests { objective.df(&mut g, &vec![1., 2., 3.]); objective.df(&mut g, &vec![1., 2., 3.]); - assert!((g[0] - 26.051064349381285).abs() < std::f64::EPSILON); - assert!((g[1] - 10.239000702928523).abs() < std::f64::EPSILON); - assert!((g[2] - 3.869294270156324).abs() < std::f64::EPSILON); + assert!((g[0] - 26.051064349381285).abs() < f64::EPSILON); + assert!((g[1] - 10.239000702928523).abs() < f64::EPSILON); + assert!((g[2] - 3.869294270156324).abs() < f64::EPSILON); let f = objective.f(&[1., 2., 3.]); - assert!((f - 59.76994756647412).abs() < std::f64::EPSILON); + assert!((f - 59.76994756647412).abs() < f64::EPSILON); let objective_reg = BinaryObjectiveFunction { x: &x, @@ -916,7 +913,7 @@ mod tests { let x: DenseMatrix = DenseMatrix::rand(52181, 94); let y1: Vec = vec![1; 2181]; let y2: Vec = vec![0; 50000]; - let y: Vec = y1.into_iter().chain(y2.into_iter()).collect(); + let y: Vec = y1.into_iter().chain(y2).collect(); let lr = LogisticRegression::fit(&x, &y, Default::default()).unwrap(); let lr_reg = LogisticRegression::fit( @@ -938,12 +935,12 @@ mod tests { let x: &DenseMatrix = &DenseMatrix::rand(52181, 94); let y1: Vec = vec![1; 2181]; let y2: Vec = vec![0; 50000]; - let y: &Vec = &(y1.into_iter().chain(y2.into_iter()).collect()); + let y: &Vec = &(y1.into_iter().chain(y2).collect()); println!("y vec height: {:?}", y.len()); println!("x 
matrix shape: {:?}", x.shape()); let lr = LogisticRegression::fit(x, y, Default::default()).unwrap(); - let y_hat = lr.predict(&x).unwrap(); + let y_hat = lr.predict(x).unwrap(); println!("y_hat shape: {:?}", y_hat.shape()); diff --git a/src/naive_bayes/bernoulli.rs b/src/naive_bayes/bernoulli.rs index 33f00bd4..4be62d56 100644 --- a/src/naive_bayes/bernoulli.rs +++ b/src/naive_bayes/bernoulli.rs @@ -258,7 +258,7 @@ impl BernoulliNBDistribution { /// * `x` - training data. /// * `y` - vector with target values (classes) of length N. /// * `priors` - Optional vector with prior probabilities of the classes. If not defined, - /// priors are adjusted according to the data. + /// priors are adjusted according to the data. /// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter. /// * `binarize` - Threshold for binarizing. fn fit, Y: Array1>( @@ -402,10 +402,10 @@ impl, Y: Arr { /// Fits BernoulliNB with given data /// * `x` - training data of size NxM where N is the number of samples and M is the number of - /// features. + /// features. /// * `y` - vector with target values (classes) of length N. /// * `parameters` - additional parameters like class priors, alpha for smoothing and - /// binarizing threshold. + /// binarizing threshold. pub fn fit(x: &X, y: &Y, parameters: BernoulliNBParameters) -> Result { let distribution = if let Some(threshold) = parameters.binarize { BernoulliNBDistribution::fit( @@ -427,6 +427,7 @@ impl, Y: Arr /// Estimates the class labels for the provided data. /// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features. + /// /// Returns a vector of size N with class estimates. pub fn predict(&self, x: &X) -> Result { if let Some(threshold) = self.binarize { diff --git a/src/naive_bayes/categorical.rs b/src/naive_bayes/categorical.rs index 71a7487a..b60ee0d3 100644 --- a/src/naive_bayes/categorical.rs +++ b/src/naive_bayes/categorical.rs @@ -95,7 +95,7 @@ impl PartialEq for CategoricalNBDistribution { return false; } for (a_i_j, b_i_j) in a_i.iter().zip(b_i.iter()) { - if (*a_i_j - *b_i_j).abs() > std::f64::EPSILON { + if (*a_i_j - *b_i_j).abs() > f64::EPSILON { return false; } } @@ -363,7 +363,7 @@ impl, Y: Array1> Predictor for Categ impl, Y: Array1> CategoricalNB { /// Fits CategoricalNB with given data /// * `x` - training data of size NxM where N is the number of samples and M is the number of - /// features. + /// features. /// * `y` - vector with target values (classes) of length N. /// * `parameters` - additional parameters like alpha for smoothing pub fn fit(x: &X, y: &Y, parameters: CategoricalNBParameters) -> Result { @@ -375,6 +375,7 @@ impl, Y: Array1> CategoricalNB { /// Estimates the class labels for the provided data. /// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features. + /// /// Returns a vector of size N with class estimates. pub fn predict(&self, x: &X) -> Result { self.inner.as_ref().unwrap().predict(x) diff --git a/src/naive_bayes/gaussian.rs b/src/naive_bayes/gaussian.rs index aff996be..e774fdc9 100644 --- a/src/naive_bayes/gaussian.rs +++ b/src/naive_bayes/gaussian.rs @@ -175,7 +175,7 @@ impl GaussianNBDistribution { /// * `x` - training data. /// * `y` - vector with target values (classes) of length N. /// * `priors` - Optional vector with prior probabilities of the classes. If not defined, - /// priors are adjusted according to the data. + /// priors are adjusted according to the data. 
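// --- Editor's aside: illustrative sketch, not part of the patch --------------
// "Priors adjusted according to the data" means class frequencies are used
// when no priors are supplied; passing explicit priors skips that estimation.
// A sketch assuming the `with_priors` parameters builder used elsewhere in
// the crate's tests (data chosen to avoid zero per-class variance):
#[test]
fn editor_sketch_gaussian_nb_priors() {
    use crate::linalg::basic::matrix::DenseMatrix;
    use crate::naive_bayes::gaussian::{GaussianNB, GaussianNBParameters};

    let x = DenseMatrix::from_2d_array(&[&[-1., -1.], &[-2., -2.], &[1., 1.], &[2., 2.]])
        .unwrap();
    let y: Vec<u32> = vec![1, 1, 2, 2];
    // Priors are given in class order (here [1, 2]) and must sum to 1.
    let params = GaussianNBParameters::default().with_priors(vec![0.3, 0.7]);
    let gnb = GaussianNB::fit(&x, &y, params).unwrap();
    assert_eq!(gnb.predict(&x).unwrap(), vec![1, 1, 2, 2]);
}
// --- end aside ----------------------------------------------------------------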
pub fn fit, Y: Array1>( x: &X, y: &Y, @@ -317,7 +317,7 @@ impl, Y: Arr { /// Fits GaussianNB with given data /// * `x` - training data of size NxM where N is the number of samples and M is the number of - /// features. + /// features. /// * `y` - vector with target values (classes) of length N. /// * `parameters` - additional parameters like class priors. pub fn fit(x: &X, y: &Y, parameters: GaussianNBParameters) -> Result { @@ -328,6 +328,7 @@ impl, Y: Arr /// Estimates the class labels for the provided data. /// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features. + /// /// Returns a vector of size N with class estimates. pub fn predict(&self, x: &X) -> Result { self.inner.as_ref().unwrap().predict(x) diff --git a/src/naive_bayes/mod.rs b/src/naive_bayes/mod.rs index 1d74a315..31cdd46d 100644 --- a/src/naive_bayes/mod.rs +++ b/src/naive_bayes/mod.rs @@ -89,6 +89,7 @@ impl, Y: Array1, D: NBDistribution Result { let y_classes = self.distribution.classes(); @@ -163,7 +164,7 @@ mod tests { } fn classes(&self) -> &Vec { - &self.0 + self.0 } } diff --git a/src/naive_bayes/multinomial.rs b/src/naive_bayes/multinomial.rs index 2d6c437c..e00965ed 100644 --- a/src/naive_bayes/multinomial.rs +++ b/src/naive_bayes/multinomial.rs @@ -208,7 +208,7 @@ impl MultinomialNBDistribution { /// * `x` - training data. /// * `y` - vector with target values (classes) of length N. /// * `priors` - Optional vector with prior probabilities of the classes. If not defined, - /// priors are adjusted according to the data. + /// priors are adjusted according to the data. /// * `alpha` - Additive (Laplace/Lidstone) smoothing parameter. pub fn fit, Y: Array1>( x: &X, @@ -345,10 +345,10 @@ impl, Y: Array { /// Fits MultinomialNB with given data /// * `x` - training data of size NxM where N is the number of samples and M is the number of - /// features. + /// features. /// * `y` - vector with target values (classes) of length N. /// * `parameters` - additional parameters like class priors, alpha for smoothing and - /// binarizing threshold. + /// binarizing threshold. pub fn fit(x: &X, y: &Y, parameters: MultinomialNBParameters) -> Result { let distribution = MultinomialNBDistribution::fit(x, y, parameters.alpha, parameters.priors)?; @@ -358,6 +358,7 @@ impl, Y: Array /// Estimates the class labels for the provided data. /// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features. + /// /// Returns a vector of size N with class estimates. 
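// --- Editor's aside: illustrative sketch, not part of the patch --------------
// The classic worked example for this estimator, with the same term-count
// data as the module docs patched above: classes 0 = "China", 1 = "not
// China"; the held-out document "Chinese Chinese Chinese Tokyo Japan" is
// assigned class 0.
#[test]
fn editor_sketch_multinomial_nb_predict() {
    use crate::linalg::basic::matrix::DenseMatrix;
    use crate::naive_bayes::multinomial::MultinomialNB;

    let x = DenseMatrix::<u32>::from_2d_array(&[
        &[1, 2, 0, 0, 0, 0],
        &[0, 2, 0, 0, 1, 0],
        &[0, 1, 0, 1, 0, 0],
        &[0, 1, 1, 0, 0, 1],
    ])
    .unwrap();
    let y: Vec<u32> = vec![0, 0, 0, 1];
    let nb = MultinomialNB::fit(&x, &y, Default::default()).unwrap();

    let x_test = DenseMatrix::<u32>::from_2d_array(&[&[0, 3, 1, 0, 0, 1]]).unwrap();
    assert_eq!(nb.predict(&x_test).unwrap(), vec![0]);
}
// --- end aside ----------------------------------------------------------------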
pub fn predict(&self, x: &X) -> Result { self.inner.as_ref().unwrap().predict(x) diff --git a/src/neighbors/knn_classifier.rs b/src/neighbors/knn_classifier.rs index d18620c9..137143e0 100644 --- a/src/neighbors/knn_classifier.rs +++ b/src/neighbors/knn_classifier.rs @@ -261,6 +261,7 @@ impl, Y: Array1, D: Distance Result { let mut result = Y::zeros(x.shape().0); diff --git a/src/neighbors/knn_regressor.rs b/src/neighbors/knn_regressor.rs index e4efe48a..b49743f8 100644 --- a/src/neighbors/knn_regressor.rs +++ b/src/neighbors/knn_regressor.rs @@ -88,25 +88,21 @@ pub struct KNNRegressor, Y: Array1, D: impl, Y: Array1, D: Distance>> KNNRegressor { - /// fn y(&self) -> &Y { self.y.as_ref().unwrap() } - /// fn knn_algorithm(&self) -> &KNNAlgorithm { self.knn_algorithm .as_ref() .expect("Missing parameter: KNNAlgorithm") } - /// fn weight(&self) -> &KNNWeightFunction { self.weight.as_ref().expect("Missing parameter: weight") } #[allow(dead_code)] - /// fn k(&self) -> usize { self.k.unwrap() } @@ -250,6 +246,7 @@ impl, Y: Array1, D: Distance>> /// Predict the target for the provided data. /// * `x` - data of shape NxM where N is number of data points to estimate and M is number of features. + /// /// Returns a vector of size N with estimates. pub fn predict(&self, x: &X) -> Result { let mut result = Y::zeros(x.shape().0); @@ -312,7 +309,7 @@ mod tests { let y_hat = knn.predict(&x).unwrap(); assert_eq!(5, Vec::len(&y_hat)); for i in 0..y_hat.len() { - assert!((y_hat[i] - y_exp[i]).abs() < std::f64::EPSILON); + assert!((y_hat[i] - y_exp[i]).abs() < f64::EPSILON); } } diff --git a/src/optimization/first_order/gradient_descent.rs b/src/optimization/first_order/gradient_descent.rs index 9cc78f0c..0be7222f 100644 --- a/src/optimization/first_order/gradient_descent.rs +++ b/src/optimization/first_order/gradient_descent.rs @@ -1,5 +1,3 @@ -// TODO: missing documentation - use std::default::Default; use crate::linalg::basic::arrays::Array1; @@ -8,30 +6,27 @@ use crate::optimization::first_order::{FirstOrderOptimizer, OptimizerResult}; use crate::optimization::line_search::LineSearchMethod; use crate::optimization::{DF, F}; -/// +/// Gradient Descent optimization algorithm pub struct GradientDescent { - /// + /// Maximum number of iterations pub max_iter: usize, - /// + /// Relative tolerance for the gradient norm pub g_rtol: f64, - /// + /// Absolute tolerance for the gradient norm pub g_atol: f64, } -/// impl Default for GradientDescent { fn default() -> Self { GradientDescent { max_iter: 10000, - g_rtol: std::f64::EPSILON.sqrt(), - g_atol: std::f64::EPSILON, + g_rtol: f64::EPSILON.sqrt(), + g_atol: f64::EPSILON, } } } -/// impl FirstOrderOptimizer for GradientDescent { - /// fn optimize<'a, X: Array1, LS: LineSearchMethod>( &self, f: &'a F<'_, T, X>, diff --git a/src/optimization/first_order/lbfgs.rs b/src/optimization/first_order/lbfgs.rs index 81e7b640..b4f6c9f1 100644 --- a/src/optimization/first_order/lbfgs.rs +++ b/src/optimization/first_order/lbfgs.rs @@ -11,31 +11,29 @@ use crate::optimization::first_order::{FirstOrderOptimizer, OptimizerResult}; use crate::optimization::line_search::LineSearchMethod; use crate::optimization::{DF, F}; -/// +/// Limited-memory BFGS optimization algorithm pub struct LBFGS { - /// + /// Maximum number of iterations pub max_iter: usize, - /// + /// TODO: Add documentation pub g_rtol: f64, - /// + /// TODO: Add documentation pub g_atol: f64, - /// + /// TODO: Add documentation pub x_atol: f64, - /// + /// TODO: Add documentation pub x_rtol: f64, - /// + /// TODO: Add 
documentation pub f_abstol: f64, - /// + /// TODO: Add documentation pub f_reltol: f64, - /// + /// TODO: Add documentation pub successive_f_tol: usize, - /// + /// TODO: Add documentation pub m: usize, } -/// impl Default for LBFGS { - /// fn default() -> Self { LBFGS { max_iter: 1000, @@ -51,9 +49,7 @@ impl Default for LBFGS { } } -/// impl LBFGS { - /// fn two_loops>(&self, state: &mut LBFGSState) { let lower = state.iteration.max(self.m) - self.m; let upper = state.iteration; @@ -95,7 +91,6 @@ impl LBFGS { state.s.mul_scalar_mut(-T::one()); } - /// fn init_state>(&self, x: &X) -> LBFGSState { LBFGSState { x: x.clone(), @@ -119,7 +114,6 @@ impl LBFGS { } } - /// fn update_state<'a, T: FloatNumber + RealNumber, X: Array1, LS: LineSearchMethod>( &self, f: &'a F<'_, T, X>, @@ -161,7 +155,6 @@ impl LBFGS { df(&mut state.x_df, &state.x); } - /// fn assess_convergence>( &self, state: &mut LBFGSState, @@ -173,7 +166,7 @@ impl LBFGS { } if state.x.max_diff(&state.x_prev) - <= T::from_f64(self.x_rtol * state.x.norm(std::f64::INFINITY)).unwrap() + <= T::from_f64(self.x_rtol * state.x.norm(f64::INFINITY)).unwrap() { x_converged = true; } @@ -188,14 +181,13 @@ impl LBFGS { state.counter_f_tol += 1; } - if state.x_df.norm(std::f64::INFINITY) <= self.g_atol { + if state.x_df.norm(f64::INFINITY) <= self.g_atol { g_converged = true; } g_converged || x_converged || state.counter_f_tol > self.successive_f_tol } - /// fn update_hessian>( &self, _: &DF<'_, X>, @@ -212,7 +204,6 @@ impl LBFGS { } } -/// #[derive(Debug)] struct LBFGSState> { x: X, @@ -234,9 +225,7 @@ struct LBFGSState> { alpha: T, } -/// impl FirstOrderOptimizer for LBFGS { - /// fn optimize<'a, X: Array1, LS: LineSearchMethod>( &self, f: &F<'_, T, X>, @@ -248,7 +237,7 @@ impl FirstOrderOptimizer for LBFGS { df(&mut state.x_df, x0); - let g_converged = state.x_df.norm(std::f64::INFINITY) < self.g_atol; + let g_converged = state.x_df.norm(f64::INFINITY) < self.g_atol; let mut converged = g_converged; let stopped = false; @@ -299,7 +288,7 @@ mod tests { let result = optimizer.optimize(&f, &df, &x0, &ls); - assert!((result.f_x - 0.0).abs() < std::f64::EPSILON); + assert!((result.f_x - 0.0).abs() < f64::EPSILON); assert!((result.x[0] - 1.0).abs() < 1e-8); assert!((result.x[1] - 1.0).abs() < 1e-8); assert!(result.iterations <= 24); diff --git a/src/optimization/first_order/mod.rs b/src/optimization/first_order/mod.rs index 910be275..cf7e4f91 100644 --- a/src/optimization/first_order/mod.rs +++ b/src/optimization/first_order/mod.rs @@ -1,6 +1,6 @@ -/// +/// Gradient descent optimization algorithm pub mod gradient_descent; -/// +/// Limited-memory BFGS optimization algorithm pub mod lbfgs; use std::clone::Clone; @@ -11,9 +11,9 @@ use crate::numbers::floatnum::FloatNumber; use crate::optimization::line_search::LineSearchMethod; use crate::optimization::{DF, F}; -/// +/// First-order optimization is a class of algorithms that use the first derivative of a function to find optimal solutions. 
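// --- Editor's aside: illustrative sketch, not part of the patch --------------
// How the pieces below fit together: an objective `F`, its gradient `DF`, a
// line search, and an optimizer. The lbfgs.rs hunk above exercises this with
// the Rosenbrock function; here is the same shape with the simpler
// `GradientDescent` on a convex quadratic, assuming the APIs exactly as
// shown in these hunks:
#[test]
fn editor_sketch_gradient_descent() {
    use crate::optimization::first_order::gradient_descent::GradientDescent;
    use crate::optimization::first_order::FirstOrderOptimizer;
    use crate::optimization::line_search::Backtracking;
    use crate::optimization::FunctionOrder;

    // f(x) = (x0 - 3)^2 + (x1 + 1)^2, minimum f = 0 at (3, -1).
    let f = |x: &Vec<f64>| (x[0] - 3.0).powi(2) + (x[1] + 1.0).powi(2);
    let df = |g: &mut Vec<f64>, x: &Vec<f64>| {
        g[0] = 2.0 * (x[0] - 3.0);
        g[1] = 2.0 * (x[1] + 1.0);
    };
    let ls = Backtracking {
        order: FunctionOrder::SECOND,
        ..Default::default()
    };
    let optimizer: GradientDescent = Default::default();

    let x0 = vec![0.0, 0.0];
    let result = optimizer.optimize(&f, &df, &x0, &ls);
    assert!((result.x[0] - 3.0).abs() < 1e-4);
    assert!((result.x[1] + 1.0).abs() < 1e-4);
}
// --- end aside ----------------------------------------------------------------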
pub trait FirstOrderOptimizer { - /// + /// run first order optimization fn optimize<'a, X: Array1, LS: LineSearchMethod>( &self, f: &F<'_, T, X>, @@ -23,13 +23,13 @@ pub trait FirstOrderOptimizer { ) -> OptimizerResult; } -/// +/// Result of optimization #[derive(Debug, Clone)] pub struct OptimizerResult> { - /// + /// Solution pub x: X, - /// + /// f(x) value pub f_x: T, - /// + /// number of iterations pub iterations: usize, } diff --git a/src/optimization/line_search.rs b/src/optimization/line_search.rs index 9a2656cd..8357d8da 100644 --- a/src/optimization/line_search.rs +++ b/src/optimization/line_search.rs @@ -1,11 +1,9 @@ -// TODO: missing documentation - use crate::optimization::FunctionOrder; use num_traits::Float; -/// +/// Line search optimization. pub trait LineSearchMethod { - /// + /// Find alpha that satisfies strong Wolfe conditions. fn search( &self, f: &(dyn Fn(T) -> T), @@ -16,32 +14,31 @@ pub trait LineSearchMethod { ) -> LineSearchResult; } -/// +/// Line search result #[derive(Debug, Clone)] pub struct LineSearchResult { - /// + /// Alpha value pub alpha: T, - /// + /// f(alpha) value pub f_x: T, } -/// +/// Backtracking line search method. pub struct Backtracking { - /// + /// TODO: Add documentation pub c1: T, - /// + /// Maximum number of iterations for Backtracking single run pub max_iterations: usize, - /// + /// TODO: Add documentation pub max_infinity_iterations: usize, - /// + /// TODO: Add documentation pub phi: T, - /// + /// TODO: Add documentation pub plo: T, - /// + /// function order pub order: FunctionOrder, } -/// impl Default for Backtracking { fn default() -> Self { Backtracking { @@ -55,9 +52,7 @@ impl Default for Backtracking { } } -/// impl LineSearchMethod for Backtracking { - /// fn search( &self, f: &(dyn Fn(T) -> T), diff --git a/src/optimization/mod.rs b/src/optimization/mod.rs index 2f6c41a2..83ca2493 100644 --- a/src/optimization/mod.rs +++ b/src/optimization/mod.rs @@ -1,21 +1,19 @@ -// TODO: missing documentation - -/// +/// first order optimization algorithms pub mod first_order; -/// +/// line search algorithms pub mod line_search; -/// +/// Function f(x) = y pub type F<'a, T, X> = dyn for<'b> Fn(&'b X) -> T + 'a; -/// +/// Function df(x) pub type DF<'a, X> = dyn for<'b> Fn(&'b mut X, &'b X) + 'a; -/// +/// Function order #[allow(clippy::upper_case_acronyms)] #[derive(Debug, PartialEq, Eq)] pub enum FunctionOrder { - /// + /// Second order SECOND, - /// + /// Third order THIRD, } diff --git a/src/svm/mod.rs b/src/svm/mod.rs index 0792fdb8..f6baf8bb 100644 --- a/src/svm/mod.rs +++ b/src/svm/mod.rs @@ -292,7 +292,7 @@ mod tests { .unwrap() .abs(); - assert!((4913f64 - result) < std::f64::EPSILON); + assert!((4913f64 - result).abs() < f64::EPSILON); } #[cfg_attr( diff --git a/src/tree/decision_tree_classifier.rs b/src/tree/decision_tree_classifier.rs index 4da9f443..c6596517 100644 --- a/src/tree/decision_tree_classifier.rs +++ b/src/tree/decision_tree_classifier.rs @@ -197,12 +197,12 @@ impl PartialEq for Node { self.output == other.output && self.split_feature == other.split_feature && match (self.split_value, other.split_value) { - (Some(a), Some(b)) => (a - b).abs() < std::f64::EPSILON, + (Some(a), Some(b)) => (a - b).abs() < f64::EPSILON, (None, None) => true, _ => false, } && match (self.split_score, other.split_score) { - (Some(a), Some(b)) => (a - b).abs() < std::f64::EPSILON, + (Some(a), Some(b)) => (a - b).abs() < f64::EPSILON, (None, None) => true, _ => false, } @@ -613,7 +613,7 @@ impl, Y: Array1> 
visitor_queue.push_back(visitor); } - while tree.depth() < tree.parameters().max_depth.unwrap_or(std::u16::MAX) { + while tree.depth() < tree.parameters().max_depth.unwrap_or(u16::MAX) { match visitor_queue.pop_front() { Some(node) => tree.split(node, mtry, &mut visitor_queue, &mut rng), None => break, @@ -650,7 +650,7 @@ impl, Y: Array1> if node.true_child.is_none() && node.false_child.is_none() { result = node.output; } else if x.get((row, node.split_feature)).to_f64().unwrap() - <= node.split_value.unwrap_or(std::f64::NAN) + <= node.split_value.unwrap_or(f64::NAN) { queue.push_back(node.true_child.unwrap()); } else { @@ -803,9 +803,7 @@ impl, Y: Array1> .get((i, self.nodes()[visitor.node].split_feature)) .to_f64() .unwrap() - <= self.nodes()[visitor.node] - .split_value - .unwrap_or(std::f64::NAN) + <= self.nodes()[visitor.node].split_value.unwrap_or(f64::NAN) { *true_sample = visitor.samples[i]; tc += *true_sample; @@ -925,14 +923,14 @@ mod tests { )] #[test] fn gini_impurity() { - assert!((impurity(&SplitCriterion::Gini, &[7, 3], 10) - 0.42).abs() < std::f64::EPSILON); + assert!((impurity(&SplitCriterion::Gini, &[7, 3], 10) - 0.42).abs() < f64::EPSILON); assert!( (impurity(&SplitCriterion::Entropy, &[7, 3], 10) - 0.8812908992306927).abs() - < std::f64::EPSILON + < f64::EPSILON ); assert!( (impurity(&SplitCriterion::ClassificationError, &[7, 3], 10) - 0.3).abs() - < std::f64::EPSILON + < f64::EPSILON ); } diff --git a/src/tree/decision_tree_regressor.rs b/src/tree/decision_tree_regressor.rs index 1569af2e..d735697d 100644 --- a/src/tree/decision_tree_regressor.rs +++ b/src/tree/decision_tree_regressor.rs @@ -311,15 +311,15 @@ impl Node { impl PartialEq for Node { fn eq(&self, other: &Self) -> bool { - (self.output - other.output).abs() < std::f64::EPSILON + (self.output - other.output).abs() < f64::EPSILON && self.split_feature == other.split_feature && match (self.split_value, other.split_value) { - (Some(a), Some(b)) => (a - b).abs() < std::f64::EPSILON, + (Some(a), Some(b)) => (a - b).abs() < f64::EPSILON, (None, None) => true, _ => false, } && match (self.split_score, other.split_score) { - (Some(a), Some(b)) => (a - b).abs() < std::f64::EPSILON, + (Some(a), Some(b)) => (a - b).abs() < f64::EPSILON, (None, None) => true, _ => false, } @@ -478,7 +478,7 @@ impl, Y: Array1> visitor_queue.push_back(visitor); } - while tree.depth() < tree.parameters().max_depth.unwrap_or(std::u16::MAX) { + while tree.depth() < tree.parameters().max_depth.unwrap_or(u16::MAX) { match visitor_queue.pop_front() { Some(node) => tree.split(node, mtry, &mut visitor_queue, &mut rng), None => break, @@ -515,7 +515,7 @@ impl, Y: Array1> if node.true_child.is_none() && node.false_child.is_none() { result = node.output; } else if x.get((row, node.split_feature)).to_f64().unwrap() - <= node.split_value.unwrap_or(std::f64::NAN) + <= node.split_value.unwrap_or(f64::NAN) { queue.push_back(node.true_child.unwrap()); } else { @@ -640,9 +640,7 @@ impl, Y: Array1> .get((i, self.nodes()[visitor.node].split_feature)) .to_f64() .unwrap() - <= self.nodes()[visitor.node] - .split_value - .unwrap_or(std::f64::NAN) + <= self.nodes()[visitor.node].split_value.unwrap_or(f64::NAN) { *true_sample = visitor.samples[i]; tc += *true_sample; From 4523ac73ff45df7af40f3deac2c13d977066ce20 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Nov 2024 11:47:23 -0400 Subject: [PATCH 14/15] Update itertools requirement from 0.12.0 to 0.13.0 (#280) Updates the requirements on 
[itertools](https://github.com/rust-itertools/itertools) to permit the latest version. - [Changelog](https://github.com/rust-itertools/itertools/blob/master/CHANGELOG.md) - [Commits](https://github.com/rust-itertools/itertools/compare/v0.12.0...v0.13.0) --- updated-dependencies: - dependency-name: itertools dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index a3fea09b..b2e186a5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -48,7 +48,7 @@ getrandom = { version = "0.2.8", optional = true } wasm-bindgen-test = "0.3" [dev-dependencies] -itertools = "0.12.0" +itertools = "0.13.0" serde_json = "1.0" bincode = "1.3.1" From 3da433f757e37c257e7ea29286b613896c362573 Mon Sep 17 00:00:00 2001 From: Lorenzo Date: Mon, 20 Jan 2025 18:50:00 +0000 Subject: [PATCH 15/15] Implement predict_proba for DecisionTreeClassifier (#287) * Implement predict_proba for DecisionTreeClassifier * Some automated fixes suggested by cargo clippy --fix --- src/algorithm/neighbour/fastpair.rs | 4 +- src/linalg/basic/matrix.rs | 25 +++--- src/linalg/basic/vector.rs | 12 +-- src/linalg/ndarray/matrix.rs | 16 ++-- src/linalg/ndarray/vector.rs | 12 +-- src/linalg/traits/stats.rs | 1 - src/linear/logistic_regression.rs | 8 +- src/naive_bayes/mod.rs | 2 +- src/preprocessing/numerical.rs | 20 ++--- src/readers/csv.rs | 2 +- src/svm/svc.rs | 6 +- src/svm/svr.rs | 6 +- src/tree/decision_tree_classifier.rs | 113 +++++++++++++++++++++++++++ 13 files changed, 166 insertions(+), 61 deletions(-) diff --git a/src/algorithm/neighbour/fastpair.rs b/src/algorithm/neighbour/fastpair.rs index 9f663f67..4e99261b 100644 --- a/src/algorithm/neighbour/fastpair.rs +++ b/src/algorithm/neighbour/fastpair.rs @@ -212,7 +212,9 @@ mod tests_fastpair { use crate::linalg::basic::{arrays::Array, matrix::DenseMatrix}; /// Brute force algorithm, used only for comparison and testing - pub fn closest_pair_brute(fastpair: &FastPair>) -> PairwiseDistance { + pub fn closest_pair_brute( + fastpair: &FastPair<'_, f64, DenseMatrix>, + ) -> PairwiseDistance { use itertools::Itertools; let m = fastpair.samples.shape().0; diff --git a/src/linalg/basic/matrix.rs b/src/linalg/basic/matrix.rs index 47c5e9d2..88a0849c 100644 --- a/src/linalg/basic/matrix.rs +++ b/src/linalg/basic/matrix.rs @@ -91,7 +91,7 @@ impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixView<'a, T> { } } -impl<'a, T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrixView<'a, T> { +impl fmt::Display for DenseMatrixView<'_, T> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { writeln!( f, @@ -142,7 +142,7 @@ impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixMutView<'a, T> { } } - fn iter_mut<'b>(&'b mut self, axis: u8) -> Box + 'b> { + fn iter_mut<'b>(&'b mut self, axis: u8) -> Box + 'b> { let column_major = self.column_major; let stride = self.stride; let ptr = self.values.as_mut_ptr(); @@ -169,7 +169,7 @@ impl<'a, T: Debug + Display + Copy + Sized> DenseMatrixMutView<'a, T> { } } -impl<'a, T: Debug + Display + Copy + Sized> fmt::Display for DenseMatrixMutView<'a, T> { +impl fmt::Display for DenseMatrixMutView<'_, T> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { writeln!( f, @@ -493,7 +493,7 @@ impl EVDDecomposable for DenseMatrix {} impl LUDecomposable for DenseMatrix {} impl SVDDecomposable for DenseMatrix {} -impl<'a, T: Debug + Display + Copy + Sized> 
Array for DenseMatrixView<'a, T> { +impl Array for DenseMatrixView<'_, T> { fn get(&self, pos: (usize, usize)) -> &T { if self.column_major { &self.values[pos.0 + pos.1 * self.stride] @@ -515,7 +515,7 @@ impl<'a, T: Debug + Display + Copy + Sized> Array for DenseMa } } -impl<'a, T: Debug + Display + Copy + Sized> Array for DenseMatrixView<'a, T> { +impl Array for DenseMatrixView<'_, T> { fn get(&self, i: usize) -> &T { if self.nrows == 1 { if self.column_major { @@ -553,11 +553,11 @@ impl<'a, T: Debug + Display + Copy + Sized> Array for DenseMatrixView< } } -impl<'a, T: Debug + Display + Copy + Sized> ArrayView2 for DenseMatrixView<'a, T> {} +impl ArrayView2 for DenseMatrixView<'_, T> {} -impl<'a, T: Debug + Display + Copy + Sized> ArrayView1 for DenseMatrixView<'a, T> {} +impl ArrayView1 for DenseMatrixView<'_, T> {} -impl<'a, T: Debug + Display + Copy + Sized> Array for DenseMatrixMutView<'a, T> { +impl Array for DenseMatrixMutView<'_, T> { fn get(&self, pos: (usize, usize)) -> &T { if self.column_major { &self.values[pos.0 + pos.1 * self.stride] @@ -579,9 +579,7 @@ impl<'a, T: Debug + Display + Copy + Sized> Array for DenseMa } } -impl<'a, T: Debug + Display + Copy + Sized> MutArray - for DenseMatrixMutView<'a, T> -{ +impl MutArray for DenseMatrixMutView<'_, T> { fn set(&mut self, pos: (usize, usize), x: T) { if self.column_major { self.values[pos.0 + pos.1 * self.stride] = x; @@ -595,15 +593,16 @@ impl<'a, T: Debug + Display + Copy + Sized> MutArray } } -impl<'a, T: Debug + Display + Copy + Sized> MutArrayView2 for DenseMatrixMutView<'a, T> {} +impl MutArrayView2 for DenseMatrixMutView<'_, T> {} -impl<'a, T: Debug + Display + Copy + Sized> ArrayView2 for DenseMatrixMutView<'a, T> {} +impl ArrayView2 for DenseMatrixMutView<'_, T> {} impl MatrixStats for DenseMatrix {} impl MatrixPreprocessing for DenseMatrix {} #[cfg(test)] +#[warn(clippy::reversed_empty_ranges)] mod tests { use super::*; use approx::relative_eq; diff --git a/src/linalg/basic/vector.rs b/src/linalg/basic/vector.rs index 05c03756..d2e0bae6 100644 --- a/src/linalg/basic/vector.rs +++ b/src/linalg/basic/vector.rs @@ -119,7 +119,7 @@ impl Array1 for Vec { } } -impl<'a, T: Debug + Display + Copy + Sized> Array for VecMutView<'a, T> { +impl Array for VecMutView<'_, T> { fn get(&self, i: usize) -> &T { &self.ptr[i] } @@ -138,7 +138,7 @@ impl<'a, T: Debug + Display + Copy + Sized> Array for VecMutView<'a, T } } -impl<'a, T: Debug + Display + Copy + Sized> MutArray for VecMutView<'a, T> { +impl MutArray for VecMutView<'_, T> { fn set(&mut self, i: usize, x: T) { self.ptr[i] = x; } @@ -149,10 +149,10 @@ impl<'a, T: Debug + Display + Copy + Sized> MutArray for VecMutView<'a } } -impl<'a, T: Debug + Display + Copy + Sized> ArrayView1 for VecMutView<'a, T> {} -impl<'a, T: Debug + Display + Copy + Sized> MutArrayView1 for VecMutView<'a, T> {} +impl ArrayView1 for VecMutView<'_, T> {} +impl MutArrayView1 for VecMutView<'_, T> {} -impl<'a, T: Debug + Display + Copy + Sized> Array for VecView<'a, T> { +impl Array for VecView<'_, T> { fn get(&self, i: usize) -> &T { &self.ptr[i] } @@ -171,7 +171,7 @@ impl<'a, T: Debug + Display + Copy + Sized> Array for VecView<'a, T> { } } -impl<'a, T: Debug + Display + Copy + Sized> ArrayView1 for VecView<'a, T> {} +impl ArrayView1 for VecView<'_, T> {} #[cfg(test)] mod tests { diff --git a/src/linalg/ndarray/matrix.rs b/src/linalg/ndarray/matrix.rs index adc8d7e8..5040497a 100644 --- a/src/linalg/ndarray/matrix.rs +++ b/src/linalg/ndarray/matrix.rs @@ -68,7 +68,7 @@ impl ArrayView2 for ArrayBase impl 
MutArrayView2 for ArrayBase, Ix2> {} -impl<'a, T: Debug + Display + Copy + Sized> BaseArray for ArrayView<'a, T, Ix2> { +impl BaseArray for ArrayView<'_, T, Ix2> { fn get(&self, pos: (usize, usize)) -> &T { &self[[pos.0, pos.1]] } @@ -144,11 +144,9 @@ impl EVDDecomposable for ArrayBase, Ix2> impl LUDecomposable for ArrayBase, Ix2> {} impl SVDDecomposable for ArrayBase, Ix2> {} -impl<'a, T: Debug + Display + Copy + Sized> ArrayView2 for ArrayView<'a, T, Ix2> {} +impl ArrayView2 for ArrayView<'_, T, Ix2> {} -impl<'a, T: Debug + Display + Copy + Sized> BaseArray - for ArrayViewMut<'a, T, Ix2> -{ +impl BaseArray for ArrayViewMut<'_, T, Ix2> { fn get(&self, pos: (usize, usize)) -> &T { &self[[pos.0, pos.1]] } @@ -175,9 +173,7 @@ impl<'a, T: Debug + Display + Copy + Sized> BaseArray } } -impl<'a, T: Debug + Display + Copy + Sized> MutArray - for ArrayViewMut<'a, T, Ix2> -{ +impl MutArray for ArrayViewMut<'_, T, Ix2> { fn set(&mut self, pos: (usize, usize), x: T) { self[[pos.0, pos.1]] = x } @@ -195,9 +191,9 @@ impl<'a, T: Debug + Display + Copy + Sized> MutArray } } -impl<'a, T: Debug + Display + Copy + Sized> MutArrayView2 for ArrayViewMut<'a, T, Ix2> {} +impl MutArrayView2 for ArrayViewMut<'_, T, Ix2> {} -impl<'a, T: Debug + Display + Copy + Sized> ArrayView2 for ArrayViewMut<'a, T, Ix2> {} +impl ArrayView2 for ArrayViewMut<'_, T, Ix2> {} #[cfg(test)] mod tests { diff --git a/src/linalg/ndarray/vector.rs b/src/linalg/ndarray/vector.rs index 7105da89..de3f7d93 100644 --- a/src/linalg/ndarray/vector.rs +++ b/src/linalg/ndarray/vector.rs @@ -41,7 +41,7 @@ impl ArrayView1 for ArrayBase impl MutArrayView1 for ArrayBase, Ix1> {} -impl<'a, T: Debug + Display + Copy + Sized> BaseArray for ArrayView<'a, T, Ix1> { +impl BaseArray for ArrayView<'_, T, Ix1> { fn get(&self, i: usize) -> &T { &self[i] } @@ -60,9 +60,9 @@ impl<'a, T: Debug + Display + Copy + Sized> BaseArray for ArrayView<'a } } -impl<'a, T: Debug + Display + Copy + Sized> ArrayView1 for ArrayView<'a, T, Ix1> {} +impl ArrayView1 for ArrayView<'_, T, Ix1> {} -impl<'a, T: Debug + Display + Copy + Sized> BaseArray for ArrayViewMut<'a, T, Ix1> { +impl BaseArray for ArrayViewMut<'_, T, Ix1> { fn get(&self, i: usize) -> &T { &self[i] } @@ -81,7 +81,7 @@ impl<'a, T: Debug + Display + Copy + Sized> BaseArray for ArrayViewMut } } -impl<'a, T: Debug + Display + Copy + Sized> MutArray for ArrayViewMut<'a, T, Ix1> { +impl MutArray for ArrayViewMut<'_, T, Ix1> { fn set(&mut self, i: usize, x: T) { self[i] = x; } @@ -92,8 +92,8 @@ impl<'a, T: Debug + Display + Copy + Sized> MutArray for ArrayViewMut< } } -impl<'a, T: Debug + Display + Copy + Sized> ArrayView1 for ArrayViewMut<'a, T, Ix1> {} -impl<'a, T: Debug + Display + Copy + Sized> MutArrayView1 for ArrayViewMut<'a, T, Ix1> {} +impl ArrayView1 for ArrayViewMut<'_, T, Ix1> {} +impl MutArrayView1 for ArrayViewMut<'_, T, Ix1> {} impl Array1 for ArrayBase, Ix1> { fn slice<'a>(&'a self, range: Range) -> Box + 'a> { diff --git a/src/linalg/traits/stats.rs b/src/linalg/traits/stats.rs index 8702a81a..6c3db820 100644 --- a/src/linalg/traits/stats.rs +++ b/src/linalg/traits/stats.rs @@ -142,7 +142,6 @@ pub trait MatrixPreprocessing: MutArrayView2 + Clone { /// /// assert_eq!(a, expected); /// ``` - fn binarize_mut(&mut self, threshold: T) { let (nrows, ncols) = self.shape(); for row in 0..nrows { diff --git a/src/linear/logistic_regression.rs b/src/linear/logistic_regression.rs index 7e934288..c28dc347 100644 --- a/src/linear/logistic_regression.rs +++ b/src/linear/logistic_regression.rs @@ -258,8 +258,8 @@ 
impl, Y: } } -impl<'a, T: Number + FloatNumber, X: Array2> ObjectiveFunction - for BinaryObjectiveFunction<'a, T, X> +impl> ObjectiveFunction + for BinaryObjectiveFunction<'_, T, X> { fn f(&self, w_bias: &[T]) -> T { let mut f = T::zero(); @@ -313,8 +313,8 @@ struct MultiClassObjectiveFunction<'a, T: Number + FloatNumber, X: Array2> { _phantom_t: PhantomData, } -impl<'a, T: Number + FloatNumber + RealNumber, X: Array2> ObjectiveFunction - for MultiClassObjectiveFunction<'a, T, X> +impl> ObjectiveFunction + for MultiClassObjectiveFunction<'_, T, X> { fn f(&self, w_bias: &[T]) -> T { let mut f = T::zero(); diff --git a/src/naive_bayes/mod.rs b/src/naive_bayes/mod.rs index 31cdd46d..26d91545 100644 --- a/src/naive_bayes/mod.rs +++ b/src/naive_bayes/mod.rs @@ -147,7 +147,7 @@ mod tests { #[derive(Debug, PartialEq, Clone)] struct TestDistribution<'d>(&'d Vec); - impl<'d> NBDistribution for TestDistribution<'d> { + impl NBDistribution for TestDistribution<'_> { fn prior(&self, _class_index: usize) -> f64 { 1. } diff --git a/src/preprocessing/numerical.rs b/src/preprocessing/numerical.rs index ddb74a45..674f6814 100644 --- a/src/preprocessing/numerical.rs +++ b/src/preprocessing/numerical.rs @@ -172,18 +172,14 @@ where T: Number + RealNumber, M: Array2, { - if let Some(output_matrix) = columns.first().cloned() { - return Some( - columns - .iter() - .skip(1) - .fold(output_matrix, |current_matrix, new_colum| { - current_matrix.h_stack(new_colum) - }), - ); - } else { - None - } + columns.first().cloned().map(|output_matrix| { + columns + .iter() + .skip(1) + .fold(output_matrix, |current_matrix, new_colum| { + current_matrix.h_stack(new_colum) + }) + }) } #[cfg(test)] diff --git a/src/readers/csv.rs b/src/readers/csv.rs index f8a03ebd..e9a88436 100644 --- a/src/readers/csv.rs +++ b/src/readers/csv.rs @@ -30,7 +30,7 @@ pub struct CSVDefinition<'a> { /// What seperates the fields in your csv-file? 
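The recurring rewrite in these hunks, `impl<'a, T> Trait for Type<'a, T>` becoming `impl<T> Trait for Type<'_, T>`, is the form clippy's `needless_lifetimes` lint suggests whenever an impl block never actually names the lifetime. A minimal standalone illustration, with made-up `View`/`Len` names rather than smartcore's types:

```rust
// Standalone illustration of the lifetime-elision rewrite applied
// throughout this patch; `View` and `Len` are invented names.
struct View<'a, T>(&'a [T]);

trait Len {
    fn len(&self) -> usize;
}

// Before: impl<'a, T> Len for View<'a, T> { ... }
// After: the lifetime is never named in the body, so `'_` suffices.
impl<T> Len for View<'_, T> {
    fn len(&self) -> usize {
        self.0.len()
    }
}

fn main() {
    let data = [1, 2, 3];
    assert_eq!(View(&data).len(), 3);
}
```

The behaviour is unchanged; the elided form simply drops a lifetime parameter that carries no information, which is why `cargo clippy --fix` can apply it mechanically across the crate.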
field_seperator: &'a str, } -impl<'a> Default for CSVDefinition<'a> { +impl Default for CSVDefinition<'_> { fn default() -> Self { Self { n_rows_header: 1, diff --git a/src/svm/svc.rs b/src/svm/svc.rs index 6477778b..cc5a0beb 100644 --- a/src/svm/svc.rs +++ b/src/svm/svc.rs @@ -360,8 +360,8 @@ impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2 + 'a, Y: Array } } -impl<'a, TX: Number + RealNumber, TY: Number + Ord, X: Array2, Y: Array1> PartialEq - for SVC<'a, TX, TY, X, Y> +impl, Y: Array1> PartialEq + for SVC<'_, TX, TY, X, Y> { fn eq(&self, other: &Self) -> bool { if (self.b.unwrap().sub(other.b.unwrap())).abs() > TX::epsilon() * TX::two() @@ -1110,7 +1110,7 @@ mod tests { let svc = SVC::fit(&x, &y, ¶ms).unwrap(); // serialization - let deserialized_svc: SVC = + let deserialized_svc: SVC<'_, f64, i32, _, _> = serde_json::from_str(&serde_json::to_string(&svc).unwrap()).unwrap(); assert_eq!(svc, deserialized_svc); diff --git a/src/svm/svr.rs b/src/svm/svr.rs index e68ebf85..4ce0aa28 100644 --- a/src/svm/svr.rs +++ b/src/svm/svr.rs @@ -281,8 +281,8 @@ impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2, Y: Array1> SVR<' } } -impl<'a, T: Number + FloatNumber + PartialOrd, X: Array2, Y: Array1> PartialEq - for SVR<'a, T, X, Y> +impl, Y: Array1> PartialEq + for SVR<'_, T, X, Y> { fn eq(&self, other: &Self) -> bool { if (self.b - other.b).abs() > T::epsilon() * T::two() @@ -702,7 +702,7 @@ mod tests { let svr = SVR::fit(&x, &y, ¶ms).unwrap(); - let deserialized_svr: SVR, _> = + let deserialized_svr: SVR<'_, f64, DenseMatrix, _> = serde_json::from_str(&serde_json::to_string(&svr).unwrap()).unwrap(); assert_eq!(svr, deserialized_svr); diff --git a/src/tree/decision_tree_classifier.rs b/src/tree/decision_tree_classifier.rs index c6596517..5679516a 100644 --- a/src/tree/decision_tree_classifier.rs +++ b/src/tree/decision_tree_classifier.rs @@ -77,7 +77,9 @@ use serde::{Deserialize, Serialize}; use crate::api::{Predictor, SupervisedEstimator}; use crate::error::Failed; +use crate::linalg::basic::arrays::MutArray; use crate::linalg::basic::arrays::{Array1, Array2, MutArrayView1}; +use crate::linalg::basic::matrix::DenseMatrix; use crate::numbers::basenum::Number; use crate::rand_custom::get_rng_impl; @@ -887,11 +889,77 @@ impl, Y: Array1> } importances } + + /// Predict class probabilities for the input samples. + /// + /// # Arguments + /// + /// * `x` - The input samples as a matrix where each row is a sample and each column is a feature. + /// + /// # Returns + /// + /// A `Result` containing a `DenseMatrix` where each row corresponds to a sample and each column + /// corresponds to a class. The values represent the probability of the sample belonging to each class. + /// + /// # Errors + /// + /// Returns an error if at least one row prediction process fails. + pub fn predict_proba(&self, x: &X) -> Result, Failed> { + let (n_samples, _) = x.shape(); + let n_classes = self.classes().len(); + let mut result = DenseMatrix::::zeros(n_samples, n_classes); + + for i in 0..n_samples { + let probs = self.predict_proba_for_row(x, i)?; + for (j, &prob) in probs.iter().enumerate() { + result.set((i, j), prob); + } + } + + Ok(result) + } + + /// Predict class probabilities for a single input sample. + /// + /// # Arguments + /// + /// * `x` - The input matrix containing all samples. + /// * `row` - The index of the row in `x` for which to predict probabilities. 
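The SVC/SVR `PartialEq` impls touched above compare floating-point members against `epsilon() * two()` rather than with `==`. Here is that pattern in isolation, as a sketch with invented types rather than the crate's code:

```rust
// Tolerance-based equality for a struct with float fields; a sketch of
// the pattern used by the SVC/SVR PartialEq impls (names are invented).
#[derive(Debug)]
struct Model {
    b: f64,
    w: Vec<f64>,
}

impl PartialEq for Model {
    fn eq(&self, other: &Self) -> bool {
        (self.b - other.b).abs() <= f64::EPSILON * 2.0
            && self.w.len() == other.w.len()
            && self
                .w
                .iter()
                .zip(other.w.iter())
                .all(|(a, b)| (a - b).abs() <= f64::EPSILON * 2.0)
    }
}

fn main() {
    // 0.1 + 0.2 and 0.3 differ in the last bits of an f64, so `==` on the
    // raw floats would fail, but the tolerance comparison treats the two
    // models as equal.
    let a = Model { b: 0.1 + 0.2, w: vec![1.0] };
    let c = Model { b: 0.3, w: vec![1.0] };
    assert_eq!(a, c);
}
```

This is what keeps equality assertions such as the serde round-trip tests above robust against last-bit floating-point differences.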
+ /// + /// # Returns + /// + /// A vector of probabilities, one for each class, representing the probability + /// of the input sample belonging to each class. + fn predict_proba_for_row(&self, x: &X, row: usize) -> Result, Failed> { + let mut node = 0; + + while let Some(current_node) = self.nodes().get(node) { + if current_node.true_child.is_none() && current_node.false_child.is_none() { + // Leaf node reached + let mut probs = vec![0.0; self.classes().len()]; + probs[current_node.output] = 1.0; + return Ok(probs); + } + + let split_feature = current_node.split_feature; + let split_value = current_node.split_value.unwrap_or(f64::NAN); + + if x.get((row, split_feature)).to_f64().unwrap() <= split_value { + node = current_node.true_child.unwrap(); + } else { + node = current_node.false_child.unwrap(); + } + } + + // This should never happen if the tree is properly constructed + Err(Failed::predict("Nodes iteration did not reach leaf")) + } } #[cfg(test)] mod tests { use super::*; + use crate::linalg::basic::arrays::Array; use crate::linalg::basic::matrix::DenseMatrix; #[test] @@ -934,6 +1002,51 @@ mod tests { ); } + #[cfg_attr( + all(target_arch = "wasm32", not(target_os = "wasi")), + wasm_bindgen_test::wasm_bindgen_test + )] + #[test] + fn test_predict_proba() { + let x: DenseMatrix = DenseMatrix::from_2d_array(&[ + &[5.1, 3.5, 1.4, 0.2], + &[4.9, 3.0, 1.4, 0.2], + &[4.7, 3.2, 1.3, 0.2], + &[4.6, 3.1, 1.5, 0.2], + &[5.0, 3.6, 1.4, 0.2], + &[7.0, 3.2, 4.7, 1.4], + &[6.4, 3.2, 4.5, 1.5], + &[6.9, 3.1, 4.9, 1.5], + &[5.5, 2.3, 4.0, 1.3], + &[6.5, 2.8, 4.6, 1.5], + ]) + .unwrap(); + let y: Vec = vec![0, 0, 0, 0, 0, 1, 1, 1, 1, 1]; + + let tree = DecisionTreeClassifier::fit(&x, &y, Default::default()).unwrap(); + let probabilities = tree.predict_proba(&x).unwrap(); + + assert_eq!(probabilities.shape(), (10, 2)); + + for row in 0..10 { + let row_sum: f64 = probabilities.get_row(row).sum(); + assert!( + (row_sum - 1.0).abs() < 1e-6, + "Row probabilities should sum to 1" + ); + } + + // Check if the first 5 samples have higher probability for class 0 + for i in 0..5 { + assert!(probabilities.get((i, 0)) > probabilities.get((i, 1))); + } + + // Check if the last 5 samples have higher probability for class 1 + for i in 5..10 { + assert!(probabilities.get((i, 1)) > probabilities.get((i, 0))); + } + } + #[cfg_attr( all(target_arch = "wasm32", not(target_os = "wasi")), wasm_bindgen_test::wasm_bindgen_test
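Taken together, the new API can be driven from outside the crate roughly as below. This is a usage sketch that assumes the module paths shown in this series are the crate's public ones; note that, because `predict_proba_for_row` assigns probability 1.0 to a leaf's output class, each returned row is one-hot in this implementation.

```rust
// Usage sketch for the new predict_proba (module paths assumed public).
use smartcore::linalg::basic::arrays::Array;
use smartcore::linalg::basic::matrix::DenseMatrix;
use smartcore::tree::decision_tree_classifier::DecisionTreeClassifier;

fn main() {
    let x: DenseMatrix<f64> = DenseMatrix::from_2d_array(&[
        &[5.1, 3.5, 1.4, 0.2],
        &[7.0, 3.2, 4.7, 1.4],
    ])
    .unwrap();
    let y: Vec<u32> = vec![0, 1];

    let tree = DecisionTreeClassifier::fit(&x, &y, Default::default()).unwrap();
    let proba = tree.predict_proba(&x).unwrap();

    // One row per sample, one column per class; rows sum to 1.
    assert_eq!(proba.shape(), (2, 2));
    assert!(*proba.get((0, 0)) > *proba.get((0, 1)));
}
```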