From e6e9350d4eead4b9ee69ffbe75ec20d71a84f04c Mon Sep 17 00:00:00 2001 From: Cameron Davidson-Pilon Date: Thu, 4 Dec 2014 23:09:16 -0500 Subject: [PATCH] adding explicit col ordering --- lifelines/estimation.py | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/lifelines/estimation.py b/lifelines/estimation.py index ba412f406..5f61d2e69 100644 --- a/lifelines/estimation.py +++ b/lifelines/estimation.py @@ -718,7 +718,9 @@ def _compute_confidence_intervals(self): def predict_cumulative_hazard(self, X, id_col=None): """ - X: a (n,d) covariate matrix + X: a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns + can be in any order. If a numpy array, columns must be in the + same order as the training data. Returns the hazard rates for the individuals """ @@ -727,17 +729,21 @@ def predict_cumulative_hazard(self, X, id_col=None): raise NotImplementedError n, d = X.shape - try: - X_ = X.values.copy() - except: - X_ = X.copy() - X_ = X.copy() if not self.fit_intercept else np.c_[X.copy(), np.ones((n, 1))] + cols = get_index(X) + if isinstance(X, pd.DataFrame): + order = self.cumulative_hazards_.columns.drop('baseline') + X_ = X[order].values.copy() + else: + X_ = X.copy() + X_ = X_ if not self.fit_intercept else np.c_[X_, np.ones((n, 1))] return pd.DataFrame(np.dot(self.cumulative_hazards_, X_.T), index=self.timeline, columns=cols) def predict_survival_function(self, X): """ - X: a (n,d) covariate matrix + X: a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns + can be in any order. If a numpy array, columns must be in the + same order as the training data. Returns the survival functions for the individuals """ @@ -745,7 +751,10 @@ def predict_survival_function(self, X): def predict_percentile(self, X, p=0.5): """ - X: a (n,d) covariate matrix + X: a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns + can be in any order. 
If a numpy array, columns must be in the + same order as the training data. + Returns the median lifetimes for the individuals. http://stats.stackexchange.com/questions/102986/percentile-loss-functions """ @@ -754,7 +763,10 @@ def predict_percentile(self, X, p=0.5): def predict_median(self, X): """ - X: a (n,d) covariate matrix + X: a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns + can be in any order. If a numpy array, columns must be in the + same order as the training data. + Returns the median lifetimes for the individuals """ return self.predict_percentile(X, 0.5) @@ -762,6 +774,12 @@ def predict_median(self, X): def predict_expectation(self, X): """ Compute the expected lifetime, E[T], using covarites X. + + X: a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns + can be in any order. If a numpy array, columns must be in the + same order as the training data. + + Returns the expected lifetimes for the individuals """ index = get_index(X) t = self.cumulative_hazards_.index @@ -1117,10 +1135,15 @@ def predict_partial_hazard(self, X): """ index = get_index(X) + if isinstance(X, pd.DataFrame): + order = self.hazards_.columns + X = X[order] + if self.normalize: # Assuming correct ordering and number of columns X = normalize(X, self._norm_mean.values, self._norm_std.values) + return pd.DataFrame(exp(np.dot(X, self.hazards_.T)), index=index) def predict_cumulative_hazard(self, X):