From 248695981a6af4a2f19cfd799d444012305d8f09 Mon Sep 17 00:00:00 2001 From: CamDavidsonPilon Date: Mon, 24 Aug 2020 22:42:33 -0400 Subject: [PATCH 1/6] nice, this all works except for strata --- lifelines/fitters/__init__.py | 5 +- lifelines/fitters/coxph_fitter.py | 349 +++++++++++++++++- ...piecewise_exponential_regression_fitter.py | 7 +- lifelines/utils/__init__.py | 4 +- 4 files changed, 343 insertions(+), 22 deletions(-) diff --git a/lifelines/fitters/__init__.py b/lifelines/fitters/__init__.py index f32493383..1812f5877 100644 --- a/lifelines/fitters/__init__.py +++ b/lifelines/fitters/__init__.py @@ -1293,6 +1293,7 @@ class ParametricRegressionFitter(RegressionFitter): _scipy_fit_method = "BFGS" _scipy_fit_options: Dict[str, Any] = dict() fit_intercept = False + force_no_intercept = False regressors = None strata = None @@ -1743,7 +1744,9 @@ def _fit( self._central_values = self._compute_central_values_of_raw_training_data(df, self.strata) regressors = utils.coalesce(regressors, self.regressors, {p: None for p in self._fitted_parameter_names}) - self.regressors = utils.CovariateParameterMappings(regressors, df, force_intercept=self.fit_intercept) + self.regressors = utils.CovariateParameterMappings( + regressors, df, force_intercept=self.fit_intercept, force_no_intercept=self.force_no_intercept + ) Xs = self.regressors.transform_df(df) self._check_values_pre_fitting(Xs, utils.coalesce(Ts[1], Ts[0]), E, weights, entries) diff --git a/lifelines/fitters/coxph_fitter.py b/lifelines/fitters/coxph_fitter.py index a4ad3920c..cc32eecc5 100644 --- a/lifelines/fitters/coxph_fitter.py +++ b/lifelines/fitters/coxph_fitter.py @@ -50,7 +50,7 @@ class CoxPHFitter(RegressionFitter, ProportionalHazardMixin): the level in the confidence intervals. baseline_estimation_method: string, optional - specify how the fitter should estimate the baseline. ``"breslow"`` or ``"spline"`` + specify how the fitter should estimate the baseline. ``"breslow"``, ``"spline"``, or ``"piecewise"`` penalizer: float or array, optional (default=0.0) Attach a penalty to the size of the coefficients during regression. This improves @@ -74,6 +74,9 @@ class CoxPHFitter(RegressionFitter, ProportionalHazardMixin): Used when ``baseline_estimation_method="spline"`. Set the number of knots (interior & exterior) in the baseline hazard. Should be atleast 2. Royston et. al, the authors of this model, suggest 4 to start, but any values between 2 and 6 are reasonable. + breakpoints: int + Used when ``baseline_estimation_method="piecewise"`. Set the positions of the baseline hazard breakpoints. + Examples -------- .. code:: python @@ -130,6 +133,7 @@ def __init__( strata: Optional[Union[List[str], str]] = None, l1_ratio: float = 0.0, n_baseline_knots: Optional[int] = None, + breakpoints: Optional[List] = None, **kwargs, ) -> None: @@ -143,6 +147,7 @@ def __init__( self.l1_ratio = l1_ratio self.baseline_estimation_method = baseline_estimation_method self.n_baseline_knots = n_baseline_knots + self.breakpoints = breakpoints @utils.CensoringType.right_censoring def fit( @@ -304,6 +309,8 @@ def _fit_model(self, *args, **kwargs): return self._fit_model_breslow(*args, **kwargs) elif self.baseline_estimation_method == "spline": return self._fit_model_spline(*args, **kwargs) + elif self.baseline_estimation_method == "piecewise": + return self._fit_model_piecewise(*args, **kwargs) else: raise ValueError("Invalid model estimation.") @@ -314,6 +321,53 @@ def _fit_model_breslow(self, *args, **kwargs): model.fit(*args, **kwargs) return model + def _fit_model_piecewise(self, *args, **kwargs): + df = args[0].copy() + # handle if they provided a formula or not + formula = kwargs.pop("formula") + + # handle cluster_col + if kwargs["cluster_col"] is not None: + raise ValueError("cluster_col is not available for this baseline estimation method") + assert self.breakpoints is not None, "breakpoints must be set in initialization." + + # these are not needed, should be popped off. + kwargs.pop("cluster_col") + kwargs.pop("step_size") + kwargs.pop("batch_mode") + + # handle strata + strata = kwargs.pop("strata") + + if strata is None: + regressors = {**{"beta_": formula}, **{"log_lambda%d_" % i: "1" for i in range(1, len(self.breakpoints) + 2)}} + strata_values = None + elif isinstance(strata, (list, str)): + strata_namer = ParametricPiecewiseBaselinePHFitter._strata_labeler + strata = utils._to_list(strata) + + df = df.set_index(strata).sort_index() + + # how many unique strata values are there? + strata_values = df.groupby(strata).size().index.tolist() + regressors = {"beta_": formula} + for stratum in strata_values: + regressors.update({strata_namer(stratum, i): "1" for i in range(1, len(self.breakpoints) + 2)}) + else: + raise ValueError("Wrong type for strata. String, None, or list of strings") + + model = ParametricPiecewiseBaselinePHFitter( + strata=strata, + strata_values=strata_values, + penalizer=self.penalizer, + l1_ratio=self.l1_ratio, + breakpoints=self.breakpoints, + alpha=self.alpha, + label=self._label, + ) + model.fit(df, *args[1:], regressors=regressors, **kwargs) + return model + def _fit_model_spline(self, *args, **kwargs): df = args[0].copy() @@ -338,7 +392,7 @@ def _fit_model_spline(self, *args, **kwargs): regressors = {**{"beta_": formula}, **{"phi%d_" % i: "1" for i in range(1, self.n_baseline_knots + 1)}} strata_values = None elif isinstance(strata, (list, str)): - spline_namer = ParametricSplinePHFitter._strata_spline_labeler + strata_namer = ParametricSplinePHFitter._strata_labeler strata = utils._to_list(strata) df = df.set_index(strata).sort_index() @@ -347,7 +401,7 @@ def _fit_model_spline(self, *args, **kwargs): strata_values = df.groupby(strata).size().index.tolist() regressors = {"beta_": formula} for stratum in strata_values: - regressors.update({spline_namer(stratum, i): "1" for i in range(1, self.n_baseline_knots + 1)}) + regressors.update({strata_namer(stratum, i): "1" for i in range(1, self.n_baseline_knots + 1)}) else: raise ValueError("Wrong type for strata. String, None, or list of strings") @@ -404,6 +458,8 @@ def print_summary(self, decimals=2, style=None, columns=None, **kwargs): headers.append(("strata", self.strata)) if self.baseline_estimation_method == "spline": headers.append(("number of baseline knots", self.n_baseline_knots)) + if self.baseline_estimation_method == "piecewise": + headers.append(("location of breaks", self.breakpoints)) headers.extend( [ @@ -428,7 +484,7 @@ def print_summary(self, decimals=2, style=None, columns=None, **kwargs): ("Partial AIC", "{:.{prec}f}".format(self.AIC_partial_, prec=decimals)), ] ) - elif self.baseline_estimation_method == "spline": + elif self.baseline_estimation_method in ["spline", "piecewise"]: footers.append(("AIC", "{:.{prec}f}".format(self.AIC_, prec=decimals))) footers.extend( @@ -2404,7 +2460,7 @@ def __init__(self, strata, strata_values, n_baseline_knots=1, *args, **kwargs): super(ParametricSplinePHFitter, self).__init__(*args, **kwargs) @staticmethod - def _strata_spline_labeler(stratum, i): + def _strata_labeler(stratum, i): return "s%s_phi%d_" % (stratum, i) @property @@ -2412,7 +2468,7 @@ def _fitted_parameter_names(self): if self.strata is not None: names = ["beta_"] for stratum in self.strata_values: - names += [self._strata_spline_labeler(stratum, i) for i in range(1, self.n_baseline_knots + 1)] + names += [self._strata_labeler(stratum, i) for i in range(1, self.n_baseline_knots + 1)] return names else: return ["beta_"] + ["phi%d_" % i for i in range(1, self.n_baseline_knots + 1)] @@ -2430,14 +2486,9 @@ def _create_initial_point(self, Ts, E, entries, weights, Xs): params = {"beta_": np.zeros(len(Xs["beta_"].columns))} for stratum in self.strata_values: params.update( - { - self._strata_spline_labeler(stratum, 1): np.array([0.05]), - self._strata_spline_labeler(stratum, 2): np.array([-0.05]), - } - ) - params.update( - {self._strata_spline_labeler(stratum, i): np.array([0.0]) for i in range(3, self.n_baseline_knots + 1)} + {self._strata_labeler(stratum, 1): np.array([0.05]), self._strata_labeler(stratum, 2): np.array([-0.05])} ) + params.update({self._strata_labeler(stratum, i): np.array([0.0]) for i in range(3, self.n_baseline_knots + 1)}) return params @@ -2463,11 +2514,11 @@ def _cumulative_hazard_with_strata(self, params, T, Xs): else: lT_ = lT[start:stop] - H_ = safe_exp(anp.dot(Xs_["beta_"], params["beta_"]) + params[self._strata_spline_labeler(stratum, 1)] * lT_) + H_ = safe_exp(anp.dot(Xs_["beta_"], params["beta_"]) + params[self._strata_labeler(stratum, 1)] * lT_) for i in range(2, self.n_baseline_knots + 1): H_ = H_ * safe_exp( - params[self._strata_spline_labeler(stratum, i)] + params[self._strata_labeler(stratum, i)] * self.basis(lT_, anp.log(self.knots[i - 1]), anp.log(self.knots[0]), anp.log(self.knots[-1])) ) @@ -2664,6 +2715,274 @@ def AIC_partial_(self): ) +class ParametricPiecewiseBaselinePHFitter(ParametricRegressionFitter, ProportionalHazardMixin): + r""" + Proportional hazard model with piecewise constant model for the baseline hazard. + + .. math:: h(t|x) = h_0(t) \exp(x' \beta) + + where + + .. math:: h_0(t) = \begin{cases} + 1/\lambda_0 & \text{if $t \le \tau_0$} \\ + 1/\lambda_1 & \text{if $\tau_0 < t \le \tau_1$} \\ + 1/\lambda_2 & \text{if $\tau_1 < t \le \tau_2$} \\ + ... + \end{cases} + + + Note + ------- + This is a "hidden" class that is invoked when using ``baseline_estimation_method="piecewise"``. You probably want to use ``CoxPHFitter``, not this. + """ + + _KNOWN_MODEL = True + _FAST_MEDIAN_PREDICT = False + + cluster_col = None + force_no_intercept = True + + def __init__(self, strata, strata_values, breakpoints, *args, **kwargs): + self.strata = strata + self.strata_values = strata_values + + assert ( + breakpoints is not None and len(breakpoints) > 1 + ), "breakpoints should be greater than 1. Set in class instantiation" + + self.breakpoints = breakpoints + self.n_breakpoints = len(breakpoints) + super(ParametricPiecewiseBaselinePHFitter, self).__init__(*args, **kwargs) + + @staticmethod + def _strata_labeler(stratum, i): + return "s%s_lambda%d_" % (stratum, i) + + @property + def _fitted_parameter_names(self): + if self.strata is not None: + names = ["beta_"] + for stratum in self.strata_values: + names += [self._strata_labeler(stratum, i) for i in range(1, self.n_breakpoints + 2)] + return names + else: + return ["beta_"] + ["log_lambda%d_" % i for i in range(1, self.n_breakpoints + 2)] + + def _create_initial_point(self, Ts, E, entries, weights, Xs): + # Some non-zero initial points. This is important as it nudges the model slightly away from the degenerate all-zeros model. Try setting it to 0, and watch the model fail to converge. + if self.strata is not None: + params = {"beta_": np.zeros(len(Xs["beta_"].columns))} + for stratum in self.strata_values: + params.update( + {self._strata_labeler(stratum, 1): np.array([0.05]), self._strata_labeler(stratum, 2): np.array([-0.05])} + ) + params.update({self._strata_labeler(stratum, i): np.array([0.0]) for i in range(3, self.n_breakpoints + 2)}) + + return params + + else: + return { + **{ + "beta_": np.zeros(len(Xs["beta_"].columns)), + "log_lambda1_": np.array([0.05]), + "log_lambda2_": np.array([-0.05]), + }, + **{"log_lambda%d_" % i: np.array([0.0]) for i in range(3, self.n_breakpoints + 2)}, + } + + def _cumulative_hazard_with_strata(self, params, T, Xs): + # TODO + pass + + def _cumulative_hazard_sans_strata(self, params, T, Xs): + partial_hazard = safe_exp(anp.dot(Xs["beta_"], params["beta_"])) + n = T.shape[0] + T = T.reshape((n, 1)) + bps = anp.append(self.breakpoints, [anp.inf]) + M = anp.minimum(anp.tile(bps, (n, 1)), T) + M = anp.hstack([M[:, tuple([0])], anp.diff(M, axis=1)]) + log_lambdas_ = anp.array([params[param] for param in self._fitted_parameter_names if param != "beta_"]) + return partial_hazard * (M * anp.exp(log_lambdas_).T).sum(1) + + def _cumulative_hazard(self, params, T, Xs): + if self.strata is not None: + return self._cumulative_hazard_with_strata(params, T, Xs) + else: + return self._cumulative_hazard_sans_strata(params, T, Xs) + + @property + def baseline_hazard_(self): + return self.baseline_hazard_at_times(self.timeline) + + @property + def baseline_survival_(self): + return self.baseline_survival_at_times(self.timeline) + + @property + def baseline_cumulative_hazard_(self): + return self.baseline_cumulative_hazard_at_times(self.timeline) + + def baseline_hazard_at_times(self, times=None): + """ + Predict the baseline hazard at times (Defaults to observed durations) + """ + times = utils.coalesce(times, self.timeline) + if self.strata is not None: + v = self.predict_hazard(self._central_values.reset_index(), times=times) + v.columns = self._central_values.index.values + else: + v = self.predict_hazard(self._central_values, times=times) + v.columns = ["baseline hazard"] + return v + + def baseline_survival_at_times(self, times=None): + """ + Predict the baseline survival at times (Defaults to observed durations) + """ + times = utils.coalesce(times, self.timeline) + if self.strata is not None: + v = self.predict_survival_function(self._central_values.reset_index(), times=times) + v.columns = self._central_values.index.values + else: + v = self.predict_survival_function(self._central_values, times=times) + v.columns = ["baseline survival"] + return v + + def baseline_cumulative_hazard_at_times(self, times=None): + """ + Predict the baseline cumulative hazard at times (Defaults to observed durations) + """ + times = utils.coalesce(times, self.timeline) + if self.strata is not None: + v = self.predict_cumulative_hazard(self._central_values.reset_index(), times=times) + v.columns = self._central_values.index.values + else: + v = self.predict_cumulative_hazard(self._central_values, times=times) + v.columns = ["baseline cumulative hazard"] + return v + + def predict_cumulative_hazard(self, df, *, times=None, conditional_after=None): + """ + Predict the cumulative hazard for individuals, given their covariates. + + Parameters + ---------- + + df: DataFrame + a (n,d) DataFrame. If a DataFrame, columns + can be in any order. + times: iterable, optional + an iterable (array, list, series) of increasing times to predict the cumulative hazard at. Default + is the set of all durations in the training dataset (observed and unobserved). + conditional_after: iterable, optional + Must be equal is size to (df.shape[0],) (`n` above). An iterable (array, list, series) of possibly non-zero values that represent how long the + subject has already lived for. Ex: if :math:`T` is the unknown event time, then this represents + :math:`T | T > s`. This is useful for knowing the *remaining* hazard/survival of censored subjects. + The new timeline is the remaining duration of the subject, i.e. normalized back to starting at 0. + + Returns + ------- + DataFrame + the cumulative hazards of individuals over the timeline + + """ + if isinstance(df, pd.Series): + df = df.to_frame().T.infer_objects() + + df = df.copy() + + if self.strata is not None: + df = df.reset_index().set_index(self.strata) + + cumulative_hazard = pd.DataFrame() + if conditional_after is not None: + # need to pass this into the groupby + df["conditional_after_"] = conditional_after + + for stratum, stratified_X in df.groupby(self.strata): + + if conditional_after is not None: + conditional_after_ = stratified_X.pop("conditional_after_") + else: + conditional_after_ = None + + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning) + cumulative_hazard_ = super(ParametricPiecewiseBaselinePHFitter, self).predict_cumulative_hazard( + stratified_X, times=times, conditional_after=conditional_after_ + ) + cumulative_hazard_.columns = stratified_X["index"] + cumulative_hazard = cumulative_hazard.merge(cumulative_hazard_, how="outer", right_index=True, left_index=True) + + return cumulative_hazard + + else: + return super(ParametricPiecewiseBaselinePHFitter, self).predict_cumulative_hazard( + df, times=times, conditional_after=conditional_after + ) + + def predict_hazard(self, df, *, conditional_after=None, times=None): + """ + Predict the hazard for individuals, given their covariates. + + Parameters + ---------- + + df: DataFrame + a (n,d) DataFrame. If a DataFrame, columns + can be in any order. + times: iterable, optional + an iterable (array, list, series) of increasing times to predict the cumulative hazard at. Default + is the set of all durations in the training dataset (observed and unobserved). + conditional_after: + Not implemented yet. + + Returns + ------- + DataFrame + the hazards of individuals over the timeline + + """ + if isinstance(df, pd.Series): + df = df.to_frame().T.infer_objects() + + df = df.copy() + + if self.strata is not None: + df = df.reset_index().set_index(self.strata) + + cumulative_hazard = pd.DataFrame() + if conditional_after is not None: + # need to pass this into the groupby + df["conditional_after_"] = conditional_after + + for stratum, stratified_X in df.groupby(self.strata): + + if conditional_after is not None: + conditional_after_ = stratified_X.pop("conditional_after_") + else: + conditional_after_ = None + + cumulative_hazard_ = super(ParametricPiecewiseBaselinePHFitter, self).predict_hazard( + stratified_X, times=times, conditional_after=conditional_after_ + ) + cumulative_hazard_.columns = stratified_X["index"] + cumulative_hazard = cumulative_hazard.merge(cumulative_hazard_, how="outer", right_index=True, left_index=True) + + return cumulative_hazard + + else: + return super(ParametricPiecewiseBaselinePHFitter, self).predict_hazard( + df, times=times, conditional_after=conditional_after + ) + + @property + def AIC_partial_(self): + raise exceptions.StatError( + "Since the piecewise model is fully parametric (and not semi-parametric), the partial AIC does not exist. You probably want the `.AIC_` property instead" + ) + + class _BatchVsSingle: BATCH = "batch" diff --git a/lifelines/fitters/piecewise_exponential_regression_fitter.py b/lifelines/fitters/piecewise_exponential_regression_fitter.py index ebd5e07e5..507cf08d3 100644 --- a/lifelines/fitters/piecewise_exponential_regression_fitter.py +++ b/lifelines/fitters/piecewise_exponential_regression_fitter.py @@ -36,6 +36,8 @@ class PiecewiseExponentialRegressionFitter(ParametricRegressionFitter): paper replication `here `_ """ + + # mmm not really... _FAST_MEDIAN_PREDICT = True # about 50% faster than BFGS @@ -79,11 +81,6 @@ def _cumulative_hazard(self, params, T, Xs): lambdas_ = np.array([safe_exp(-np.dot(Xs[param], params[param])) for param in self._fitted_parameter_names]) return (M * lambdas_.T).sum(1) - def _log_hazard(self, params, T, X): - hz = self._hazard(params, T, X) - hz = np.clip(hz, 1e-20, np.inf) - return np.log(hz) - def _prep_inputs_for_prediction_and_return_parameters(self, X): X = X.copy() diff --git a/lifelines/utils/__init__.py b/lifelines/utils/__init__.py index 8cca424c6..d12d77cff 100644 --- a/lifelines/utils/__init__.py +++ b/lifelines/utils/__init__.py @@ -1889,7 +1889,9 @@ def transform_df(self, df: pd.DataFrame): else: raise ValueError("Unexpected transform.") - if self.force_no_intercept: + # some parameters are constants (like in piecewise and splines) and so should + # not be dropped. + if self.force_no_intercept and X.shape[1] > 1: try: X = X.drop(self.INTERCEPT_COL, axis=1) except: From ce7e030a030d2f3eac92dcd5d26df17a1883eb12 Mon Sep 17 00:00:00 2001 From: CamDavidsonPilon Date: Tue, 25 Aug 2020 18:48:17 -0400 Subject: [PATCH 2/6] turned into lots of improvements --- lifelines/fitters/__init__.py | 29 +- lifelines/fitters/coxph_fitter.py | 461 +++++++----------- lifelines/fitters/mixins.py | 5 +- ...piecewise_exponential_regression_fitter.py | 5 +- lifelines/statistics.py | 6 +- lifelines/utils/__init__.py | 20 +- 6 files changed, 233 insertions(+), 293 deletions(-) diff --git a/lifelines/fitters/__init__.py b/lifelines/fitters/__init__.py index 1812f5877..be89d9af0 100644 --- a/lifelines/fitters/__init__.py +++ b/lifelines/fitters/__init__.py @@ -1305,6 +1305,7 @@ def __init__(self, alpha: float = 0.05, penalizer: Union[float, np.array] = 0.0, def _check_values_post_fitting(self, df, T, E, weights, entries): utils.check_dimensions(df) utils.check_complete_separation(df, E, T, self.event_col) + utils.check_scaling(df) def _pre_fit_model(self, Ts, E, Xs) -> None: return @@ -1998,9 +1999,10 @@ def _compute_variance_matrix(self) -> np.array: 0. Are there any lifelines warnings outputted during the `fit`? 1. Inspect your DataFrame: does everything look as expected? Do you need to add/drop a constant (intercept) column? - 2. Is there high-collinearity in the dataset? Try using the variance inflation factor (VIF) to find redundant variables. - 3. Trying adding a small penalizer (or changing it, if already present). Example: `%s(penalizer=0.01).fit(...)`. - 4. Are there any extreme outliers? Try modeling them or dropping them to see if it helps convergence. + 2. Does a particularly large variable need to be centered to 0? + 3. Is there high-collinearity in the dataset? Try using the variance inflation factor (VIF) to find redundant variables. + 4. Trying adding a small penalizer (or changing it, if already present). Example: `%s(penalizer=0.01).fit(...)`. + 5. Are there any extreme outliers? Try modeling them or dropping them to see if it helps convergence. """ % self._class_name ) @@ -2072,6 +2074,15 @@ def _ll_null(self): return self._ll_null_ regressors = {name: "1" for name in self._fitted_parameter_names} + + # we can reuse the final values from the full fit for this smaller fit. + initial_point = {} + for name in self._fitted_parameter_names: + try: + initial_point[name] = self.params_[name]["Intercept"] + except: + initial_point[name] = 0.0 + df = pd.DataFrame({"entry": self.entry, "w": self.weights}) # some fitters will have custom __init__ fields that need to be provided (Piecewise, Spline...) @@ -2084,13 +2095,19 @@ def _ll_null(self): if utils.CensoringType.is_right_censoring(self): df["T"], df["E"] = self.durations, self.event_observed - model.fit_right_censoring(df, "T", "E", entry_col="entry", weights_col="w", regressors=regressors) + model.fit_right_censoring( + df, "T", "E", entry_col="entry", weights_col="w", regressors=regressors, initial_point=initial_point + ) elif utils.CensoringType.is_interval_censoring(self): df["lb"], df["ub"], df["E"] = self.lower_bound, self.upper_bound, self.event_observed - model.fit_interval_censoring(df, "lb", "ub", "E", entry_col="entry", weights_col="w", regressors=regressors) + model.fit_interval_censoring( + df, "lb", "ub", "E", entry_col="entry", weights_col="w", regressors=regressors, initial_point=initial_point + ) if utils.CensoringType.is_left_censoring(self): df["T"], df["E"] = self.durations, self.event_observed - model.fit_left_censoring(df, "T", "E", entry_col="entry", weights_col="w", regressors=regressors) + model.fit_left_censoring( + df, "T", "E", entry_col="entry", weights_col="w", regressors=regressors, initial_point=initial_point + ) self._ll_null_ = model.log_likelihood_ return self._ll_null_ diff --git a/lifelines/fitters/coxph_fitter.py b/lifelines/fitters/coxph_fitter.py index cc32eecc5..2d7270cd4 100644 --- a/lifelines/fitters/coxph_fitter.py +++ b/lifelines/fitters/coxph_fitter.py @@ -1267,7 +1267,7 @@ def _newton_rhapson_for_efron_model( elif not success: self._check_values_post_fitting(X, T, E, weights) warnings.warn( - "Newton-Rhaphson failed to converge sufficiently in %d steps.\n" % max_steps, exceptions.ConvergenceWarning + "Newton-Rhaphson failed to converge sufficiently. {0}".format(CONVERGENCE_DOCS), exceptions.ConvergenceWarning ) return beta, ll_, hessian @@ -2418,125 +2418,10 @@ def AIC_(self): ) -class ParametricSplinePHFitter(ParametricRegressionFitter, SplineFitterMixin, ProportionalHazardMixin): - r""" - Proportional hazard model with cubic splines model for the baseline hazard. - - .. math:: h(t|x) = h_0(t) \exp((x - \overline{x})' \beta) - - where - - .. math:: h_0(t) = \exp{\left( \phi_0 + \phi_1\log{t} + \sum_{j=2}^N \phi_j v_j(\log{t})\right)} - - where :math:`v_j` are our cubic basis functions at predetermined knots. See references for exact definition. - - References - ------------ - Royston, P., & Parmar, M. K. B. (2002). Flexible parametric proportional-hazards and proportional-odds models for censored survival data, with application to prognostic modelling and estimation of treatment effects. Statistics in Medicine, 21(15), 2175–2197. doi:10.1002/sim.1203  - - Note - ------- - This is a "hidden" class that is invoked when using ``baseline_estimation_method="spline"``. You probably want to use ``CoxPHFitter``, not this. - """ - - _scipy_fit_method = "SLSQP" - _scipy_fit_options = {"maxiter": 1000, "iprint": 100} +class ParametricCoxModelFitter(ParametricRegressionFitter, ProportionalHazardMixin): _KNOWN_MODEL = True - _FAST_MEDIAN_PREDICT = False - cluster_col = None - fit_intercept = True - - def __init__(self, strata, strata_values, n_baseline_knots=1, *args, **kwargs): - self.strata = strata - self.strata_values = strata_values - - assert ( - n_baseline_knots is not None and n_baseline_knots > 1 - ), "n_baseline_knots should be greater than 1. Set in class instantiation" - - self.n_baseline_knots = n_baseline_knots - super(ParametricSplinePHFitter, self).__init__(*args, **kwargs) - - @staticmethod - def _strata_labeler(stratum, i): - return "s%s_phi%d_" % (stratum, i) - - @property - def _fitted_parameter_names(self): - if self.strata is not None: - names = ["beta_"] - for stratum in self.strata_values: - names += [self._strata_labeler(stratum, i) for i in range(1, self.n_baseline_knots + 1)] - return names - else: - return ["beta_"] + ["phi%d_" % i for i in range(1, self.n_baseline_knots + 1)] - - def _set_knots(self, T, E): - self.knots = np.percentile(T[E.astype(bool).values], np.linspace(5, 95, self.n_baseline_knots + 1)) - return - - def _pre_fit_model(self, Ts, E, df): - self._set_knots(Ts[0], E) - - def _create_initial_point(self, Ts, E, entries, weights, Xs): - # Some non-zero initial points. This is important as it nudges the model slightly away from the degenerate all-zeros model. Try setting it to 0, and watch the model fail to converge. - if self.strata is not None: - params = {"beta_": np.zeros(len(Xs["beta_"].columns))} - for stratum in self.strata_values: - params.update( - {self._strata_labeler(stratum, 1): np.array([0.05]), self._strata_labeler(stratum, 2): np.array([-0.05])} - ) - params.update({self._strata_labeler(stratum, i): np.array([0.0]) for i in range(3, self.n_baseline_knots + 1)}) - - return params - - else: - return { - **{"beta_": np.zeros(len(Xs["beta_"].columns)), "phi1_": np.array([0.05]), "phi2_": np.array([-0.05])}, - **{"phi%d_" % i: np.array([0.0]) for i in range(3, self.n_baseline_knots + 1)}, - } - - def _cumulative_hazard_with_strata(self, params, T, Xs): - lT = anp.log(T) - output = [] - - # hack for iterating over stratified T - start, stop = 0, 0 - - # I can assume Xs is sorted by strata values - for stratum, Xs_ in Xs.groupby(self.strata): - stop = stop + Xs_.size - - if T.ndim > 1: - lT_ = lT[:, start:stop] - else: - lT_ = lT[start:stop] - - H_ = safe_exp(anp.dot(Xs_["beta_"], params["beta_"]) + params[self._strata_labeler(stratum, 1)] * lT_) - - for i in range(2, self.n_baseline_knots + 1): - H_ = H_ * safe_exp( - params[self._strata_labeler(stratum, i)] - * self.basis(lT_, anp.log(self.knots[i - 1]), anp.log(self.knots[0]), anp.log(self.knots[-1])) - ) - - output.append(H_) - start = stop - - return anp.hstack(output) if output else anp.array([]) - - def _cumulative_hazard_sans_strata(self, params, T, Xs): - lT = anp.log(T) - - H = safe_exp(anp.dot(Xs["beta_"], params["beta_"]) + params["phi1_"] * lT) - - for i in range(2, self.n_baseline_knots + 1): - H = H * safe_exp( - params["phi%d_" % i] * self.basis(lT, anp.log(self.knots[i - 1]), anp.log(self.knots[0]), anp.log(self.knots[-1])) - ) - return H def _cumulative_hazard(self, params, T, Xs): if self.strata is not None: @@ -2642,7 +2527,7 @@ def predict_cumulative_hazard(self, df, *, times=None, conditional_after=None): with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=UserWarning) - cumulative_hazard_ = super(ParametricSplinePHFitter, self).predict_cumulative_hazard( + cumulative_hazard_ = super(ParametricCoxModelFitter, self).predict_cumulative_hazard( stratified_X, times=times, conditional_after=conditional_after_ ) cumulative_hazard_.columns = stratified_X["index"] @@ -2651,7 +2536,7 @@ def predict_cumulative_hazard(self, df, *, times=None, conditional_after=None): return cumulative_hazard else: - return super(ParametricSplinePHFitter, self).predict_cumulative_hazard( + return super(ParametricCoxModelFitter, self).predict_cumulative_hazard( df, times=times, conditional_after=conditional_after ) @@ -2697,7 +2582,7 @@ def predict_hazard(self, df, *, conditional_after=None, times=None): else: conditional_after_ = None - cumulative_hazard_ = super(ParametricSplinePHFitter, self).predict_hazard( + cumulative_hazard_ = super(ParametricCoxModelFitter, self).predict_hazard( stratified_X, times=times, conditional_after=conditional_after_ ) cumulative_hazard_.columns = stratified_X["index"] @@ -2706,7 +2591,7 @@ def predict_hazard(self, df, *, conditional_after=None, times=None): return cumulative_hazard else: - return super(ParametricSplinePHFitter, self).predict_hazard(df, times=times, conditional_after=conditional_after) + return super(ParametricCoxModelFitter, self).predict_hazard(df, times=times, conditional_after=conditional_after) @property def AIC_partial_(self): @@ -2715,7 +2600,126 @@ def AIC_partial_(self): ) -class ParametricPiecewiseBaselinePHFitter(ParametricRegressionFitter, ProportionalHazardMixin): +class ParametricSplinePHFitter(ParametricCoxModelFitter, SplineFitterMixin): + r""" + Proportional hazard model with cubic splines model for the baseline hazard. + + .. math:: h(t|x) = h_0(t) \exp((x - \overline{x})' \beta) + + where + + .. math:: h_0(t) = \exp{\left( \phi_0 + \phi_1\log{t} + \sum_{j=2}^N \phi_j v_j(\log{t})\right)} + + where :math:`v_j` are our cubic basis functions at predetermined knots. See references for exact definition. + + References + ------------ + Royston, P., & Parmar, M. K. B. (2002). Flexible parametric proportional-hazards and proportional-odds models for censored survival data, with application to prognostic modelling and estimation of treatment effects. Statistics in Medicine, 21(15), 2175–2197. doi:10.1002/sim.1203  + + Note + ------- + This is a "hidden" class that is invoked when using ``baseline_estimation_method="spline"``. You probably want to use ``CoxPHFitter``, not this. + """ + + _scipy_fit_method = "SLSQP" + _scipy_fit_options = {"maxiter": 1000, "iprint": 100} + + _FAST_MEDIAN_PREDICT = False + + fit_intercept = True + + def __init__(self, strata, strata_values, n_baseline_knots=1, *args, **kwargs): + self.strata = strata + self.strata_values = strata_values + + assert ( + n_baseline_knots is not None and n_baseline_knots > 1 + ), "n_baseline_knots should be greater than 1. Set in class instantiation" + + self.n_baseline_knots = n_baseline_knots + super(ParametricSplinePHFitter, self).__init__(*args, **kwargs) + + @staticmethod + def _strata_labeler(stratum, i): + return "s%s_phi%d_" % (stratum, i) + + @property + def _fitted_parameter_names(self): + if self.strata is not None: + names = ["beta_"] + for stratum in self.strata_values: + names += [self._strata_labeler(stratum, i) for i in range(1, self.n_baseline_knots + 1)] + return names + else: + return ["beta_"] + ["phi%d_" % i for i in range(1, self.n_baseline_knots + 1)] + + def _set_knots(self, T, E): + self.knots = np.percentile(T[E.astype(bool).values], np.linspace(5, 95, self.n_baseline_knots + 1)) + return + + def _pre_fit_model(self, Ts, E, df): + self._set_knots(Ts[0], E) + + def _create_initial_point(self, Ts, E, entries, weights, Xs): + # Some non-zero initial points. This is important as it nudges the model slightly away from the degenerate all-zeros model. Try setting it to 0, and watch the model fail to converge. + if self.strata is not None: + params = {"beta_": np.zeros(len(Xs["beta_"].columns))} + for stratum in self.strata_values: + params.update( + {self._strata_labeler(stratum, 1): np.array([0.05]), self._strata_labeler(stratum, 2): np.array([-0.05])} + ) + params.update({self._strata_labeler(stratum, i): np.array([0.0]) for i in range(3, self.n_baseline_knots + 1)}) + + return params + + else: + return { + **{"beta_": np.zeros(len(Xs["beta_"].columns)), "phi1_": np.array([0.05]), "phi2_": np.array([-0.05])}, + **{"phi%d_" % i: np.array([0.0]) for i in range(3, self.n_baseline_knots + 1)}, + } + + def _cumulative_hazard_with_strata(self, params, T, Xs): + lT = anp.log(T) + output = [] + + # hack for iterating over stratified T + start, stop = 0, 0 + + # I can assume Xs is sorted by strata values + for stratum, Xs_ in Xs.groupby(self.strata): + stop = stop + Xs_.size + + if T.ndim > 1: + lT_ = lT[:, start:stop] + else: + lT_ = lT[start:stop] + + H_ = safe_exp(anp.dot(Xs_["beta_"], params["beta_"]) + params[self._strata_labeler(stratum, 1)] * lT_) + + for i in range(2, self.n_baseline_knots + 1): + H_ = H_ * safe_exp( + params[self._strata_labeler(stratum, i)] + * self.basis(lT_, anp.log(self.knots[i - 1]), anp.log(self.knots[0]), anp.log(self.knots[-1])) + ) + + output.append(H_) + start = stop + + return anp.hstack(output) if output else anp.array([]) + + def _cumulative_hazard_sans_strata(self, params, T, Xs): + lT = anp.log(T) + + H = safe_exp(anp.dot(Xs["beta_"], params["beta_"]) + params["phi1_"] * lT) + + for i in range(2, self.n_baseline_knots + 1): + H = H * safe_exp( + params["phi%d_" % i] * self.basis(lT, anp.log(self.knots[i - 1]), anp.log(self.knots[0]), anp.log(self.knots[-1])) + ) + return H + + +class ParametricPiecewiseBaselinePHFitter(ParametricCoxModelFitter, ProportionalHazardMixin): r""" Proportional hazard model with piecewise constant model for the baseline hazard. @@ -2747,8 +2751,8 @@ def __init__(self, strata, strata_values, breakpoints, *args, **kwargs): self.strata_values = strata_values assert ( - breakpoints is not None and len(breakpoints) > 1 - ), "breakpoints should be greater than 1. Set in class instantiation" + breakpoints is not None and len(breakpoints) > 0 + ), "breakpoints should be greater than 0. Set in class instantiation" self.breakpoints = breakpoints self.n_breakpoints = len(breakpoints) @@ -2791,8 +2795,33 @@ def _create_initial_point(self, Ts, E, entries, weights, Xs): } def _cumulative_hazard_with_strata(self, params, T, Xs): - # TODO - pass + output = [] + + # hack for iterating over stratified T + start, stop = 0, 0 + + # I can assume Xs is sorted by strata values + for stratum, Xs_ in Xs.groupby(self.strata): + stop = stop + Xs_.size + + if T.ndim > 1: + T_ = T[:, start:stop] + else: + T_ = T[start:stop] + + partial_hazard = safe_exp(anp.dot(Xs_["beta_"], params["beta_"])) + n = T_.shape[0] + T_ = T_.reshape((n, 1)) + bps = anp.append(self.breakpoints, [anp.inf]) + M = anp.minimum(anp.tile(bps, (n, 1)), T_) + M = anp.hstack([M[:, tuple([0])], anp.diff(M, axis=1)]) + log_lambdas_ = anp.array([params[self._strata_labeler(stratum, i)] for i in range(1, self.n_breakpoints + 2)]) + H_ = partial_hazard * (M * anp.exp(log_lambdas_).T).sum(1) + + output.append(H_) + start = stop + + return anp.hstack(output) if output else anp.array([]) def _cumulative_hazard_sans_strata(self, params, T, Xs): partial_hazard = safe_exp(anp.dot(Xs["beta_"], params["beta_"])) @@ -2804,183 +2833,69 @@ def _cumulative_hazard_sans_strata(self, params, T, Xs): log_lambdas_ = anp.array([params[param] for param in self._fitted_parameter_names if param != "beta_"]) return partial_hazard * (M * anp.exp(log_lambdas_).T).sum(1) - def _cumulative_hazard(self, params, T, Xs): - if self.strata is not None: - return self._cumulative_hazard_with_strata(params, T, Xs) - else: - return self._cumulative_hazard_sans_strata(params, T, Xs) - - @property - def baseline_hazard_(self): - return self.baseline_hazard_at_times(self.timeline) - - @property - def baseline_survival_(self): - return self.baseline_survival_at_times(self.timeline) - - @property - def baseline_cumulative_hazard_(self): - return self.baseline_cumulative_hazard_at_times(self.timeline) - - def baseline_hazard_at_times(self, times=None): - """ - Predict the baseline hazard at times (Defaults to observed durations) - """ - times = utils.coalesce(times, self.timeline) - if self.strata is not None: - v = self.predict_hazard(self._central_values.reset_index(), times=times) - v.columns = self._central_values.index.values - else: - v = self.predict_hazard(self._central_values, times=times) - v.columns = ["baseline hazard"] - return v - - def baseline_survival_at_times(self, times=None): + def predict_cumulative_hazard(self, df, times=None, conditional_after=None) -> pd.DataFrame: """ - Predict the baseline survival at times (Defaults to observed durations) - """ - times = utils.coalesce(times, self.timeline) - if self.strata is not None: - v = self.predict_survival_function(self._central_values.reset_index(), times=times) - v.columns = self._central_values.index.values - else: - v = self.predict_survival_function(self._central_values, times=times) - v.columns = ["baseline survival"] - return v - - def baseline_cumulative_hazard_at_times(self, times=None): - """ - Predict the baseline cumulative hazard at times (Defaults to observed durations) - """ - times = utils.coalesce(times, self.timeline) - if self.strata is not None: - v = self.predict_cumulative_hazard(self._central_values.reset_index(), times=times) - v.columns = self._central_values.index.values - else: - v = self.predict_cumulative_hazard(self._central_values, times=times) - v.columns = ["baseline cumulative hazard"] - return v - - def predict_cumulative_hazard(self, df, *, times=None, conditional_after=None): - """ - Predict the cumulative hazard for individuals, given their covariates. + Return the cumulative hazard rate of subjects in X at time points. Parameters ---------- - - df: DataFrame - a (n,d) DataFrame. If a DataFrame, columns - can be in any order. + X: numpy array or DataFrame + a (n,d) covariate numpy array or DataFrame. If a DataFrame, columns + can be in any order. If a numpy array, columns must be in the + same order as the training data. times: iterable, optional - an iterable (array, list, series) of increasing times to predict the cumulative hazard at. Default - is the set of all durations in the training dataset (observed and unobserved). - conditional_after: iterable, optional - Must be equal is size to (df.shape[0],) (`n` above). An iterable (array, list, series) of possibly non-zero values that represent how long the - subject has already lived for. Ex: if :math:`T` is the unknown event time, then this represents - :math:`T | T > s`. This is useful for knowing the *remaining* hazard/survival of censored subjects. - The new timeline is the remaining duration of the subject, i.e. normalized back to starting at 0. + an iterable of increasing times to predict the cumulative hazard at. Default + is the set of all durations (observed and unobserved). Uses a linear interpolation if + points in time are not in the index. Returns ------- - DataFrame - the cumulative hazards of individuals over the timeline - + cumulative_hazard_ : DataFrame + the cumulative hazard of individuals over the timeline """ - if isinstance(df, pd.Series): - df = df.to_frame().T.infer_objects() - df = df.copy() + if isinstance(df, pd.Series): + return self.predict_cumulative_hazard(df.to_frame().T) - if self.strata is not None: - df = df.reset_index().set_index(self.strata) - - cumulative_hazard = pd.DataFrame() - if conditional_after is not None: - # need to pass this into the groupby - df["conditional_after_"] = conditional_after - - for stratum, stratified_X in df.groupby(self.strata): - - if conditional_after is not None: - conditional_after_ = stratified_X.pop("conditional_after_") - else: - conditional_after_ = None - - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=UserWarning) - cumulative_hazard_ = super(ParametricPiecewiseBaselinePHFitter, self).predict_cumulative_hazard( - stratified_X, times=times, conditional_after=conditional_after_ - ) - cumulative_hazard_.columns = stratified_X["index"] - cumulative_hazard = cumulative_hazard.merge(cumulative_hazard_, how="outer", right_index=True, left_index=True) - - return cumulative_hazard - - else: - return super(ParametricPiecewiseBaselinePHFitter, self).predict_cumulative_hazard( - df, times=times, conditional_after=conditional_after - ) - - def predict_hazard(self, df, *, conditional_after=None, times=None): - """ - Predict the hazard for individuals, given their covariates. - - Parameters - ---------- - - df: DataFrame - a (n,d) DataFrame. If a DataFrame, columns - can be in any order. - times: iterable, optional - an iterable (array, list, series) of increasing times to predict the cumulative hazard at. Default - is the set of all durations in the training dataset (observed and unobserved). - conditional_after: - Not implemented yet. + if conditional_after is not None: + raise NotImplementedError() - Returns - ------- - DataFrame - the hazards of individuals over the timeline + times = np.atleast_1d(utils.coalesce(times, self.timeline)).astype(float) + n = times.shape[0] + times = times.reshape((n, 1)) - """ - if isinstance(df, pd.Series): - df = df.to_frame().T.infer_objects() + bp = np.append(self.breakpoints, [np.inf]) - df = df.copy() + M = np.minimum(np.tile(bp, (n, 1)), times) + M = np.hstack([M[:, tuple([0])], np.diff(M, axis=1)]) if self.strata is not None: df = df.reset_index().set_index(self.strata) cumulative_hazard = pd.DataFrame() - if conditional_after is not None: - # need to pass this into the groupby - df["conditional_after_"] = conditional_after for stratum, stratified_X in df.groupby(self.strata): + log_lambdas_ = anp.array( + [self.params_[self._strata_labeler(stratum, i)] for i in range(1, self.n_breakpoints + 2)] + ) + lambdas_ = np.exp(log_lambdas_) - if conditional_after is not None: - conditional_after_ = stratified_X.pop("conditional_after_") - else: - conditional_after_ = None + Xs_ = self.regressors.transform_df(stratified_X) + partial_hazard = np.exp(np.dot(Xs_["beta_"], self.params_["beta_"])) - cumulative_hazard_ = super(ParametricPiecewiseBaselinePHFitter, self).predict_hazard( - stratified_X, times=times, conditional_after=conditional_after_ - ) + cumulative_hazard_ = pd.DataFrame(partial_hazard * np.dot(M, lambdas_), index=times[:, 0]) cumulative_hazard_.columns = stratified_X["index"] cumulative_hazard = cumulative_hazard.merge(cumulative_hazard_, how="outer", right_index=True, left_index=True) return cumulative_hazard else: - return super(ParametricPiecewiseBaselinePHFitter, self).predict_hazard( - df, times=times, conditional_after=conditional_after - ) + log_lambdas_ = np.array([self.params_[param] for param in self._fitted_parameter_names if param != "beta_"]) + lambdas_ = np.exp(log_lambdas_) - @property - def AIC_partial_(self): - raise exceptions.StatError( - "Since the piecewise model is fully parametric (and not semi-parametric), the partial AIC does not exist. You probably want the `.AIC_` property instead" - ) + Xs = self.regressors.transform_df(df) + partial_hazard = np.exp(np.dot(Xs["beta_"], self.params_["beta_"])) + return pd.DataFrame(partial_hazard * np.dot(M, lambdas_), columns=utils._get_index(df), index=times[:, 0]) class _BatchVsSingle: diff --git a/lifelines/fitters/mixins.py b/lifelines/fitters/mixins.py index 1c36706ff..4eab6ddbb 100644 --- a/lifelines/fitters/mixins.py +++ b/lifelines/fitters/mixins.py @@ -95,6 +95,7 @@ def check_assumptions( test_results = proportional_hazard_test(self, training_df, time_transform=["rank", "km"], precomputed_residuals=residuals) residuals_and_duration = residuals.join(training_df[self.duration_col]) + Xs = self.regressors.transform_df(df) counter = 0 n = residuals_and_duration.shape[0] @@ -134,7 +135,7 @@ def check_assumptions( ) if advice: - values = training_df[variable] + values = Xs["beta_"][variable] value_counts = values.value_counts() n_uniques = value_counts.shape[0] @@ -177,7 +178,7 @@ def check_assumptions( ) if show_plots: - + print("Bootstrapping lowess lines...") from matplotlib import pyplot as plt fig = plt.figure() diff --git a/lifelines/fitters/piecewise_exponential_regression_fitter.py b/lifelines/fitters/piecewise_exponential_regression_fitter.py index 507cf08d3..6643bed70 100644 --- a/lifelines/fitters/piecewise_exponential_regression_fitter.py +++ b/lifelines/fitters/piecewise_exponential_regression_fitter.py @@ -37,8 +37,7 @@ class PiecewiseExponentialRegressionFitter(ParametricRegressionFitter): """ - # mmm not really... - _FAST_MEDIAN_PREDICT = True + _FAST_MEDIAN_PREDICT = True # mmm not really... # about 50% faster than BFGS _scipy_fit_method = "SLSQP" @@ -116,7 +115,7 @@ def predict_cumulative_hazard(self, df, times=None, conditional_after=None) -> p if conditional_after is not None: raise NotImplementedError() - times = np.atleast_1d(coalesce(times, self.timeline, np.unique(self.durations))).astype(float) + times = np.atleast_1d(coalesce(times, self.timeline)).astype(float) n = times.shape[0] times = times.reshape((n, 1)) diff --git a/lifelines/statistics.py b/lifelines/statistics.py index ab08f01ab..ebebd2fd5 100644 --- a/lifelines/statistics.py +++ b/lifelines/statistics.py @@ -852,7 +852,7 @@ def proportional_hazard_test( the fitted Cox model, fitted with `training_df`, you wish to test. Currently only the CoxPHFitter is supported, but later CoxTimeVaryingFitter, too. training_df: DataFrame - the DataFrame used in the call to the Cox model's ``fit``. + the DataFrame used in the call to the Cox model's ``fit``. Optional if providing ``precomputed_residuals`` time_transform: vectorized function, list, or string, optional (default='rank') {'all', 'km', 'rank', 'identity', 'log'} One of the strings above, a list of strings, or a function to transform the time (must accept (time, durations, weights) however). 'all' will present all the transforms. @@ -884,10 +884,6 @@ def proportional_hazard_test( else: scaled_resids = precomputed_residuals - scaled_resids = ( - fitted_cox_model.compute_residuals(training_df, kind="schoenfeld").dot(fitted_cox_model.variance_matrix_) * n_deaths - ) - def compute_statistic(times, resids, n_deaths): demeaned_times = times - times.mean() T = (demeaned_times.values[:, None] * resids.values).sum(0) ** 2 / ( diff --git a/lifelines/utils/__init__.py b/lifelines/utils/__init__.py index d12d77cff..10acecc32 100644 --- a/lifelines/utils/__init__.py +++ b/lifelines/utils/__init__.py @@ -959,6 +959,18 @@ def pass_for_numeric_dtypes_or_raise_array(x): raise ValueError("Values must be numeric: no strings, datetimes, objects, etc.") +def check_scaling(df): + for col in df.columns: + if df[col].mean() > 1e4: + warning_text = dedent( + """Column {0} has a large mean, try centering this to a value closer to 0. + """.format( + col + ) + ) + warnings.warn(warning_text, ConvergenceWarning) + + def check_dimensions(df): n, d = df.shape if d >= n: @@ -1506,7 +1518,7 @@ class StepSizer: """ def __init__(self, initial_step_size: Optional[float]) -> None: - initial_step_size = initial_step_size or 0.95 + initial_step_size = initial_step_size or 0.90 self.initial_step_size = initial_step_size self.step_size = initial_step_size @@ -1514,7 +1526,7 @@ def __init__(self, initial_step_size: Optional[float]) -> None: self.norm_of_deltas: List[float] = [] def update(self, norm_of_delta: float) -> "StepSizer": - SCALE = 1.2 + SCALE = 1.3 LOOKBACK = 3 self.norm_of_deltas.append(norm_of_delta) @@ -1525,10 +1537,10 @@ def update(self, norm_of_delta: float) -> "StepSizer": # Only allow small steps if norm_of_delta >= 15.0: - self.step_size *= 0.25 + self.step_size *= 0.1 self.temper_back_up = True elif 15.0 > norm_of_delta > 5.0: - self.step_size *= 0.75 + self.step_size *= 0.25 self.temper_back_up = True # recent non-monotonically decreasing is a concern From eab417731a769ef7b2e9f49e9ff5abe6c65cf960 Mon Sep 17 00:00:00 2001 From: CamDavidsonPilon Date: Tue, 25 Aug 2020 19:23:29 -0400 Subject: [PATCH 3/6] QoL improvements --- CHANGELOG.md | 12 +++++++++ lifelines/fitters/__init__.py | 4 +-- lifelines/fitters/aalen_additive_fitter.py | 2 -- lifelines/fitters/nelson_aalen_fitter.py | 2 -- lifelines/tests/test_estimation.py | 8 ++++++ lifelines/utils/__init__.py | 30 +++++----------------- lifelines/version.py | 2 +- 7 files changed, 29 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bdb4e41df..6c3d19b03 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,17 @@ ## Changelog +#### 0.25.4 - unreleased + +##### New features + - New baseline estimator for Cox models: ``piecewise`` + - Performance improvements for parametric models `log_likelihood_ratio_test()` and `print_summary()` + - Better step-size defaults for Cox model -> more robust convergence. + + +##### Bug fixes + - fix `check_assumptions` when using formulas. + + #### 0.25.3 - 2020-08-24 ##### New features diff --git a/lifelines/fitters/__init__.py b/lifelines/fitters/__init__.py index be89d9af0..b45dc103b 100644 --- a/lifelines/fitters/__init__.py +++ b/lifelines/fitters/__init__.py @@ -1998,8 +1998,8 @@ def _compute_variance_matrix(self) -> np.array: Some ways to possible ways fix this: 0. Are there any lifelines warnings outputted during the `fit`? - 1. Inspect your DataFrame: does everything look as expected? Do you need to add/drop a constant (intercept) column? - 2. Does a particularly large variable need to be centered to 0? + 1. Does a particularly large variable need to be centered to 0? + 2. Inspect your DataFrame: does everything look as expected? Do you need to add/drop a constant (intercept) column? 3. Is there high-collinearity in the dataset? Try using the variance inflation factor (VIF) to find redundant variables. 4. Trying adding a small penalizer (or changing it, if already present). Example: `%s(penalizer=0.01).fit(...)`. 5. Are there any extreme outliers? Try modeling them or dropping them to see if it helps convergence. diff --git a/lifelines/fitters/aalen_additive_fitter.py b/lifelines/fitters/aalen_additive_fitter.py index 656793bca..293b78edd 100644 --- a/lifelines/fitters/aalen_additive_fitter.py +++ b/lifelines/fitters/aalen_additive_fitter.py @@ -87,8 +87,6 @@ def __init__(self, fit_intercept=True, alpha=0.05, coef_penalizer=0.0, smoothing self.coef_penalizer = coef_penalizer self.smoothing_penalizer = smoothing_penalizer - if not (0 < alpha <= 1.0): - raise ValueError("alpha parameter must be between 0 and 1.") if coef_penalizer < 0 or smoothing_penalizer < 0: raise ValueError("penalizer parameters must be >= 0.") diff --git a/lifelines/fitters/nelson_aalen_fitter.py b/lifelines/fitters/nelson_aalen_fitter.py index e745c5eb1..8d4cc4de1 100644 --- a/lifelines/fitters/nelson_aalen_fitter.py +++ b/lifelines/fitters/nelson_aalen_fitter.py @@ -57,8 +57,6 @@ class NelsonAalenFitter(UnivariateFitter): def __init__(self, alpha=0.05, nelson_aalen_smoothing=True, **kwargs): super(NelsonAalenFitter, self).__init__(alpha=alpha, **kwargs) - if not (0 < alpha <= 1.0): - raise ValueError("alpha parameter must be between 0 and 1.") self.alpha = alpha self.nelson_aalen_smoothing = nelson_aalen_smoothing diff --git a/lifelines/tests/test_estimation.py b/lifelines/tests/test_estimation.py index 112ff6852..21a7b018d 100644 --- a/lifelines/tests/test_estimation.py +++ b/lifelines/tests/test_estimation.py @@ -1786,6 +1786,7 @@ def rossi(self): def regression_models_sans_strata_model(self): return [ CoxPHFitter(penalizer=1e-6, baseline_estimation_method="breslow"), + CoxPHFitter(penalizer=1e-6, baseline_estimation_method="piecewise", breakpoints=[15]), CoxPHFitter(penalizer=1e-6, baseline_estimation_method="spline", n_baseline_knots=2), CoxPHFitter(penalizer=1e-6, baseline_estimation_method="spline", n_baseline_knots=3), AalenAdditiveFitter(coef_penalizer=1.0, smoothing_penalizer=1.0), @@ -1804,6 +1805,9 @@ def regression_models(self, regression_models_sans_strata_model): regression_models_sans_strata_model.append( CoxPHFitter(strata=["wexp"], baseline_estimation_method="spline", n_baseline_knots=2) ) + regression_models_sans_strata_model.append( + CoxPHFitter(strata=["wexp"], baseline_estimation_method="piecewise", breakpoints=[15]) + ) return regression_models_sans_strata_model def test_compute_central_values_of_raw_training_data(self): @@ -3178,6 +3182,10 @@ def test_check_assumptions_for_subset_of_columns(self, cph, rossi): cph.check_assumptions(rossi, columns=[]) cph.check_assumptions(rossi, columns=["age", "fin"]) + def test_check_assumptions_with_formuals(self, cph, rossi): + cph.fit(rossi, "week", "arrest", formula="bs(age, df=3) + fin * wexp") + cph.check_assumptions(rossi) + def test_cph_doesnt_modify_original_dataframe(self, cph): df = pd.DataFrame( { diff --git a/lifelines/utils/__init__.py b/lifelines/utils/__init__.py index 10acecc32..fe6a1191c 100644 --- a/lifelines/utils/__init__.py +++ b/lifelines/utils/__init__.py @@ -1731,6 +1731,11 @@ def find_best_parametric_model( weights: an array, or pd.Series, of length n integer weights per observation + + Note + ---------- + Due to instability, the GeneralizedGammaFitter is not tested here. + Returns ---------- tuple of fitted best_model and best_score @@ -1792,7 +1797,6 @@ def find_best_parametric_model( PiecewiseExponentialFitter(knots1[1:-1], label="PiecewiseExponentialFitter: 1 breakpoint"), PiecewiseExponentialFitter(knots2[1:-1], label="PiecewiseExponentialFitter: 2 breakpoint"), PiecewiseExponentialFitter(knots3[1:-1], label="PiecewiseExponentialFitter: 3 breakpoint"), - GeneralizedGammaFitter(), SplineFitter(knots1, label="SplineFitter: 1 internal knot"), SplineFitter(knots2, label="SplineFitter: 2 internal knot"), SplineFitter(knots3, label="SplineFitter: 3 internal knot"), @@ -1939,28 +1943,6 @@ def _string_seed_transform(self, formula: str, df: pd.DataFrame): if self.force_intercept: formula += "+ 1" - try: - _X = patsy.dmatrix(formula, df, 1, NA_action="raise") - - except SyntaxError as e: - import traceback - - column_error = "\n".join(traceback.format_exc().split("\n")[-4:]) - raise FormulaSyntaxError( - ( - """ -It looks like the DataFrame has non-standard column names. See below for which column: + _X = patsy.dmatrix(formula, df, 1, NA_action="raise") -%s - -All columns should either - -i) have no non-traditional characters (this includes spaces and periods) -ii) use `formula=` kwarg in the call to `fit`, and use `Q()` to wrap the column name. - -See more docs here: https://lifelines.readthedocs.io/en/latest/Examples.html#fixing-a-formulasyntaxerror - """ - % column_error - ) - ) return _X.design_info diff --git a/lifelines/version.py b/lifelines/version.py index a4818eb68..4a339281d 100644 --- a/lifelines/version.py +++ b/lifelines/version.py @@ -1,4 +1,4 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -__version__ = "0.25.3" +__version__ = "0.25.4" From 327b7c3292493b2588e41c38d04d94ec275d4e59 Mon Sep 17 00:00:00 2001 From: CamDavidsonPilon Date: Wed, 26 Aug 2020 12:06:59 -0400 Subject: [PATCH 4/6] this feels like an improvements --- docs/Survival Regression.rst | 14 ++++++---- lifelines/exceptions.py | 4 --- lifelines/fitters/__init__.py | 11 +++----- lifelines/fitters/coxph_fitter.py | 44 ++++++++++++++---------------- lifelines/fitters/mixins.py | 4 +-- lifelines/tests/test_estimation.py | 30 +++++++++++++++++++- lifelines/utils/__init__.py | 3 +- lifelines/utils/printer.py | 2 +- 8 files changed, 66 insertions(+), 46 deletions(-) diff --git a/docs/Survival Regression.rst b/docs/Survival Regression.rst index 37b4c763a..0594568dd 100644 --- a/docs/Survival Regression.rst +++ b/docs/Survival Regression.rst @@ -482,12 +482,12 @@ Residuals After fitting a Cox model, we can look back and compute important model residuals. These residuals can tell us about non-linearities not captured, violations of proportional hazards, and help us answer other useful modeling questions. See `Assessing Cox model fit using residuals`_. -Modeling baseline hazard and survival with cubic splines +Modeling baseline hazard and survival with parametric models --------------------------------------------------------------- Normally, the Cox model is *semi-parametric*, which means that its baseline hazard, :math:`h_0(t)`, has no parametric form. This is the default for *lifelines*. However, it is sometimes valuable to produce a parametric baseline instead. A parametric baseline makes survival predictions more efficient, allows for better understanding of baseline behaviour, and allows interpolation/extrapolation. -In *lifelines*, there is an option to fit to a parametric baseline with cubic splines. Cubic splines are highly flexible and can capture the underlying data almost as well as non-parametric methods, and with much more efficiency. +In *lifelines*, there is an option to fit to a parametric baseline with 1) cubic splines, or 2) piecewise constant hazards. Cubic splines are highly flexible and can capture the underlying data almost as well as non-parametric methods, and with much more efficiency. .. code:: python @@ -507,9 +507,11 @@ Below we compare the non-parametric and the fully parametric baseline survivals: .. code:: python cph_semi = CoxPHFitter().fit(rossi, 'week', event_col='arrest') + cph_piecewise = CoxPHFitter(baseline_estimation_method="piecewise", breakpoints=[20, 35]).fit(rossi, 'week', event_col='arrest') - ax = cph_spline.baseline_survival_.plot() - cph_semi.baseline_survival_.plot(ax=ax, drawstyle="steps-post") + ax = cph_spline.baseline_cumulative_hazard_.plot() + cph_semi.baseline_cumulative_hazard_.plot(ax=ax, drawstyle="steps-post") + cph_piecewise.baseline_cumulative_hazard_.plot(ax=ax) .. figure:: images/spline_and_semi.png @@ -741,7 +743,7 @@ You read more about and see other examples of the extensions to in the docs for Prediction ----------------------------------------------- -Given a new subject, we ask questions about their future survival? When are they likely to experience the event? What does their survival function look like? The :class:`~lifelines.fitters.weibull_aft_fitter.WeibullAFTFitter` is able to answer these. If we have modeled the ancillary covariates, we are required to include those as well: +Given a new subject, we'd like to ask questions about their future survival. When are they likely to experience the event? What does their survival function look like? The :class:`~lifelines.fitters.weibull_aft_fitter.WeibullAFTFitter` is able to answer these. If we have modeled the ancillary covariates, we are required to include those as well: .. code:: python @@ -754,7 +756,7 @@ Given a new subject, we ask questions about their future survival? When are they aft.predict_expectation(X, ancillary=X) -There are two hyper-parameters that can be used to to achieve a better test score. These are ``penalizer`` and ``l1_ratio`` in the call to :class:`~lifelines.fitters.weibull_aft_fitter.WeibullAFTFitter`. The penalizer is similar to scikit-learn's ``ElasticNet`` model, see their `docs `_. (However, *lifelines* will also accept an array for custom penalizer per variable, see `Cox docs above `_) +There are two hyper-parameters that can be used to to achieve a better test score. These are ``penalizer`` and ``l1_ratio`` in the call to :class:`~lifelines.fitters.weibull_aft_fitter.WeibullAFTFitter`. The penalizer is similar to scikit-learn's ``ElasticNet`` model, see their `docs `_. (However, *lifelines* will also accept an array for custom penalty value per variable, see `Cox docs above `_) .. code:: python diff --git a/lifelines/exceptions.py b/lifelines/exceptions.py index 0078435c6..8c57ef283 100644 --- a/lifelines/exceptions.py +++ b/lifelines/exceptions.py @@ -1,8 +1,4 @@ # -*- coding: utf-8 -*- -class FormulaSyntaxError(SyntaxError): - def __init__(self, message): - self.message = message - super().__init__(self.message) class StatError(Exception): diff --git a/lifelines/fitters/__init__.py b/lifelines/fitters/__init__.py index b45dc103b..ad6757914 100644 --- a/lifelines/fitters/__init__.py +++ b/lifelines/fitters/__init__.py @@ -1221,7 +1221,7 @@ def _compute_central_values_of_raw_training_data(self, df, strata=None, name="ba - Numerics are transformed to their median value. """ if df.size == 0: - return None + return pd.DataFrame(index=["baseline"]) if strata is not None: # apply this function within each stratified dataframe @@ -2064,10 +2064,6 @@ def _compute_confidence_intervals(self) -> pd.DataFrame: columns=["%g%% lower-bound" % ci, "%g%% upper-bound" % ci], ) - @property - def _ll_null_dof(self): - return len(self._fitted_parameter_names) - @property def _ll_null(self): if hasattr(self, "_ll_null_"): @@ -2075,13 +2071,13 @@ def _ll_null(self): regressors = {name: "1" for name in self._fitted_parameter_names} - # we can reuse the final values from the full fit for this smaller fit. + # we can reuse the final values from the full fit for this partial fit. initial_point = {} for name in self._fitted_parameter_names: try: initial_point[name] = self.params_[name]["Intercept"] except: - initial_point[name] = 0.0 + initial_point[name] = 0.0001 df = pd.DataFrame({"entry": self.entry, "w": self.weights}) @@ -2108,6 +2104,7 @@ def _ll_null(self): model.fit_left_censoring( df, "T", "E", entry_col="entry", weights_col="w", regressors=regressors, initial_point=initial_point ) + self._ll_null_dof = model.params_.shape[0] self._ll_null_ = model.log_likelihood_ return self._ll_null_ diff --git a/lifelines/fitters/coxph_fitter.py b/lifelines/fitters/coxph_fitter.py index 2d7270cd4..4dd65a991 100644 --- a/lifelines/fitters/coxph_fitter.py +++ b/lifelines/fitters/coxph_fitter.py @@ -72,7 +72,7 @@ class CoxPHFitter(RegressionFitter, ProportionalHazardMixin): n_baseline_knots: int Used when ``baseline_estimation_method="spline"`. Set the number of knots (interior & exterior) in the baseline hazard. Should be atleast 2. Royston et. al, the authors - of this model, suggest 4 to start, but any values between 2 and 6 are reasonable. + of this model, suggest 4 to start, but any values between 2 and 8 are reasonable. breakpoints: int Used when ``baseline_estimation_method="piecewise"`. Set the positions of the baseline hazard breakpoints. @@ -340,7 +340,7 @@ def _fit_model_piecewise(self, *args, **kwargs): strata = kwargs.pop("strata") if strata is None: - regressors = {**{"beta_": formula}, **{"log_lambda%d_" % i: "1" for i in range(1, len(self.breakpoints) + 2)}} + regressors = {**{"beta_": formula}, **{"log_lambda%d_" % i: "1" for i in range(2, len(self.breakpoints) + 2)}} strata_values = None elif isinstance(strata, (list, str)): strata_namer = ParametricPiecewiseBaselinePHFitter._strata_labeler @@ -352,7 +352,7 @@ def _fit_model_piecewise(self, *args, **kwargs): strata_values = df.groupby(strata).size().index.tolist() regressors = {"beta_": formula} for stratum in strata_values: - regressors.update({strata_namer(stratum, i): "1" for i in range(1, len(self.breakpoints) + 2)}) + regressors.update({strata_namer(stratum, i): "1" for i in range(2, len(self.breakpoints) + 2)}) else: raise ValueError("Wrong type for strata. String, None, or list of strings") @@ -2414,7 +2414,7 @@ def concordance_index_(self) -> float: @property def AIC_(self): raise exceptions.StatError( - "Since the model is semi-parametric (and not fully-parametric), the AIC does not exist. You probably want the `.AIC_partial_` property instead" + "Since the model is semi-parametric (and not fully-parametric), the AIC does not exist. You probably want the `.AIC_partial_` property instead." ) @@ -2596,7 +2596,7 @@ def predict_hazard(self, df, *, conditional_after=None, times=None): @property def AIC_partial_(self): raise exceptions.StatError( - "Since the spline model is fully parametric (and not semi-parametric), the partial AIC does not exist. You probably want the `.AIC_` property instead" + "Since the spline model is fully parametric (and not semi-parametric), the partial AIC does not exist. You probably want the `.AIC_` property instead." ) @@ -2604,7 +2604,7 @@ class ParametricSplinePHFitter(ParametricCoxModelFitter, SplineFitterMixin): r""" Proportional hazard model with cubic splines model for the baseline hazard. - .. math:: h(t|x) = h_0(t) \exp((x - \overline{x})' \beta) + .. math:: h(t|x) = h_0(t) \exp(x' \beta) where @@ -2625,7 +2625,6 @@ class ParametricSplinePHFitter(ParametricCoxModelFitter, SplineFitterMixin): _scipy_fit_options = {"maxiter": 1000, "iprint": 100} _FAST_MEDIAN_PREDICT = False - fit_intercept = True def __init__(self, strata, strata_values, n_baseline_knots=1, *args, **kwargs): @@ -2668,14 +2667,14 @@ def _create_initial_point(self, Ts, E, entries, weights, Xs): params.update( {self._strata_labeler(stratum, 1): np.array([0.05]), self._strata_labeler(stratum, 2): np.array([-0.05])} ) - params.update({self._strata_labeler(stratum, i): np.array([0.0]) for i in range(3, self.n_baseline_knots + 1)}) + params.update({self._strata_labeler(stratum, i): np.array([0.01]) for i in range(3, self.n_baseline_knots + 1)}) return params else: return { **{"beta_": np.zeros(len(Xs["beta_"].columns)), "phi1_": np.array([0.05]), "phi2_": np.array([-0.05])}, - **{"phi%d_" % i: np.array([0.0]) for i in range(3, self.n_baseline_knots + 1)}, + **{"phi%d_" % i: np.array([0.01]) for i in range(3, self.n_baseline_knots + 1)}, } def _cumulative_hazard_with_strata(self, params, T, Xs): @@ -2728,13 +2727,12 @@ class ParametricPiecewiseBaselinePHFitter(ParametricCoxModelFitter, Proportional where .. math:: h_0(t) = \begin{cases} - 1/\lambda_0 & \text{if $t \le \tau_0$} \\ - 1/\lambda_1 & \text{if $\tau_0 < t \le \tau_1$} \\ - 1/\lambda_2 & \text{if $\tau_1 < t \le \tau_2$} \\ + exp{\beta \cdot \text{center}(x)} & \text{if $t \le \tau_0$} \\ + exp{\beta \cdot \text{center}(x)} \cdot lambda_1 & \text{if $\tau_0 < t \le \tau_1$} \\ + exp{\beta \cdot \text{center}(x)} \cdot lambda_2 & \text{if $\tau_1 < t \le \tau_2$} \\ ... \end{cases} - Note ------- This is a "hidden" class that is invoked when using ``baseline_estimation_method="piecewise"``. You probably want to use ``CoxPHFitter``, not this. @@ -2744,7 +2742,6 @@ class ParametricPiecewiseBaselinePHFitter(ParametricCoxModelFitter, Proportional _FAST_MEDIAN_PREDICT = False cluster_col = None - force_no_intercept = True def __init__(self, strata, strata_values, breakpoints, *args, **kwargs): self.strata = strata @@ -2770,7 +2767,8 @@ def _fitted_parameter_names(self): names += [self._strata_labeler(stratum, i) for i in range(1, self.n_breakpoints + 2)] return names else: - return ["beta_"] + ["log_lambda%d_" % i for i in range(1, self.n_breakpoints + 2)] + # return ["beta_"] + ["log_lambda%d_" % i for i in range(1, self.n_breakpoints + 2)] + return ["beta_"] + ["log_lambda%d_" % i for i in range(2, self.n_breakpoints + 2)] def _create_initial_point(self, Ts, E, entries, weights, Xs): # Some non-zero initial points. This is important as it nudges the model slightly away from the degenerate all-zeros model. Try setting it to 0, and watch the model fail to converge. @@ -2786,11 +2784,7 @@ def _create_initial_point(self, Ts, E, entries, weights, Xs): else: return { - **{ - "beta_": np.zeros(len(Xs["beta_"].columns)), - "log_lambda1_": np.array([0.05]), - "log_lambda2_": np.array([-0.05]), - }, + **{"beta_": np.zeros(len(Xs["beta_"].columns)), "log_lambda2_": np.array([-0.05])}, **{"log_lambda%d_" % i: np.array([0.0]) for i in range(3, self.n_breakpoints + 2)}, } @@ -2815,7 +2809,9 @@ def _cumulative_hazard_with_strata(self, params, T, Xs): bps = anp.append(self.breakpoints, [anp.inf]) M = anp.minimum(anp.tile(bps, (n, 1)), T_) M = anp.hstack([M[:, tuple([0])], anp.diff(M, axis=1)]) - log_lambdas_ = anp.array([params[self._strata_labeler(stratum, i)] for i in range(1, self.n_breakpoints + 2)]) + log_lambdas_ = anp.array( + [0] + [params[self._strata_labeler(stratum, i)][0] for i in range(2, self.n_breakpoints + 2)] + ) H_ = partial_hazard * (M * anp.exp(log_lambdas_).T).sum(1) output.append(H_) @@ -2830,7 +2826,7 @@ def _cumulative_hazard_sans_strata(self, params, T, Xs): bps = anp.append(self.breakpoints, [anp.inf]) M = anp.minimum(anp.tile(bps, (n, 1)), T) M = anp.hstack([M[:, tuple([0])], anp.diff(M, axis=1)]) - log_lambdas_ = anp.array([params[param] for param in self._fitted_parameter_names if param != "beta_"]) + log_lambdas_ = anp.array([0.0] + [params[param][0] for param in self._fitted_parameter_names if param != "beta_"]) return partial_hazard * (M * anp.exp(log_lambdas_).T).sum(1) def predict_cumulative_hazard(self, df, times=None, conditional_after=None) -> pd.DataFrame: @@ -2876,7 +2872,7 @@ def predict_cumulative_hazard(self, df, times=None, conditional_after=None) -> p for stratum, stratified_X in df.groupby(self.strata): log_lambdas_ = anp.array( - [self.params_[self._strata_labeler(stratum, i)] for i in range(1, self.n_breakpoints + 2)] + [0] + [self.params_[self._strata_labeler(stratum, i)][0] for i in range(1, self.n_breakpoints + 2)] ) lambdas_ = np.exp(log_lambdas_) @@ -2890,7 +2886,7 @@ def predict_cumulative_hazard(self, df, times=None, conditional_after=None) -> p return cumulative_hazard else: - log_lambdas_ = np.array([self.params_[param] for param in self._fitted_parameter_names if param != "beta_"]) + log_lambdas_ = np.array([0] + [self.params_[param][0] for param in self._fitted_parameter_names if param != "beta_"]) lambdas_ = np.exp(log_lambdas_) Xs = self.regressors.transform_df(df) diff --git a/lifelines/fitters/mixins.py b/lifelines/fitters/mixins.py index 4eab6ddbb..90663b1b8 100644 --- a/lifelines/fitters/mixins.py +++ b/lifelines/fitters/mixins.py @@ -95,7 +95,7 @@ def check_assumptions( test_results = proportional_hazard_test(self, training_df, time_transform=["rank", "km"], precomputed_residuals=residuals) residuals_and_duration = residuals.join(training_df[self.duration_col]) - Xs = self.regressors.transform_df(df) + Xs = self.regressors.transform_df(training_df) counter = 0 n = residuals_and_duration.shape[0] @@ -178,7 +178,7 @@ def check_assumptions( ) if show_plots: - print("Bootstrapping lowess lines...") + print("Bootstrapping lowess lines. May take a moment...") from matplotlib import pyplot as plt fig = plt.figure() diff --git a/lifelines/tests/test_estimation.py b/lifelines/tests/test_estimation.py index 21a7b018d..fe9d3db4a 100644 --- a/lifelines/tests/test_estimation.py +++ b/lifelines/tests/test_estimation.py @@ -1815,7 +1815,7 @@ def test_compute_central_values_of_raw_training_data(self): central_values = RegressionFitter()._compute_central_values_of_raw_training_data empty_df = pd.DataFrame([]) - assert central_values(empty_df) is None + assert_frame_equal(central_values(empty_df), pd.DataFrame(index=["baseline"])) all_categorical = pd.DataFrame([{"var1": "A", "var2": "C"}, {"var1": "B", "var2": "C"}, {"var1": "B", "var2": "C"}]) assert_frame_equal(central_values(all_categorical), pd.DataFrame([{"var1": "B", "var2": "C"}], index=["baseline"])) @@ -2836,6 +2836,34 @@ def test_efron_newtons_method(self, data_nus, cph): assert np.abs(newton(X, T, E, W, entries)[0] - -0.0335) < 0.0001 +class TestCoxPHFitterPeices: + @pytest.fixture + def cph(self): + return CoxPHFitter(baseline_estimation_method="piecewise", breakpoints=[25]) + + def test_baseline_hazard_has_correct_functional_form(self, cph, rossi): + cph.fit(rossi, "week", "arrest", formula="fin") + bhz = cph.baseline_hazard_.loc[1, "baseline hazard"] + + npt.assert_allclose( + bhz, + np.exp( + cph.summary.loc[("beta_", "Intercept"), "coef"] + + cph.summary.loc[("beta_", "fin"), "coef"] * cph._central_values.loc["baseline", "fin"] + ), + ) + + bhz = cph.baseline_hazard_.loc[rossi["week"].max(), "baseline hazard"] + npt.assert_allclose( + bhz, + np.exp( + cph.summary.loc[("beta_", "Intercept"), "coef"] + + cph.summary.loc[("beta_", "fin"), "coef"] * cph._central_values.loc["baseline", "fin"] + ) + * np.exp(cph.summary.loc[("log_lambda2_", "Intercept"), "coef"]), + ) + + class TestCoxPHFitter: @pytest.fixture def cph(self): diff --git a/lifelines/utils/__init__.py b/lifelines/utils/__init__.py index fe6a1191c..1ef42fed2 100644 --- a/lifelines/utils/__init__.py +++ b/lifelines/utils/__init__.py @@ -18,7 +18,7 @@ import pandas as pd from lifelines.utils.concordance import concordance_index -from lifelines.exceptions import ConvergenceWarning, ApproximationWarning, ConvergenceError, FormulaSyntaxError +from lifelines.exceptions import ConvergenceWarning, ApproximationWarning, ConvergenceError __all__ = [ @@ -1922,6 +1922,7 @@ def transform_df(self, df: pd.DataFrame): # we can't concat empty dataframes and return a column MultiIndex, # so we create a "fake" dataframe (acts like a dataframe) to return. + # This should be removed because it's gross. if Xs_df.size == 0: return {p: pd.DataFrame(index=df.index) for p in self.mappings.keys()} else: diff --git a/lifelines/utils/printer.py b/lifelines/utils/printer.py index 244f2da0e..7c0520ce2 100644 --- a/lifelines/utils/printer.py +++ b/lifelines/utils/printer.py @@ -156,7 +156,7 @@ def to_ascii(self): ) if second_row_set: - repr_string += "\n" + repr_string += "\n\n" repr_string += df[columns].to_string( float_format=utils.format_floats(decimals), formatters={ From 22e6ceda7479cdb1daa1cd3f74b1cd3cf0ade306 Mon Sep 17 00:00:00 2001 From: CamDavidsonPilon Date: Wed, 26 Aug 2020 14:16:22 -0400 Subject: [PATCH 5/6] tests pass --- lifelines/fitters/__init__.py | 5 +- lifelines/fitters/coxph_fitter.py | 14 ++-- lifelines/tests/test_estimation.py | 106 ++++++++++++++++++++--------- 3 files changed, 84 insertions(+), 41 deletions(-) diff --git a/lifelines/fitters/__init__.py b/lifelines/fitters/__init__.py index ad6757914..a62004c7c 100644 --- a/lifelines/fitters/__init__.py +++ b/lifelines/fitters/__init__.py @@ -2064,6 +2064,10 @@ def _compute_confidence_intervals(self) -> pd.DataFrame: columns=["%g%% lower-bound" % ci, "%g%% upper-bound" % ci], ) + @property + def _ll_null_dof(self): + return len(self._fitted_parameter_names) + @property def _ll_null(self): if hasattr(self, "_ll_null_"): @@ -2104,7 +2108,6 @@ def _ll_null(self): model.fit_left_censoring( df, "T", "E", entry_col="entry", weights_col="w", regressors=regressors, initial_point=initial_point ) - self._ll_null_dof = model.params_.shape[0] self._ll_null_ = model.log_likelihood_ return self._ll_null_ diff --git a/lifelines/fitters/coxph_fitter.py b/lifelines/fitters/coxph_fitter.py index 4dd65a991..ea079e764 100644 --- a/lifelines/fitters/coxph_fitter.py +++ b/lifelines/fitters/coxph_fitter.py @@ -2740,6 +2740,7 @@ class ParametricPiecewiseBaselinePHFitter(ParametricCoxModelFitter, Proportional _KNOWN_MODEL = True _FAST_MEDIAN_PREDICT = False + fit_intercept = True cluster_col = None @@ -2764,10 +2765,9 @@ def _fitted_parameter_names(self): if self.strata is not None: names = ["beta_"] for stratum in self.strata_values: - names += [self._strata_labeler(stratum, i) for i in range(1, self.n_breakpoints + 2)] + names += [self._strata_labeler(stratum, i) for i in range(2, self.n_breakpoints + 2)] return names else: - # return ["beta_"] + ["log_lambda%d_" % i for i in range(1, self.n_breakpoints + 2)] return ["beta_"] + ["log_lambda%d_" % i for i in range(2, self.n_breakpoints + 2)] def _create_initial_point(self, Ts, E, entries, weights, Xs): @@ -2775,9 +2775,7 @@ def _create_initial_point(self, Ts, E, entries, weights, Xs): if self.strata is not None: params = {"beta_": np.zeros(len(Xs["beta_"].columns))} for stratum in self.strata_values: - params.update( - {self._strata_labeler(stratum, 1): np.array([0.05]), self._strata_labeler(stratum, 2): np.array([-0.05])} - ) + params.update({self._strata_labeler(stratum, 2): np.array([-0.05])}) params.update({self._strata_labeler(stratum, i): np.array([0.0]) for i in range(3, self.n_breakpoints + 2)}) return params @@ -2872,14 +2870,14 @@ def predict_cumulative_hazard(self, df, times=None, conditional_after=None) -> p for stratum, stratified_X in df.groupby(self.strata): log_lambdas_ = anp.array( - [0] + [self.params_[self._strata_labeler(stratum, i)][0] for i in range(1, self.n_breakpoints + 2)] + [0] + [self.params_[self._strata_labeler(stratum, i)][0] for i in range(2, self.n_breakpoints + 2)] ) lambdas_ = np.exp(log_lambdas_) Xs_ = self.regressors.transform_df(stratified_X) partial_hazard = np.exp(np.dot(Xs_["beta_"], self.params_["beta_"])) - cumulative_hazard_ = pd.DataFrame(partial_hazard * np.dot(M, lambdas_), index=times[:, 0]) + cumulative_hazard_ = pd.DataFrame(np.outer(np.dot(M, lambdas_), partial_hazard), index=times[:, 0]) cumulative_hazard_.columns = stratified_X["index"] cumulative_hazard = cumulative_hazard.merge(cumulative_hazard_, how="outer", right_index=True, left_index=True) @@ -2891,7 +2889,7 @@ def predict_cumulative_hazard(self, df, times=None, conditional_after=None) -> p Xs = self.regressors.transform_df(df) partial_hazard = np.exp(np.dot(Xs["beta_"], self.params_["beta_"])) - return pd.DataFrame(partial_hazard * np.dot(M, lambdas_), columns=utils._get_index(df), index=times[:, 0]) + return pd.DataFrame(np.outer(np.dot(M, lambdas_), partial_hazard), columns=utils._get_index(df), index=times[:, 0]) class _BatchVsSingle: diff --git a/lifelines/tests/test_estimation.py b/lifelines/tests/test_estimation.py index fe9d3db4a..f24a3ea6b 100644 --- a/lifelines/tests/test_estimation.py +++ b/lifelines/tests/test_estimation.py @@ -630,7 +630,6 @@ def test_typeerror_is_thrown_if_there_is_nans_in_the_event_col(self, univariate_ with pytest.raises(TypeError): fitter().fit(T, E) - @pytest.mark.xfail def test_pickle_serialization(self, positive_sample_lifetimes, univariate_fitters): T = positive_sample_lifetimes[0] for f in univariate_fitters: @@ -641,7 +640,6 @@ def test_pickle_serialization(self, positive_sample_lifetimes, univariate_fitter dif = (fitter.durations - unpickled.durations).sum() assert dif == 0 - @pytest.mark.xfail def test_dill_serialization(self, positive_sample_lifetimes, univariate_fitters): from dill import dumps, loads @@ -654,7 +652,6 @@ def test_dill_serialization(self, positive_sample_lifetimes, univariate_fitters) dif = (fitter.durations - unpickled.durations).sum() assert dif == 0 - @pytest.mark.xfail def test_joblib_serialization(self, positive_sample_lifetimes, univariate_fitters): from joblib import dump, load @@ -2863,6 +2860,10 @@ def test_baseline_hazard_has_correct_functional_form(self, cph, rossi): * np.exp(cph.summary.loc[("log_lambda2_", "Intercept"), "coef"]), ) + def test_trivial_model_doesnt_fail(self, cph, rossi): + cph.fit(rossi[["week", "arrest"]], "week", "arrest") + cph.baseline_hazard_ + class TestCoxPHFitter: @pytest.fixture @@ -2873,14 +2874,24 @@ def cph(self): def cph_spline(self): return CoxPHFitter(baseline_estimation_method="spline", n_baseline_knots=2) - def test_spline_strata_null_dof(self, cph_spline, rossi): + @pytest.fixture + def cph_pieces(self): + return CoxPHFitter(baseline_estimation_method="piecewise", breakpoints=[25]) + + def test_parametric_strata_null_dof(self, cph_spline, cph_pieces, rossi): cph_spline.fit(rossi, "week", "arrest", strata="paro", formula="age") assert cph_spline._ll_null_dof < cph_spline.params_.shape[0] - def test_spline_strata_score(self, cph_spline, rossi): + cph_pieces.fit(rossi, "week", "arrest", strata="paro", formula="age") + assert cph_pieces._ll_null_dof < cph_pieces.params_.shape[0] + + def test_parametric_strata_score(self, cph_spline, cph_pieces, rossi): cph_spline.fit(rossi, "week", "arrest", strata="paro", formula="age") cph_spline.score(rossi) + cph_pieces.fit(rossi, "week", "arrest", strata="paro", formula="age") + cph_pieces.score(rossi) + def test_categorical_variables_are_still_encoded_correctly(self, cph): """ We must drop the intercept in the design matrix, but still have proper dummy encoding @@ -2928,7 +2939,7 @@ def test_entry_col_against_R(self, cph): npt.assert_allclose(cph.summary.loc["AIDSY", "se(coef)"], 0.24630, rtol=3) npt.assert_allclose(cph.log_likelihood_, -95.15478, rtol=2) - def test_formulas_can_be_used_for_inference(self, rossi, cph, cph_spline): + def test_formulas_can_be_used_for_inference(self, rossi, cph, cph_spline, cph_pieces): cph.fit(rossi, "week", "arrest", formula="age + race") assert cph.summary.index.tolist() == ["age", "race"] @@ -2941,13 +2952,24 @@ def test_formulas_can_be_used_for_inference(self, rossi, cph, cph_spline): cph_spline.fit(rossi, "week", "arrest", formula="age * race") assert cph_spline.summary.loc["beta_"].index.tolist() == ["Intercept", "age", "race", "age:race"] - def test_formulas_can_be_used_with_prediction(self, rossi, cph, cph_spline): + cph_pieces.fit(rossi, "week", "arrest", formula="age + race") + assert cph_pieces.summary.loc["beta_"].index.tolist() == ["Intercept", "age", "race"] + + cph_pieces.fit(rossi, "week", "arrest", formula="age * race") + assert cph_pieces.summary.loc["beta_"].index.tolist() == ["Intercept", "age", "race", "age:race"] + + @pytest.mark.parametrize( + "cph", + [ + CoxPHFitter(), + CoxPHFitter(baseline_estimation_method="spline", n_baseline_knots=3), + CoxPHFitter(baseline_estimation_method="piecewise", breakpoints=[25]), + ], + ) + def test_formulas_can_be_used_with_prediction(self, rossi, cph): cph.fit(rossi, "week", "arrest", formula="age * race") cph.predict_survival_function(rossi) - cph_spline.fit(rossi, "week", "arrest", formula="age * race") - cph_spline.predict_survival_function(rossi) - def test_timeline_argument_can_be_set(self, rossi, cph_spline, cph): timeline = np.linspace(0, 100) cph.fit(rossi, "week", "arrest", timeline=timeline) @@ -2990,7 +3012,14 @@ def test_model_can_accept_null_covariates(self, cph, rossi): cph.fit(rossi[["week", "arrest"]], "week", "arrest") assert True - def test_spline_model_can_handle_specific_outliers(self, cph_spline): + @pytest.mark.parametrize( + "cph", + [ + CoxPHFitter(baseline_estimation_method="spline", n_baseline_knots=3), + CoxPHFitter(baseline_estimation_method="piecewise", breakpoints=[25]), + ], + ) + def test_parameterized_model_can_handle_specific_outliers(self, cph): # https://github.com/CamDavidsonPilon/lifelines/issues/965 # Generating random correlated data @@ -3004,11 +3033,10 @@ def test_spline_model_can_handle_specific_outliers(self, cph_spline): test_data = pd.DataFrame({"Days": days, "Cov1": cov1, "Cov2": cov2}) test_data = test_data[test_data["Days"] > 0] - cph_sp = CoxPHFitter(baseline_estimation_method="spline", n_baseline_knots=3) - cph_sp.fit(test_data, duration_col="Days") + cph.fit(test_data, duration_col="Days") # check survival is always decreasing - assert np.all(cph_sp.baseline_survival_.diff().dropna() < 0) + assert np.all(cph.baseline_survival_.diff().dropna() < 0) def test_spline_and_breslow_models_offer_very_comparible_baseline_survivals(self, rossi): cph_breslow = CoxPHFitter().fit(rossi, "week", "arrest") @@ -3037,20 +3065,27 @@ def test_penalty_term_is_used_in_log_likelihood_value(self, rossi): .log_likelihood_ ) - def test_strata_estimation_for_spline(self, rossi, cph_spline): - cph_spline.fit(rossi, "week", "arrest", strata="wexp") + @pytest.mark.parametrize( + "cph", + [ + CoxPHFitter(baseline_estimation_method="spline", n_baseline_knots=3), + CoxPHFitter(baseline_estimation_method="piecewise", breakpoints=[25]), + ], + ) + def test_strata_estimation_for_parameterized(self, rossi, cph): + cph.fit(rossi, "week", "arrest", strata="wexp") - assert cph_spline.baseline_cumulative_hazard_.shape[1] == 2 - assert cph_spline.baseline_hazard_.shape[1] == 2 - assert cph_spline.baseline_survival_.shape[1] == 2 + assert cph.baseline_cumulative_hazard_.shape[1] == 2 + assert cph.baseline_hazard_.shape[1] == 2 + assert cph.baseline_survival_.shape[1] == 2 - cph_spline.fit(rossi, "week", "arrest", strata=["wexp", "paro"]) + cph.fit(rossi, "week", "arrest", strata=["wexp", "paro"]) - assert cph_spline.baseline_cumulative_hazard_.shape[1] == 4 - assert cph_spline.baseline_hazard_.shape[1] == 4 - assert cph_spline.baseline_survival_.shape[1] == 4 + assert cph.baseline_cumulative_hazard_.shape[1] == 4 + assert cph.baseline_hazard_.shape[1] == 4 + assert cph.baseline_survival_.shape[1] == 4 - def test_strata_estimation_is_same_if_using_trivial_strata(self, rossi, cph_spline): + def test_strata_estimation_is_same_if_using_trivial_strata(self, rossi): rossi["strata"] = "a" trivial_strata_cph = CoxPHFitter(baseline_estimation_method="spline", n_baseline_knots=3) trivial_strata_cph.fit(rossi, "week", "arrest", strata="strata") @@ -3068,16 +3103,23 @@ def test_strata_estimation_is_same_if_using_trivial_strata(self, rossi, cph_spli trivial_strata_cph.summary.loc[[("beta_", "Intercept"), ("sa_phi1_", "Intercept")]].reset_index(drop=True), ) - def test_baseline_estimation_for_spline(self, rossi, cph_spline): - cph_spline.fit(rossi, "week", "arrest") + @pytest.mark.parametrize( + "cph", + [ + CoxPHFitter(baseline_estimation_method="spline", n_baseline_knots=3), + CoxPHFitter(baseline_estimation_method="piecewise", breakpoints=[25]), + ], + ) + def test_baseline_estimation_for_parameteric(self, rossi, cph): + cph.fit(rossi, "week", "arrest") - assert isinstance(cph_spline.baseline_survival_, pd.DataFrame) - assert list(cph_spline.baseline_survival_.columns) == ["baseline survival"] - assert list(cph_spline.baseline_cumulative_hazard_.columns) == ["baseline cumulative hazard"] + assert isinstance(cph.baseline_survival_, pd.DataFrame) + assert list(cph.baseline_survival_.columns) == ["baseline survival"] + assert list(cph.baseline_cumulative_hazard_.columns) == ["baseline cumulative hazard"] - assert cph_spline.baseline_survival_at_times([1, 2, 3]).shape[0] == 3 - assert cph_spline.baseline_cumulative_hazard_at_times([1, 2, 3]).shape[0] == 3 - assert cph_spline.baseline_hazard_at_times([1, 2, 3]).shape[0] == 3 + assert cph.baseline_survival_at_times([1, 2, 3]).shape[0] == 3 + assert cph.baseline_cumulative_hazard_at_times([1, 2, 3]).shape[0] == 3 + assert cph.baseline_hazard_at_times([1, 2, 3]).shape[0] == 3 def test_conditional_after_in_prediction(self, rossi, cph): rossi.loc[rossi["week"] == 1, "week"] = 0 From 3cc6db661518848af0791f1cec10acbfb0e26522 Mon Sep 17 00:00:00 2001 From: CamDavidsonPilon Date: Wed, 26 Aug 2020 14:21:35 -0400 Subject: [PATCH 6/6] changelog --- CHANGELOG.md | 2 +- docs/Changelog.rst | 428 ++++++++++++++++++++++++--------------------- 2 files changed, 226 insertions(+), 204 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c3d19b03..a5dbcca69 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ ## Changelog -#### 0.25.4 - unreleased +#### 0.25.4 - 2020-08-26 ##### New features - New baseline estimator for Cox models: ``piecewise`` diff --git a/docs/Changelog.rst b/docs/Changelog.rst index 3dfd72fc1..3408dcf9e 100644 --- a/docs/Changelog.rst +++ b/docs/Changelog.rst @@ -1,9 +1,29 @@ Changelog --------- +0.25.4 - 2020-08-26 +^^^^^^^^^^^^^^^^^^^ + +New features +'''''''''''' + +- New baseline estimator for Cox models: ``piecewise`` +- Performance improvements for parametric models + ``log_likelihood_ratio_test()`` and ``print_summary()`` +- Better step-size defaults for Cox model -> more robust convergence. + +Bug fixes +''''''''' + +- fix ``check_assumptions`` when using formulas. + +.. _section-1: + 0.25.3 - 2020-08-24 ^^^^^^^^^^^^^^^^^^^ +.. _new-features-1: + New features '''''''''''' @@ -17,6 +37,8 @@ API Changes - See note on ``survival_difference_at_fixed_point_in_time_test`` above. +.. _bug-fixes-1: + Bug fixes ''''''''' @@ -24,12 +46,12 @@ Bug fixes - fix Python error when calling ``plot_covariate_groups`` - fix dtype mismatches in ``plot_partial_effects_on_outcome``. -.. _section-1: +.. _section-2: 0.25.2 - 2020-08-08 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-1: +.. _new-features-2: New features '''''''''''' @@ -49,7 +71,7 @@ API Changes the author.). So add 2 to ``n_baseline_knots`` to recover the identical model as previously. -.. _bug-fixes-1: +.. _bug-fixes-2: Bug fixes ''''''''' @@ -58,12 +80,12 @@ Bug fixes - fix some exception imports I missed. - fix log-likelihood p-value in splines ``CoxPHFitter`` -.. _section-2: +.. _section-3: 0.25.1 - 2020-08-01 ^^^^^^^^^^^^^^^^^^^ -.. _bug-fixes-2: +.. _bug-fixes-3: Bug fixes ''''''''' @@ -74,12 +96,12 @@ Bug fixes - put ``patsy`` as a proper dependency. - suppress some Pandas 1.1 warnings. -.. _section-3: +.. _section-4: 0.25.0 - 2020-07-27 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-2: +.. _new-features-3: New features '''''''''''' @@ -136,7 +158,7 @@ API Changes `here `__. - all exceptions and warnings have moved to ``lifelines.exceptions`` -.. _bug-fixes-3: +.. _bug-fixes-4: Bug fixes ''''''''' @@ -152,12 +174,12 @@ Bug fixes - fixed NaN bug in ``survival_table_from_events`` with intervals when no events would occur in a interval. -.. _section-4: +.. _section-5: 0.24.16 - 2020-07-09 ^^^^^^^^^^^^^^^^^^^^ -.. _new-features-3: +.. _new-features-4: New features '''''''''''' @@ -165,19 +187,19 @@ New features - improved algorithm choice for large DataFrames for Cox models. Should see a significant performance boost. -.. _bug-fixes-4: +.. _bug-fixes-5: Bug fixes ''''''''' - fixed ``utils.median_survival_time`` not accepting Pandas Series. -.. _section-5: +.. _section-6: 0.24.15 - 2020-07-07 ^^^^^^^^^^^^^^^^^^^^ -.. _bug-fixes-5: +.. _bug-fixes-6: Bug fixes ''''''''' @@ -188,12 +210,12 @@ Bug fixes - fixed bug where using ``conditional_after`` and ``times`` in ``CoxPHFitter("spline")`` prediction methods would be ignored. -.. _section-6: +.. _section-7: 0.24.14 - 2020-07-02 ^^^^^^^^^^^^^^^^^^^^ -.. _bug-fixes-6: +.. _bug-fixes-7: Bug fixes ''''''''' @@ -205,12 +227,12 @@ Bug fixes - fixed a bug where some columns would not be displayed in ``print_summary`` -.. _section-7: +.. _section-8: 0.24.13 - 2020-06-22 ^^^^^^^^^^^^^^^^^^^^ -.. _bug-fixes-7: +.. _bug-fixes-8: Bug fixes ''''''''' @@ -220,24 +242,24 @@ Bug fixes - fixed a bug where ``CoxPHFitter`` would fail with working with ``sklearn_adapter`` -.. _section-8: +.. _section-9: 0.24.12 - 2020-06-20 ^^^^^^^^^^^^^^^^^^^^ -.. _new-features-4: +.. _new-features-5: New features '''''''''''' - improved convergence of ``GeneralizedGamma(Regression)Fitter``. -.. _section-9: +.. _section-10: 0.24.11 - 2020-06-17 ^^^^^^^^^^^^^^^^^^^^ -.. _new-features-5: +.. _new-features-6: New features '''''''''''' @@ -260,12 +282,12 @@ API Changes penalized by ``penalizer`` - we now penalizing everything except intercept terms in linear relationships. -.. _section-10: +.. _section-11: 0.24.10 - 2020-06-16 ^^^^^^^^^^^^^^^^^^^^ -.. _new-features-6: +.. _new-features-7: New features '''''''''''' @@ -282,7 +304,7 @@ API Changes - Related to above: the fitted spline parameters are now available in the ``.summary`` and ``.print_summary`` methods. -.. _bug-fixes-8: +.. _bug-fixes-9: Bug fixes ''''''''' @@ -290,12 +312,12 @@ Bug fixes - fixed a bug in initialization of some interval-censoring models -> better convergence. -.. _section-11: +.. _section-12: 0.24.9 - 2020-06-05 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-7: +.. _new-features-8: New features '''''''''''' @@ -305,7 +327,7 @@ New features ``tarone-ware``, ``peto``, ``fleming-harrington``. Thanks @sean-reed - new interval censored dataset: ``lifelines.datasets.load_mice`` -.. _bug-fixes-9: +.. _bug-fixes-10: Bug fixes ''''''''' @@ -313,12 +335,12 @@ Bug fixes - Cleared up some mislabeling in ``plot_loglogs``. Thanks @sean-reed! - tuples are now able to be used as input in univariate models. -.. _section-12: +.. _section-13: 0.24.8 - 2020-05-17 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-8: +.. _new-features-9: New features '''''''''''' @@ -327,12 +349,12 @@ New features Not all edge cases are fully checked, and some features are missing. Try it under ``KaplanMeierFitter.fit_interval_censoring`` -.. _section-13: +.. _section-14: 0.24.7 - 2020-05-17 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-9: +.. _new-features-10: New features '''''''''''' @@ -348,12 +370,12 @@ New features - some convergence tweaks which should help recent performance regressions. -.. _section-14: +.. _section-15: 0.24.6 - 2020-05-05 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-10: +.. _new-features-11: New features '''''''''''' @@ -363,7 +385,7 @@ New features - New ``lifelines.plotting.plot_interval_censored_lifetimes`` for plotting interval censored data - thanks @sean-reed! -.. _bug-fixes-10: +.. _bug-fixes-11: Bug fixes ''''''''' @@ -371,19 +393,19 @@ Bug fixes - fixed bug where ``cdf_plot`` and ``qq_plot`` were not factoring in the weights correctly. -.. _section-15: +.. _section-16: 0.24.5 - 2020-05-01 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-11: +.. _new-features-12: New features '''''''''''' - ``plot_lifetimes`` accepts pandas Series. -.. _bug-fixes-11: +.. _bug-fixes-12: Bug fixes ''''''''' @@ -393,12 +415,12 @@ Bug fixes - Improved ``at_risk_counts`` for subplots. - More data validation checks for ``CoxTimeVaryingFitter`` -.. _section-16: +.. _section-17: 0.24.4 - 2020-04-13 ^^^^^^^^^^^^^^^^^^^ -.. _bug-fixes-12: +.. _bug-fixes-13: Bug fixes ''''''''' @@ -407,12 +429,12 @@ Bug fixes - setting a dataframe in ``ancillary_df`` works for interval censoring - ``.score`` works for interval censored models -.. _section-17: +.. _section-18: 0.24.3 - 2020-03-25 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-12: +.. _new-features-13: New features '''''''''''' @@ -422,7 +444,7 @@ New features the hazard ratio would be at previous times. This is useful because the final hazard ratio is some weighted average of these. -.. _bug-fixes-13: +.. _bug-fixes-14: Bug fixes ''''''''' @@ -430,12 +452,12 @@ Bug fixes - Fixed error in HTML printer that was hiding concordance index information. -.. _section-18: +.. _section-19: 0.24.2 - 2020-03-15 ^^^^^^^^^^^^^^^^^^^ -.. _bug-fixes-14: +.. _bug-fixes-15: Bug fixes ''''''''' @@ -447,12 +469,12 @@ Bug fixes - Fixed a keyword bug in ``plot_covariate_groups`` for parametric models. -.. _section-19: +.. _section-20: 0.24.1 - 2020-03-05 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-13: +.. _new-features-14: New features '''''''''''' @@ -460,14 +482,14 @@ New features - Stability improvements for GeneralizedGammaRegressionFitter and CoxPHFitter with spline estimation. -.. _bug-fixes-15: +.. _bug-fixes-16: Bug fixes ''''''''' - Fixed bug with plotting hazards in NelsonAalenFitter. -.. _section-20: +.. _section-21: 0.24.0 - 2020-02-20 ^^^^^^^^^^^^^^^^^^^ @@ -476,7 +498,7 @@ This version and future versions of lifelines no longer support py35. Pandas 1.0 is fully supported, along with previous versions. Minimum Scipy has been bumped to 1.2.0. -.. _new-features-14: +.. _new-features-15: New features '''''''''''' @@ -528,7 +550,7 @@ API Changes to ``scoring_method``. - removed ``_score_`` and ``path`` from Cox model. -.. _bug-fixes-16: +.. _bug-fixes-17: Bug fixes ''''''''' @@ -541,12 +563,12 @@ Bug fixes - Cox models now incorporate any penalizers in their ``log_likelihood_`` -.. _section-21: +.. _section-22: 0.23.9 - 2020-01-28 ^^^^^^^^^^^^^^^^^^^ -.. _bug-fixes-17: +.. _bug-fixes-18: Bug fixes ''''''''' @@ -557,12 +579,12 @@ Bug fixes of ``GeneralizedGammaRegressionFitter`` and any custom regression models should update their code as soon as possible. -.. _section-22: +.. _section-23: 0.23.8 - 2020-01-21 ^^^^^^^^^^^^^^^^^^^ -.. _bug-fixes-18: +.. _bug-fixes-19: Bug fixes ''''''''' @@ -573,19 +595,19 @@ Bug fixes ``GeneralizedGammaRegressionFitter`` and any custom regression models should update their code as soon as possible. -.. _section-23: +.. _section-24: 0.23.7 - 2020-01-14 ^^^^^^^^^^^^^^^^^^^ Bug fixes for py3.5. -.. _section-24: +.. _section-25: 0.23.6 - 2020-01-07 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-15: +.. _new-features-16: New features '''''''''''' @@ -599,12 +621,12 @@ New features - custom parametric regression models can now do left and interval censoring. -.. _section-25: +.. _section-26: 0.23.5 - 2020-01-05 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-16: +.. _new-features-17: New features '''''''''''' @@ -613,7 +635,7 @@ New features - New lymph node cancer dataset, originally from *H.F. for the German Breast Cancer Study Group (GBSG) (1994)* -.. _bug-fixes-19: +.. _bug-fixes-20: Bug fixes ''''''''' @@ -623,26 +645,26 @@ Bug fixes - fixed bug where large exponential numbers in ``print_summary`` were not being suppressed correctly. -.. _section-26: +.. _section-27: 0.23.4 - 2019-12-15 ^^^^^^^^^^^^^^^^^^^ - Bug fix for PyPI -.. _section-27: +.. _section-28: 0.23.3 - 2019-12-11 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-17: +.. _new-features-18: New features '''''''''''' - ``StatisticalResult.print_summary`` supports html output. -.. _bug-fixes-20: +.. _bug-fixes-21: Bug fixes ''''''''' @@ -650,12 +672,12 @@ Bug fixes - fix import in ``printer.py`` - fix html printing with Univariate models. -.. _section-28: +.. _section-29: 0.23.2 - 2019-12-07 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-18: +.. _new-features-19: New features '''''''''''' @@ -667,7 +689,7 @@ New features - performance improvements on regression models’ preprocessing. Should make datasets with high number of columns more performant. -.. _bug-fixes-21: +.. _bug-fixes-22: Bug fixes ''''''''' @@ -676,12 +698,12 @@ Bug fixes - fixed repr for ``sklearn_adapter`` classes. - fixed ``conditional_after`` in Cox model with strata was used. -.. _section-29: +.. _section-30: 0.23.1 - 2019-11-27 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-19: +.. _new-features-20: New features '''''''''''' @@ -691,7 +713,7 @@ New features - performance improvements for ``CoxPHFitter`` - up to 30% performance improvements for some datasets. -.. _bug-fixes-22: +.. _bug-fixes-23: Bug fixes ''''''''' @@ -703,12 +725,12 @@ Bug fixes - fixed bug when using ``print_summary`` with left censored models. - lots of minor bug fixes. -.. _section-30: +.. _section-31: 0.23.0 - 2019-11-17 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-20: +.. _new-features-21: New features '''''''''''' @@ -717,7 +739,7 @@ New features Jupyter notebooks! - silenced some warnings. -.. _bug-fixes-23: +.. _bug-fixes-24: Bug fixes ''''''''' @@ -739,7 +761,7 @@ API Changes - ``left_censorship`` in ``fit`` has been removed in favour of ``fit_left_censoring``. -.. _section-31: +.. _section-32: 0.22.10 - 2019-11-08 ^^^^^^^^^^^^^^^^^^^^ @@ -747,7 +769,7 @@ API Changes The tests were re-factored to be shipped with the package. Let me know if this causes problems. -.. _bug-fixes-24: +.. _bug-fixes-25: Bug fixes ''''''''' @@ -757,12 +779,12 @@ Bug fixes - fixed bug in plot_covariate_groups for AFT models when >1d arrays were used for values arg. -.. _section-32: +.. _section-33: 0.22.9 - 2019-10-30 ^^^^^^^^^^^^^^^^^^^ -.. _bug-fixes-25: +.. _bug-fixes-26: Bug fixes ''''''''' @@ -774,12 +796,12 @@ Bug fixes - ``CoxPHFitter`` now displays correct columns values when changing alpha param. -.. _section-33: +.. _section-34: 0.22.8 - 2019-10-06 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-21: +.. _new-features-22: New features '''''''''''' @@ -789,19 +811,19 @@ New features - ``conditional_after`` now available in ``CoxPHFitter.predict_median`` - Suppressed some unimportant warnings. -.. _bug-fixes-26: +.. _bug-fixes-27: Bug fixes ''''''''' - fixed initial_point being ignored in AFT models. -.. _section-34: +.. _section-35: 0.22.7 - 2019-09-29 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-22: +.. _new-features-23: New features '''''''''''' @@ -809,7 +831,7 @@ New features - new ``ApproximationWarning`` to tell you if the package is making an potentially mislead approximation. -.. _bug-fixes-27: +.. _bug-fixes-28: Bug fixes ''''''''' @@ -828,19 +850,19 @@ API Changes - Some previous ``StatisticalWarnings`` have been replaced by ``ApproximationWarning`` -.. _section-35: +.. _section-36: 0.22.6 - 2019-09-25 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-23: +.. _new-features-24: New features '''''''''''' - ``conditional_after`` works for ``CoxPHFitter`` prediction models 😅 -.. _bug-fixes-28: +.. _bug-fixes-29: Bug fixes ''''''''' @@ -856,12 +878,12 @@ API Changes - ``utils.dataframe_interpolate_at_times`` renamed to ``utils.interpolate_at_times_and_return_pandas``. -.. _section-36: +.. _section-37: 0.22.5 - 2019-09-20 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-24: +.. _new-features-25: New features '''''''''''' @@ -870,7 +892,7 @@ New features weights. - Better support for predicting on Pandas Series -.. _bug-fixes-29: +.. _bug-fixes-30: Bug fixes ''''''''' @@ -887,12 +909,12 @@ API Changes - ``_get_initial_value`` in parametric univariate models is renamed ``_create_initial_point`` -.. _section-37: +.. _section-38: 0.22.4 - 2019-09-04 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-25: +.. _new-features-26: New features '''''''''''' @@ -911,7 +933,7 @@ API changes - ``KaplanMeierFitter.survival_function_``\ ‘s’ index is no longer given the name “timeline”. -.. _bug-fixes-30: +.. _bug-fixes-31: Bug fixes ''''''''' @@ -919,12 +941,12 @@ Bug fixes - Fixed issue where ``concordance_index`` would never exit if NaNs in dataset. -.. _section-38: +.. _section-39: 0.22.3 - 2019-08-08 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-26: +.. _new-features-27: New features '''''''''''' @@ -948,7 +970,7 @@ API changes gains only in Cox models, and only a small fraction of the API was being used. -.. _bug-fixes-31: +.. _bug-fixes-32: Bug fixes ''''''''' @@ -960,19 +982,19 @@ Bug fixes - Fixed an error in the ``predict_percentile`` of ``LogLogisticAFTFitter``. New tests have been added around this. -.. _section-39: +.. _section-40: 0.22.2 - 2019-07-25 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-27: +.. _new-features-28: New features '''''''''''' - lifelines is now compatible with scipy>=1.3.0 -.. _bug-fixes-32: +.. _bug-fixes-33: Bug fixes ''''''''' @@ -983,12 +1005,12 @@ Bug fixes errors when using the library. The correctly numpy has been pinned (to 1.14.0+) -.. _section-40: +.. _section-41: 0.22.1 - 2019-07-14 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-28: +.. _new-features-29: New features '''''''''''' @@ -1016,7 +1038,7 @@ API changes ``.print_summary`` includes confidence intervals for the exponential of the value. -.. _bug-fixes-33: +.. _bug-fixes-34: Bug fixes ''''''''' @@ -1026,12 +1048,12 @@ Bug fixes - fixed an overflow bug in ``KaplanMeierFitter`` confidence intervals - improvements in data validation for ``CoxTimeVaryingFitter`` -.. _section-41: +.. _section-42: 0.22.0 - 2019-07-03 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-29: +.. _new-features-30: New features '''''''''''' @@ -1065,7 +1087,7 @@ API changes could set ``fit_intercept`` to False and not have to set ``ancillary_df`` - now one must specify a DataFrame. -.. _bug-fixes-34: +.. _bug-fixes-35: Bug fixes ''''''''' @@ -1074,21 +1096,21 @@ Bug fixes is now exact instead of an approximation. - fixed a name error bug in ``CoxTimeVaryingFitter.plot`` -.. _section-42: +.. _section-43: 0.21.5 - 2019-06-22 ^^^^^^^^^^^^^^^^^^^ I’m skipping 0.21.4 version because of deployment issues. -.. _new-features-30: +.. _new-features-31: New features '''''''''''' - ``scoring_method`` now a kwarg on ``sklearn_adapter`` -.. _bug-fixes-35: +.. _bug-fixes-36: Bug fixes ''''''''' @@ -1098,12 +1120,12 @@ Bug fixes - fixed visual bug that misaligned x-axis ticks and at-risk counts. Thanks @christopherahern! -.. _section-43: +.. _section-44: 0.21.3 - 2019-06-04 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-31: +.. _new-features-32: New features '''''''''''' @@ -1117,19 +1139,19 @@ New features - ``CoxPHFitter.check_assumptions`` now accepts a ``columns`` parameter to specify only checking a subset of columns. -.. _bug-fixes-36: +.. _bug-fixes-37: Bug fixes ''''''''' - ``covariates_from_event_matrix`` handle nulls better -.. _section-44: +.. _section-45: 0.21.2 - 2019-05-16 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-32: +.. _new-features-33: New features '''''''''''' @@ -1153,17 +1175,17 @@ API changes - removing ``_compute_likelihood_ratio_test`` on regression models. Use ``log_likelihood_ratio_test`` now. -.. _bug-fixes-37: +.. _bug-fixes-38: Bug fixes ''''''''' -.. _section-45: +.. _section-46: 0.21.1 - 2019-04-26 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-33: +.. _new-features-34: New features '''''''''''' @@ -1180,19 +1202,19 @@ API changes - output of ``survival_table_from_events`` when collapsing rows to intervals now removes the “aggregate” column multi-index. -.. _bug-fixes-38: +.. _bug-fixes-39: Bug fixes ''''''''' - fixed bug in CoxTimeVaryingFitter when ax is provided, thanks @j-i-l! -.. _section-46: +.. _section-47: 0.21.0 - 2019-04-12 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-34: +.. _new-features-35: New features '''''''''''' @@ -1217,7 +1239,7 @@ API changes - ``entries`` property in multivariate parametric models has a new Series name: ``entry`` -.. _bug-fixes-39: +.. _bug-fixes-40: Bug fixes ''''''''' @@ -1227,12 +1249,12 @@ Bug fixes - Fixed an error that didn’t let users use Numpy arrays in prediction for AFT models -.. _section-47: +.. _section-48: 0.20.5 - 2019-04-08 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-35: +.. _new-features-36: New features '''''''''''' @@ -1249,7 +1271,7 @@ API changes - in ``AalenJohansenFitter``, the ``variance`` parameter is renamed to ``variance_`` to align with the usual lifelines convention. -.. _bug-fixes-40: +.. _bug-fixes-41: Bug fixes ''''''''' @@ -1258,12 +1280,12 @@ Bug fixes test when using strata. - Fixed some plotting bugs with ``AalenJohansenFitter`` -.. _section-48: +.. _section-49: 0.20.4 - 2019-03-27 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-36: +.. _new-features-37: New features '''''''''''' @@ -1282,7 +1304,7 @@ API changes - Pandas is now correctly pinned to >= 0.23.0. This was always the case, but not specified in setup.py correctly. -.. _bug-fixes-41: +.. _bug-fixes-42: Bug fixes ''''''''' @@ -1291,12 +1313,12 @@ Bug fixes - ``PiecewiseExponentialFitter`` is available with ``from lifelines import *``. -.. _section-49: +.. _section-50: 0.20.3 - 2019-03-23 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-37: +.. _new-features-38: New features '''''''''''' @@ -1309,12 +1331,12 @@ New features ``plot_survival_function`` and ``confidence_interval_survival_function_``. -.. _section-50: +.. _section-51: 0.20.2 - 2019-03-21 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-38: +.. _new-features-39: New features '''''''''''' @@ -1338,7 +1360,7 @@ API changes @vpolimenov! - The ``C`` column in ``load_lcd`` dataset is renamed to ``E``. -.. _bug-fixes-42: +.. _bug-fixes-43: Bug fixes ''''''''' @@ -1354,7 +1376,7 @@ Bug fixes the q parameter was below the truncation limit. This should have been ``-np.inf`` -.. _section-51: +.. _section-52: 0.20.1 - 2019-03-16 ^^^^^^^^^^^^^^^^^^^ @@ -1378,7 +1400,7 @@ API changes This is no longer the case. A 0 will still be added if there is a duration (observed or not) at 0 occurs however. -.. _section-52: +.. _section-53: 0.20.0 - 2019-03-05 ^^^^^^^^^^^^^^^^^^^ @@ -1387,7 +1409,7 @@ API changes recent installs where Py3. - Updated minimum dependencies, specifically Matplotlib and Pandas. -.. _new-features-39: +.. _new-features-40: New features '''''''''''' @@ -1407,19 +1429,19 @@ API changes transposed now (previous parameters where columns, now parameters are rows). -.. _bug-fixes-43: +.. _bug-fixes-44: Bug fixes ''''''''' - Fixed a bug with plotting and ``check_assumptions``. -.. _section-53: +.. _section-54: 0.19.5 - 2019-02-26 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-40: +.. _new-features-41: New features '''''''''''' @@ -1429,24 +1451,24 @@ New features features or categorical variables. - Convergence improvements for AFT models. -.. _section-54: +.. _section-55: 0.19.4 - 2019-02-25 ^^^^^^^^^^^^^^^^^^^ -.. _bug-fixes-44: +.. _bug-fixes-45: Bug fixes ''''''''' - remove some bad print statements in ``CoxPHFitter``. -.. _section-55: +.. _section-56: 0.19.3 - 2019-02-25 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-41: +.. _new-features-42: New features '''''''''''' @@ -1458,12 +1480,12 @@ New features - Performance increase to ``print_summary`` in the ``CoxPHFitter`` and ``CoxTimeVaryingFitter`` model. -.. _section-56: +.. _section-57: 0.19.2 - 2019-02-22 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-42: +.. _new-features-43: New features '''''''''''' @@ -1471,7 +1493,7 @@ New features - ``ParametricUnivariateFitters``, like ``WeibullFitter``, have smoothed plots when plotting (vs stepped plots) -.. _bug-fixes-45: +.. _bug-fixes-46: Bug fixes ''''''''' @@ -1481,12 +1503,12 @@ Bug fixes - Univariate fitters are more flexiable and can allow 2-d and DataFrames as inputs. -.. _section-57: +.. _section-58: 0.19.1 - 2019-02-21 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-43: +.. _new-features-44: New features '''''''''''' @@ -1503,12 +1525,12 @@ API changes ``PiecewiseExponential`` to the same as ``ExponentialFitter`` (from ``\lambda * t`` to ``t / \lambda``). -.. _section-58: +.. _section-59: 0.19.0 - 2019-02-20 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-44: +.. _new-features-45: New features '''''''''''' @@ -1547,7 +1569,7 @@ API changes means that the *default* for alpha is set to 0.05 in the latest lifelines, instead of 0.95 in previous versions. -.. _bug-fixes-46: +.. _bug-fixes-47: Bug Fixes ''''''''' @@ -1564,7 +1586,7 @@ Bug Fixes models. Thanks @airanmehr! - Fixed some Pandas <0.24 bugs. -.. _section-59: +.. _section-60: 0.18.6 - 2019-02-13 ^^^^^^^^^^^^^^^^^^^ @@ -1574,7 +1596,7 @@ Bug Fixes ``rank`` and ``km`` p-values now. - some performance improvements to ``qth_survival_time``. -.. _section-60: +.. _section-61: 0.18.5 - 2019-02-11 ^^^^^^^^^^^^^^^^^^^ @@ -1595,7 +1617,7 @@ Bug Fixes that can be used to turn off variance calculations since this can take a long time for large datasets. Thanks @pzivich! -.. _section-61: +.. _section-62: 0.18.4 - 2019-02-10 ^^^^^^^^^^^^^^^^^^^ @@ -1605,7 +1627,7 @@ Bug Fixes - adding left-truncation support to parametric univarite models with the ``entry`` kwarg in ``.fit`` -.. _section-62: +.. _section-63: 0.18.3 - 2019-02-07 ^^^^^^^^^^^^^^^^^^^ @@ -1615,7 +1637,7 @@ Bug Fixes warnings are more noticeable. - Improved some warning and error messages. -.. _section-63: +.. _section-64: 0.18.2 - 2019-02-05 ^^^^^^^^^^^^^^^^^^^ @@ -1631,7 +1653,7 @@ Bug Fixes Moved them all (most) to use ``autograd``. - ``LogNormalFitter`` no longer models ``log_sigma``. -.. _section-64: +.. _section-65: 0.18.1 - 2019-02-02 ^^^^^^^^^^^^^^^^^^^ @@ -1642,7 +1664,7 @@ Bug Fixes - use the ``autograd`` lib to help with gradients. - New ``LogLogisticFitter`` univariate fitter available. -.. _section-65: +.. _section-66: 0.18.0 - 2019-01-31 ^^^^^^^^^^^^^^^^^^^ @@ -1679,7 +1701,7 @@ Bug Fixes ``LinAlgError: Matrix is singular.`` and report back to the user advice. -.. _section-66: +.. _section-67: 0.17.5 - 2019-01-25 ^^^^^^^^^^^^^^^^^^^ @@ -1687,7 +1709,7 @@ Bug Fixes - more bugs in ``plot_covariate_groups`` fixed when using non-numeric strata. -.. _section-67: +.. _section-68: 0.17.4 -2019-01-25 ^^^^^^^^^^^^^^^^^^ @@ -1699,7 +1721,7 @@ Bug Fixes - ``groups`` is now called ``values`` in ``CoxPHFitter.plot_covariate_groups`` -.. _section-68: +.. _section-69: 0.17.3 - 2019-01-24 ^^^^^^^^^^^^^^^^^^^ @@ -1707,7 +1729,7 @@ Bug Fixes - Fix in ``compute_residuals`` when using ``schoenfeld`` and the minumum duration has only censored subjects. -.. _section-69: +.. _section-70: 0.17.2 2019-01-22 ^^^^^^^^^^^^^^^^^ @@ -1718,7 +1740,7 @@ Bug Fixes ``for`` loop. The downside is the code is more esoteric now. I’ve added comments as necessary though 🤞 -.. _section-70: +.. _section-71: 0.17.1 - 2019-01-20 ^^^^^^^^^^^^^^^^^^^ @@ -1735,7 +1757,7 @@ Bug Fixes - Fixes a Pandas performance warning in ``CoxTimeVaryingFitter``. - Performances improvements to ``CoxTimeVaryingFitter``. -.. _section-71: +.. _section-72: 0.17.0 - 2019-01-11 ^^^^^^^^^^^^^^^^^^^ @@ -1756,7 +1778,7 @@ Bug Fixes - some plotting improvemnts to ``plotting.plot_lifetimes`` -.. _section-72: +.. _section-73: 0.16.3 - 2019-01-03 ^^^^^^^^^^^^^^^^^^^ @@ -1764,7 +1786,7 @@ Bug Fixes - More ``CoxPHFitter`` performance improvements. Up to a 40% reduction vs 0.16.2 for some datasets. -.. _section-73: +.. _section-74: 0.16.2 - 2019-01-02 ^^^^^^^^^^^^^^^^^^^ @@ -1775,14 +1797,14 @@ Bug Fixes has lots of duplicate times. See https://github.com/CamDavidsonPilon/lifelines/issues/591 -.. _section-74: +.. _section-75: 0.16.1 - 2019-01-01 ^^^^^^^^^^^^^^^^^^^ - Fixed py2 division error in ``concordance`` method. -.. _section-75: +.. _section-76: 0.16.0 - 2019-01-01 ^^^^^^^^^^^^^^^^^^^ @@ -1818,7 +1840,7 @@ Bug Fixes ``lifelines.utils.to_episodic_format``. - ``CoxTimeVaryingFitter`` now accepts ``strata``. -.. _section-76: +.. _section-77: 0.15.4 ^^^^^^ @@ -1826,14 +1848,14 @@ Bug Fixes - bug fix for the Cox model likelihood ratio test when using non-trivial weights. -.. _section-77: +.. _section-78: 0.15.3 - 2018-12-18 ^^^^^^^^^^^^^^^^^^^ - Only allow matplotlib less than 3.0. -.. _section-78: +.. _section-79: 0.15.2 - 2018-11-23 ^^^^^^^^^^^^^^^^^^^ @@ -1844,7 +1866,7 @@ Bug Fixes - removed ``entry`` from ``ExponentialFitter`` and ``WeibullFitter`` as it was doing nothing. -.. _section-79: +.. _section-80: 0.15.1 - 2018-11-23 ^^^^^^^^^^^^^^^^^^^ @@ -1853,7 +1875,7 @@ Bug Fixes - Raise NotImplementedError if the ``robust`` flag is used in ``CoxTimeVaryingFitter`` - that’s not ready yet. -.. _section-80: +.. _section-81: 0.15.0 - 2018-11-22 ^^^^^^^^^^^^^^^^^^^ @@ -1924,7 +1946,7 @@ Bug Fixes When Estimating Risks in Pharmacoepidemiology” for a nice overview of the model. -.. _section-81: +.. _section-82: 0.14.6 - 2018-07-02 ^^^^^^^^^^^^^^^^^^^ @@ -1932,7 +1954,7 @@ Bug Fixes - fix for n > 2 groups in ``multivariate_logrank_test`` (again). - fix bug for when ``event_observed`` column was not boolean. -.. _section-82: +.. _section-83: 0.14.5 - 2018-06-29 ^^^^^^^^^^^^^^^^^^^ @@ -1940,7 +1962,7 @@ Bug Fixes - fix for n > 2 groups in ``multivariate_logrank_test`` - fix weights in KaplanMeierFitter when using a pandas Series. -.. _section-83: +.. _section-84: 0.14.4 - 2018-06-14 ^^^^^^^^^^^^^^^^^^^ @@ -1957,7 +1979,7 @@ Bug Fixes - New ``delay`` parameter in ``add_covariate_to_timeline`` - removed ``two_sided_z_test`` from ``statistics`` -.. _section-84: +.. _section-85: 0.14.3 - 2018-05-24 ^^^^^^^^^^^^^^^^^^^ @@ -1969,7 +1991,7 @@ Bug Fixes - adds a ``column`` argument to ``CoxTimeVaryingFitter`` and ``CoxPHFitter`` ``plot`` method to plot only a subset of columns. -.. _section-85: +.. _section-86: 0.14.2 - 2018-05-18 ^^^^^^^^^^^^^^^^^^^ @@ -1977,7 +1999,7 @@ Bug Fixes - some quality of life improvements for working with ``CoxTimeVaryingFitter`` including new ``predict_`` methods. -.. _section-86: +.. _section-87: 0.14.1 - 2018-04-01 ^^^^^^^^^^^^^^^^^^^ @@ -1995,7 +2017,7 @@ Bug Fixes faster completion of ``fit`` for large dataframes, and up to 10% faster for small dataframes. -.. _section-87: +.. _section-88: 0.14.0 - 2018-03-03 ^^^^^^^^^^^^^^^^^^^ @@ -2017,7 +2039,7 @@ Bug Fixes of a ``RuntimeWarning`` - New checks for complete separation in the dataset for regressions. -.. _section-88: +.. _section-89: 0.13.0 - 2017-12-22 ^^^^^^^^^^^^^^^^^^^ @@ -2046,7 +2068,7 @@ Bug Fixes group the same subjects together and give that observation a weight equal to the count. Altogether, this means a much faster regression. -.. _section-89: +.. _section-90: 0.12.0 ^^^^^^ @@ -2063,7 +2085,7 @@ Bug Fixes - Additional functionality to ``utils.survival_table_from_events`` to bin the index to make the resulting table more readable. -.. _section-90: +.. _section-91: 0.11.3 ^^^^^^ @@ -2075,7 +2097,7 @@ Bug Fixes observation or censorship. - More accurate prediction methods parametrics univariate models. -.. _section-91: +.. _section-92: 0.11.2 ^^^^^^ @@ -2083,14 +2105,14 @@ Bug Fixes - Changing liscense to valilla MIT. - Speed up ``NelsonAalenFitter.fit`` considerably. -.. _section-92: +.. _section-93: 0.11.1 - 2017-06-22 ^^^^^^^^^^^^^^^^^^^ - Python3 fix for ``CoxPHFitter.plot``. -.. _section-93: +.. _section-94: 0.11.0 - 2017-06-21 ^^^^^^^^^^^^^^^^^^^ @@ -2104,14 +2126,14 @@ Bug Fixes of a new ``loc`` kwarg. This is to align with Pandas deprecating ``ix`` -.. _section-94: +.. _section-95: 0.10.1 - 2017-06-05 ^^^^^^^^^^^^^^^^^^^ - fix in internal normalization for ``CoxPHFitter`` predict methods. -.. _section-95: +.. _section-96: 0.10.0 ^^^^^^ @@ -2126,7 +2148,7 @@ Bug Fixes mimic R’s ``basehaz`` API. - new ``predict_log_partial_hazards`` to ``CoxPHFitter`` -.. _section-96: +.. _section-97: 0.9.4 ^^^^^ @@ -2149,7 +2171,7 @@ Bug Fixes - performance improvements in ``CoxPHFitter`` - should see at least a 10% speed improvement in ``fit``. -.. _section-97: +.. _section-98: 0.9.2 ^^^^^ @@ -2158,7 +2180,7 @@ Bug Fixes - throw an error if no admissable pairs in the c-index calculation. Previously a NaN was returned. -.. _section-98: +.. _section-99: 0.9.1 ^^^^^ @@ -2166,7 +2188,7 @@ Bug Fixes - add two summary functions to Weibull and Exponential fitter, solves #224 -.. _section-99: +.. _section-100: 0.9.0 ^^^^^ @@ -2182,7 +2204,7 @@ Bug Fixes - Default predict method in ``k_fold_cross_validation`` is now ``predict_expectation`` -.. _section-100: +.. _section-101: 0.8.1 - 2015-08-01 ^^^^^^^^^^^^^^^^^^ @@ -2199,7 +2221,7 @@ Bug Fixes - scaling of smooth hazards in NelsonAalenFitter was off by a factor of 0.5. -.. _section-101: +.. _section-102: 0.8.0 ^^^^^ @@ -2218,7 +2240,7 @@ Bug Fixes ``lifelines.statistics. power_under_cph``. - fixed a bug when using KaplanMeierFitter for left-censored data. -.. _section-102: +.. _section-103: 0.7.1 ^^^^^ @@ -2237,7 +2259,7 @@ Bug Fixes - refactor each fitter into it’s own submodule. For now, the tests are still in the same file. This will also *not* break the API. -.. _section-103: +.. _section-104: 0.7.0 - 2015-03-01 ^^^^^^^^^^^^^^^^^^ @@ -2256,7 +2278,7 @@ Bug Fixes duration remaining until the death event, given survival up until time t. -.. _section-104: +.. _section-105: 0.6.1 ^^^^^ @@ -2268,7 +2290,7 @@ Bug Fixes your work is to sum up the survival function (for expected values or something similar), it’s more difficult to make a mistake. -.. _section-105: +.. _section-106: 0.6.0 - 2015-02-04 ^^^^^^^^^^^^^^^^^^ @@ -2291,7 +2313,7 @@ Bug Fixes - In ``KaplanMeierFitter``, ``epsilon`` has been renamed to ``precision``. -.. _section-106: +.. _section-107: 0.5.1 - 2014-12-24 ^^^^^^^^^^^^^^^^^^ @@ -2312,7 +2334,7 @@ Bug Fixes ``lifelines.plotting.add_at_risk_counts``. - Fix bug Epanechnikov kernel. -.. _section-107: +.. _section-108: 0.5.0 - 2014-12-07 ^^^^^^^^^^^^^^^^^^ @@ -2325,7 +2347,7 @@ Bug Fixes - add test for summary() - Alternate metrics can be used for ``k_fold_cross_validation``. -.. _section-108: +.. _section-109: 0.4.4 - 2014-11-27 ^^^^^^^^^^^^^^^^^^ @@ -2337,7 +2359,7 @@ Bug Fixes - Fixes bug in 1-d input not returning in CoxPHFitter - Lots of new tests. -.. _section-109: +.. _section-110: 0.4.3 - 2014-07-23 ^^^^^^^^^^^^^^^^^^ @@ -2358,7 +2380,7 @@ Bug Fixes - Adds option ``include_likelihood`` to CoxPHFitter fit method to save the final log-likelihood value. -.. _section-110: +.. _section-111: 0.4.2 - 2014-06-19 ^^^^^^^^^^^^^^^^^^ @@ -2378,7 +2400,7 @@ Bug Fixes from failing so often (this a stop-gap) - pep8 everything -.. _section-111: +.. _section-112: 0.4.1.1 ^^^^^^^ @@ -2391,7 +2413,7 @@ Bug Fixes - Adding more robust cross validation scheme based on issue #67. - fixing ``regression_dataset`` in ``datasets``. -.. _section-112: +.. _section-113: 0.4.1 - 2014-06-11 ^^^^^^^^^^^^^^^^^^ @@ -2410,7 +2432,7 @@ Bug Fixes - Adding a Changelog. - more sanitizing for the statistical tests =) -.. _section-113: +.. _section-114: 0.4.0 - 2014-06-08 ^^^^^^^^^^^^^^^^^^