Skip to content

Commit

Permalink
V0.18.4 (#631)
Browse files Browse the repository at this point in the history
* v0.18.4

* lint and add a test
  • Loading branch information
CamDavidsonPilon authored Feb 10, 2019
1 parent 0fb9430 commit 134e2c5
Show file tree
Hide file tree
Showing 12 changed files with 167 additions and 62 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
### Changelogs

### 0.18.4
- fixed confidence intervals in cumulative hazards for parametric univariate models. They were previously
severely depressed.
- adding left-truncation support to parametric univariate models with the `entry` kwarg in `.fit`

### 0.18.3
- Some performance improvements to parametric univariate models.
- Suppressing some irrelevant NumPy and autograd warnings, so lifeline warnings are more noticeable.
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
#
# The short X.Y version.

version = "0.18.3"
version = "0.18.4"
# The full version, including dev info
release = version

Expand Down
42 changes: 21 additions & 21 deletions docs/jupyter_notebooks/Modelling time-lagged conversion rates.ipynb

Large diffs are not rendered by default.

85 changes: 64 additions & 21 deletions lifelines/fitters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,8 @@ class ParametericUnivariateFitter(UnivariateFitter):
"""

_MIN_PARAMETER_VALUE = 0.000001
_KNOWN_MODEL = False
_MIN_PARAMETER_VALUE = 1e-09

def __init__(self, *args, **kwargs):
super(ParametericUnivariateFitter, self).__init__(*args, **kwargs)
Expand All @@ -232,49 +233,54 @@ def __init__(self, *args, **kwargs):
# pylint: disable=no-value-for-parameter,unexpected-keyword-arg
self._hazard = egrad(self._cumulative_hazard, argnum=1)
if not hasattr(self, "_bounds"):
self._bounds = [(self._MIN_PARAMETER_VALUE, None)] * len(self._fitted_parameter_names)
self._bounds = [(0, None)] * len(self._fitted_parameter_names)
self._bounds = list(self._buffer_bounds(self._bounds))

if not hasattr(self, "_initial_values"):
self._initial_values = np.array(list(self._initial_values_from_bounds()))

if "alpha" in self._fitted_parameter_names:
raise NameError("'alpha' in _fitted_parameter_names is a lifelines reserved word. Try 'alpha_' instead.")

def _check_cumulative_hazard_is_monotone_and_positive(self, durations):
if len(self._bounds) != len(self._fitted_parameter_names) != self._initial_values.shape[0]:
raise ValueError(
"_bounds must be the same shape as _fitted_parameters_names must be the same shape as _initial_values"
)

def _check_cumulative_hazard_is_monotone_and_positive(self, durations, values):
class_name = self.__class__.__name__

cumulative_hazard = self._cumulative_hazard(self._initial_values, durations)
cumulative_hazard = self._cumulative_hazard(values, durations)
if not np.all(cumulative_hazard > 0):
warnings.warn(
dedent(
"""\
Cumulative hazard is not strictly positive. For example, try:
>>> test_times = np.linspace(0.01, 100, 15)
>>> fitter = {0}()
>>> fitter._cumulative_hazard(fitter._initial_values, test_times)
>>> fitter._cumulative_hazard(np.{1}, np.sort(durations))
This may harm convergence, or return nonsensical results.
""".format(
class_name
class_name, values.__repr__()
)
),
StatisticalWarning,
)

derivative_of_cumulative_hazard = self._hazard(self._initial_values, durations)
derivative_of_cumulative_hazard = self._hazard(values, durations)
if not np.all(derivative_of_cumulative_hazard >= 0):
warnings.warn(
dedent(
"""\
Cumulative hazard is not strictly non-decreasing. For example, try:
>>> test_times = np.linspace(0.01, 100, 15)
>>> fitter = {0}()
>>> fitter._hazard(fitter._initial_values, test_times)
>>> fitter._hazard({1}, np.sort(durations))
This may harm convergence, or return nonsensical results.
""".format(
class_name
class_name, values.__repr__()
)
),
StatisticalWarning,
Expand All @@ -291,18 +297,33 @@ def _initial_values_from_bounds(self):
else:
yield (ub - lb) / 2

def _buffer_bounds(self, bounds):
for (lb, ub) in bounds:
if lb is None and ub is None:
yield (None, None)
elif lb is None:
yield (None, self._MIN_PARAMETER_VALUE)
elif ub is None:
yield (self._MIN_PARAMETER_VALUE, None)
else:
yield (lb + self._MIN_PARAMETER_VALUE, ub - self._MIN_PARAMETER_VALUE)

def _cumulative_hazard(self, params, times):
raise NotImplementedError

def _survival_function(self, params, times):
return anp.exp(-self._cumulative_hazard(params, times))

def _negative_log_likelihood(self, params, T, E):
def _negative_log_likelihood(self, params, T, E, entry):
n = T.shape[0]
hz = self._hazard(params, T[E])
hz = anp.clip(hz, 1e-18, np.inf)

ll = (anp.log(hz)).sum() - self._cumulative_hazard(params, T).sum()
ll = (
(anp.log(hz)).sum()
- self._cumulative_hazard(params, T).sum()
+ self._cumulative_hazard(params, entry).sum()
)
return -ll / n

def _compute_confidence_bounds_of_cumulative_hazard(self, alpha, ci_labels):
Expand All @@ -315,7 +336,7 @@ def _compute_confidence_bounds_of_cumulative_hazard(self, alpha, ci_labels):
)

gradient_at_times = np.vstack(
[gradient_of_cum_hazard_at_mle(basis)[1] for basis in np.eye(len(self._fitted_parameters_))]
[gradient_of_cum_hazard_at_mle(basis) for basis in np.eye(len(self._fitted_parameters_))]
)

std_cumulative_hazard = np.sqrt(
Expand All @@ -325,13 +346,13 @@ def _compute_confidence_bounds_of_cumulative_hazard(self, alpha, ci_labels):
if ci_labels is None:
ci_labels = ["%s_upper_%.2f" % (self._label, alpha), "%s_lower_%.2f" % (self._label, alpha)]
assert len(ci_labels) == 2, "ci_labels should be a length 2 array."

df[ci_labels[0]] = self.cumulative_hazard_at_times(self.timeline) + alpha2 * std_cumulative_hazard
df[ci_labels[1]] = self.cumulative_hazard_at_times(self.timeline) - alpha2 * std_cumulative_hazard
return df

def _fit_model(self, T, E, show_progress=True):
def _fit_model(self, T, E, entry, show_progress=True):

non_zero_entries = entry[entry > 0]
with warnings.catch_warnings():
warnings.simplefilter("ignore")

Expand All @@ -340,14 +361,14 @@ def _fit_model(self, T, E, show_progress=True):
self._initial_values,
jac=True,
method="L-BFGS-B",
args=(T, E),
args=(T, E, non_zero_entries),
bounds=self._bounds,
options={"disp": show_progress},
)

if results.success:
# pylint: disable=no-value-for-parameter
hessian_ = hessian(self._negative_log_likelihood)(results.x, T, E)
hessian_ = hessian(self._negative_log_likelihood)(results.x, T, E, non_zero_entries)
return results.x, -results.fun, hessian_ * T.shape[0]
print(results)
raise ConvergenceError(
Expand Down Expand Up @@ -449,7 +470,15 @@ def print_summary(self, decimals=2, **kwargs):
print(df.to_string(float_format=format_floats(decimals), formatters={"p": format_p_value(decimals)}))

def fit(
self, durations, event_observed=None, timeline=None, label=None, alpha=None, ci_labels=None, show_progress=False
self,
durations,
event_observed=None,
timeline=None,
label=None,
alpha=None,
ci_labels=None,
show_progress=False,
entry=None,
): # pylint: disable=too-many-arguments
"""
Parameters
Expand All @@ -471,6 +500,9 @@ def fit(
as a length-2 list: [<lower-bound name>, <upper-bound name>]. Default: <label>_lower_<alpha>
show_progress: boolean, optional
since this is an iterative fitting algorithm, switching this to True will display some iteration details.
entry: an array, or pd.Series, of length n -- relative time when a subject entered the study. This is
useful for left-truncated (not left-censored) observations. If None, all members of the population
entered study when they were "born": time zero.
Returns
-------
Expand All @@ -491,12 +523,15 @@ def fit(
"This model does not allow for non-positive durations. Suggestion: add a small positive value to zero elements."
)

self._check_cumulative_hazard_is_monotone_and_positive(self.durations)
if not self._KNOWN_MODEL:
self._check_cumulative_hazard_is_monotone_and_positive(self.durations, self._initial_values)

self.event_observed = (
np.asarray(event_observed, dtype=int) if event_observed is not None else np.ones_like(self.durations)
)

self.entry = np.asarray(entry) if entry is not None else np.zeros_like(self.durations)

if timeline is not None:
self.timeline = np.sort(np.asarray(timeline))
else:
Expand All @@ -507,9 +542,12 @@ def fit(

# estimation
self._fitted_parameters_, self._log_likelihood, self._hessian_ = self._fit_model(
self.durations, self.event_observed.astype(bool), show_progress=show_progress
self.durations, self.event_observed.astype(bool), self.entry, show_progress=show_progress
)

if not self._KNOWN_MODEL:
self._check_cumulative_hazard_is_monotone_and_positive(self.durations, self._fitted_parameters_)

for param_name, fitted_value in zip(self._fitted_parameter_names, self._fitted_parameters_):
setattr(self, param_name, fitted_value)

Expand Down Expand Up @@ -561,3 +599,8 @@ def hazard_at_times(self, times):
@_must_call_fit_first
def median_(self):
return median_survival_times(self.survival_function_)


class KnownModelParametericUnivariateFitter(ParametericUnivariateFitter):
    """
    Parametric univariate fitter whose cumulative hazard is already trusted.

    The only difference from ``ParametericUnivariateFitter`` is the
    ``_KNOWN_MODEL`` flag: ``fit`` runs
    ``_check_cumulative_hazard_is_monotone_and_positive`` only when that flag
    is false, so subclasses of this class skip those checks.
    """

    # Read by ``fit`` to decide whether the cumulative-hazard checks run.
    _KNOWN_MODEL = True
7 changes: 4 additions & 3 deletions lifelines/fitters/exponential_fitter.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# -*- coding: utf-8 -*-
from __future__ import print_function
import numpy as np
from lifelines.fitters import ParametericUnivariateFitter
from lifelines.fitters import KnownModelParametericUnivariateFitter


class ExponentialFitter(ParametericUnivariateFitter):
class ExponentialFitter(KnownModelParametericUnivariateFitter):
r"""
This class implements an Exponential model for univariate data. The model has parameterized
form:
Expand Down Expand Up @@ -36,7 +36,8 @@ class ExponentialFitter(ParametericUnivariateFitter):
def median_(self):
return 1.0 / self.lambda_ * (np.log(2))

def _fit_model(self, T, E, show_progress=False):
def _fit_model(self, T, E, entry, show_progress=False):
T = T - entry
lambda_ = E.sum() / T.sum()
lambda_variance_ = lambda_ / T.sum()
log_likelihood = np.log(lambda_) * E.sum() - lambda_ * T.sum()
Expand Down
4 changes: 2 additions & 2 deletions lifelines/fitters/log_logistic_fitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
from __future__ import print_function, division
import autograd.numpy as np

from lifelines.fitters import ParametericUnivariateFitter
from lifelines.fitters import KnownModelParametericUnivariateFitter


class LogLogisticFitter(ParametericUnivariateFitter):
class LogLogisticFitter(KnownModelParametericUnivariateFitter):

r"""
This class implements a Log-Logistic model for univariate data. The model has parameterized
Expand Down
6 changes: 3 additions & 3 deletions lifelines/fitters/log_normal_fitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
import autograd.numpy as np
from autograd.scipy.stats import norm

from lifelines.fitters import ParametericUnivariateFitter
from lifelines.fitters import KnownModelParametericUnivariateFitter


class LogNormalFitter(ParametericUnivariateFitter):
class LogNormalFitter(KnownModelParametericUnivariateFitter):
r"""
This class implements a Log Normal model for univariate data. The model has parameterized
form:
Expand All @@ -28,7 +28,7 @@ class LogNormalFitter(ParametericUnivariateFitter):
"""

_fitted_parameter_names = ["mu_", "sigma_"]
_bounds = [(None, None), (ParametericUnivariateFitter._MIN_PARAMETER_VALUE, None)]
_bounds = [(None, None), (0, None)]

@property
def median_(self):
Expand Down
4 changes: 2 additions & 2 deletions lifelines/fitters/piecewise_exponential_fitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
from __future__ import print_function, division
import autograd.numpy as np

from lifelines.fitters import ParametericUnivariateFitter
from lifelines.fitters import KnownModelParametericUnivariateFitter


class PiecewiseExponentialFitter(ParametericUnivariateFitter):
class PiecewiseExponentialFitter(KnownModelParametericUnivariateFitter):
r"""
This class implements a Piecewise Exponential model for univariate data. The model has parameterized
hazard rate:
Expand Down
4 changes: 2 additions & 2 deletions lifelines/fitters/weibull_fitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
from __future__ import print_function, division
import autograd.numpy as np

from lifelines.fitters import ParametericUnivariateFitter
from lifelines.fitters import KnownModelParametericUnivariateFitter


class WeibullFitter(ParametericUnivariateFitter):
class WeibullFitter(KnownModelParametericUnivariateFitter):

r"""
This class implements a Weibull model for univariate data. The model has parameterized
Expand Down
2 changes: 1 addition & 1 deletion lifelines/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

__version__ = "0.18.3"
__version__ = "0.18.4"
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def filepath(fname):
setup(
name="lifelines",
version=__version__,
author="Cameron Davidson-Pilon, Jonas Kalderstam",
author="Cameron Davidson-Pilon",
author_email="[email protected]",
description="Survival analysis in Python, including Kaplan Meier, Nelson Aalen and regression",
license="MIT",
Expand Down
Loading

0 comments on commit 134e2c5

Please sign in to comment.