diff --git a/CHANGELOG.md b/CHANGELOG.md index cfb61340c..599a70999 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,17 @@ ## Changelog +#### 0.24.9 - 2020-06-05 + +##### New features + - Faster NPMLE for interval censored data + - New weightings available in the `logrank_test`: `wilcoxon`, `tarone-ware`, `peto`, `fleming-harrington`. Thanks @sean-reed + - new interval censored dataset: `lifelines.datasets.load_mice` + +##### Bug fixes + - Cleared up some mislabeling in `plot_loglogs`. Thanks @sean-reed! + - tuples are now able to be used as input in univariate models. + #### 0.24.8 - 2020-05-17 ##### New features diff --git a/docs/Changelog.rst b/docs/Changelog.rst index c8525f28a..df861f95a 100644 --- a/docs/Changelog.rst +++ b/docs/Changelog.rst @@ -1,9 +1,30 @@ Changelog --------- +0.24.9 - 2020-06-05 +^^^^^^^^^^^^^^^^^^^ + +New features +'''''''''''' + +- Faster NPMLE for interval censored data +- New weightings available in the ``logrank_test``: ``wilcoxon``, + ``tarone-ware``, ``peto``, ``fleming-harrington``. Thanks @sean-reed +- new interval censored dataset: ``lifelines.datasets.load_mice`` + +Bug fixes +''''''''' + +- Cleared up some mislabeling in ``plot_loglogs``. Thanks @sean-reed! +- tuples are now able to be used as input in univariate models. + +.. _section-1: + 0.24.8 - 2020-05-17 ^^^^^^^^^^^^^^^^^^^ +.. _new-features-1: + New features '''''''''''' @@ -11,12 +32,12 @@ New features Not all edge cases are fully checked, and some features are missing. Try it under ``KaplanMeierFitter.fit_interval_censoring`` -.. _section-1: +.. _section-2: 0.24.7 - 2020-05-17 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-1: +.. _new-features-2: New features '''''''''''' @@ -32,12 +53,12 @@ New features - some convergence tweaks which should help recent performance regressions. -.. _section-2: +.. _section-3: 0.24.6 - 2020-05-05 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-2: +.. _new-features-3: New features '''''''''''' @@ -47,25 +68,27 @@ New features - New ``lifelines.plotting.plot_interval_censored_lifetimes`` for plotting interval censored data - thanks @sean-reed! +.. _bug-fixes-1: + Bug fixes ''''''''' - fixed bug where ``cdf_plot`` and ``qq_plot`` were not factoring in the weights correctly. -.. _section-3: +.. _section-4: 0.24.5 - 2020-05-01 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-3: +.. _new-features-4: New features '''''''''''' - ``plot_lifetimes`` accepts pandas Series. -.. _bug-fixes-1: +.. _bug-fixes-2: Bug fixes ''''''''' @@ -75,12 +98,12 @@ Bug fixes - Improved ``at_risk_counts`` for subplots. - More data validation checks for ``CoxTimeVaryingFitter`` -.. _section-4: +.. _section-5: 0.24.4 - 2020-04-13 ^^^^^^^^^^^^^^^^^^^ -.. _bug-fixes-2: +.. _bug-fixes-3: Bug fixes ''''''''' @@ -89,12 +112,12 @@ Bug fixes - setting a dataframe in ``ancillary_df`` works for interval censoring - ``.score`` works for interval censored models -.. _section-5: +.. _section-6: 0.24.3 - 2020-03-25 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-4: +.. _new-features-5: New features '''''''''''' @@ -104,7 +127,7 @@ New features the hazard ratio would be at previous times. This is useful because the final hazard ratio is some weighted average of these. -.. _bug-fixes-3: +.. _bug-fixes-4: Bug fixes ''''''''' @@ -112,12 +135,12 @@ Bug fixes - Fixed error in HTML printer that was hiding concordance index information. -.. _section-6: +.. _section-7: 0.24.2 - 2020-03-15 ^^^^^^^^^^^^^^^^^^^ -.. _bug-fixes-4: +.. _bug-fixes-5: Bug fixes ''''''''' @@ -129,12 +152,12 @@ Bug fixes - Fixed a keyword bug in ``plot_covariate_groups`` for parametric models. -.. _section-7: +.. _section-8: 0.24.1 - 2020-03-05 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-5: +.. _new-features-6: New features '''''''''''' @@ -142,14 +165,14 @@ New features - Stability improvements for GeneralizedGammaRegressionFitter and CoxPHFitter with spline estimation. -.. _bug-fixes-5: +.. _bug-fixes-6: Bug fixes ''''''''' - Fixed bug with plotting hazards in NelsonAalenFitter. -.. _section-8: +.. _section-9: 0.24.0 - 2020-02-20 ^^^^^^^^^^^^^^^^^^^ @@ -158,7 +181,7 @@ This version and future versions of lifelines no longer support py35. Pandas 1.0 is fully supported, along with previous versions. Minimum Scipy has been bumped to 1.2.0. -.. _new-features-6: +.. _new-features-7: New features '''''''''''' @@ -208,7 +231,7 @@ API Changes to ``scoring_method``. - removed ``_score_`` and ``path`` from Cox model. -.. _bug-fixes-6: +.. _bug-fixes-7: Bug fixes ''''''''' @@ -221,12 +244,12 @@ Bug fixes - Cox models now incorporate any penalizers in their ``log_likelihood_`` -.. _section-9: +.. _section-10: 0.23.9 - 2020-01-28 ^^^^^^^^^^^^^^^^^^^ -.. _bug-fixes-7: +.. _bug-fixes-8: Bug fixes ''''''''' @@ -237,12 +260,12 @@ Bug fixes of ``GeneralizedGammaRegressionFitter`` and any custom regression models should update their code as soon as possible. -.. _section-10: +.. _section-11: 0.23.8 - 2020-01-21 ^^^^^^^^^^^^^^^^^^^ -.. _bug-fixes-8: +.. _bug-fixes-9: Bug fixes ''''''''' @@ -253,19 +276,19 @@ Bug fixes ``GeneralizedGammaRegressionFitter`` and any custom regression models should update their code as soon as possible. -.. _section-11: +.. _section-12: 0.23.7 - 2020-01-14 ^^^^^^^^^^^^^^^^^^^ Bug fixes for py3.5. -.. _section-12: +.. _section-13: 0.23.6 - 2020-01-07 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-7: +.. _new-features-8: New features '''''''''''' @@ -279,12 +302,12 @@ New features - custom parametric regression models can now do left and interval censoring. -.. _section-13: +.. _section-14: 0.23.5 - 2020-01-05 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-8: +.. _new-features-9: New features '''''''''''' @@ -293,7 +316,7 @@ New features - New lymph node cancer dataset, originally from *H.F. for the German Breast Cancer Study Group (GBSG) (1994)* -.. _bug-fixes-9: +.. _bug-fixes-10: Bug fixes ''''''''' @@ -303,26 +326,26 @@ Bug fixes - fixed bug where large exponential numbers in ``print_summary`` were not being suppressed correctly. -.. _section-14: +.. _section-15: 0.23.4 - 2019-12-15 ^^^^^^^^^^^^^^^^^^^ - Bug fix for PyPI -.. _section-15: +.. _section-16: 0.23.3 - 2019-12-11 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-9: +.. _new-features-10: New features '''''''''''' - ``StatisticalResult.print_summary`` supports html output. -.. _bug-fixes-10: +.. _bug-fixes-11: Bug fixes ''''''''' @@ -330,12 +353,12 @@ Bug fixes - fix import in ``printer.py`` - fix html printing with Univariate models. -.. _section-16: +.. _section-17: 0.23.2 - 2019-12-07 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-10: +.. _new-features-11: New features '''''''''''' @@ -347,7 +370,7 @@ New features - performance improvements on regression models’ preprocessing. Should make datasets with high number of columns more performant. -.. _bug-fixes-11: +.. _bug-fixes-12: Bug fixes ''''''''' @@ -356,12 +379,12 @@ Bug fixes - fixed repr for ``sklearn_adapter`` classes. - fixed ``conditional_after`` in Cox model with strata was used. -.. _section-17: +.. _section-18: 0.23.1 - 2019-11-27 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-11: +.. _new-features-12: New features '''''''''''' @@ -371,7 +394,7 @@ New features - performance improvements for ``CoxPHFitter`` - up to 30% performance improvements for some datasets. -.. _bug-fixes-12: +.. _bug-fixes-13: Bug fixes ''''''''' @@ -383,12 +406,12 @@ Bug fixes - fixed bug when using ``print_summary`` with left censored models. - lots of minor bug fixes. -.. _section-18: +.. _section-19: 0.23.0 - 2019-11-17 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-12: +.. _new-features-13: New features '''''''''''' @@ -397,7 +420,7 @@ New features Jupyter notebooks! - silenced some warnings. -.. _bug-fixes-13: +.. _bug-fixes-14: Bug fixes ''''''''' @@ -419,7 +442,7 @@ API Changes - ``left_censorship`` in ``fit`` has been removed in favour of ``fit_left_censoring``. -.. _section-19: +.. _section-20: 0.22.10 - 2019-11-08 ^^^^^^^^^^^^^^^^^^^^ @@ -427,7 +450,7 @@ API Changes The tests were re-factored to be shipped with the package. Let me know if this causes problems. -.. _bug-fixes-14: +.. _bug-fixes-15: Bug fixes ''''''''' @@ -437,12 +460,12 @@ Bug fixes - fixed bug in plot_covariate_groups for AFT models when >1d arrays were used for values arg. -.. _section-20: +.. _section-21: 0.22.9 - 2019-10-30 ^^^^^^^^^^^^^^^^^^^ -.. _bug-fixes-15: +.. _bug-fixes-16: Bug fixes ''''''''' @@ -454,12 +477,12 @@ Bug fixes - ``CoxPHFitter`` now displays correct columns values when changing alpha param. -.. _section-21: +.. _section-22: 0.22.8 - 2019-10-06 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-13: +.. _new-features-14: New features '''''''''''' @@ -469,19 +492,19 @@ New features - ``conditional_after`` now available in ``CoxPHFitter.predict_median`` - Suppressed some unimportant warnings. -.. _bug-fixes-16: +.. _bug-fixes-17: Bug fixes ''''''''' - fixed initial_point being ignored in AFT models. -.. _section-22: +.. _section-23: 0.22.7 - 2019-09-29 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-14: +.. _new-features-15: New features '''''''''''' @@ -489,7 +512,7 @@ New features - new ``ApproximationWarning`` to tell you if the package is making an potentially mislead approximation. -.. _bug-fixes-17: +.. _bug-fixes-18: Bug fixes ''''''''' @@ -508,19 +531,19 @@ API Changes - Some previous ``StatisticalWarnings`` have been replaced by ``ApproximationWarning`` -.. _section-23: +.. _section-24: 0.22.6 - 2019-09-25 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-15: +.. _new-features-16: New features '''''''''''' - ``conditional_after`` works for ``CoxPHFitter`` prediction models 😅 -.. _bug-fixes-18: +.. _bug-fixes-19: Bug fixes ''''''''' @@ -536,12 +559,12 @@ API Changes - ``utils.dataframe_interpolate_at_times`` renamed to ``utils.interpolate_at_times_and_return_pandas``. -.. _section-24: +.. _section-25: 0.22.5 - 2019-09-20 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-16: +.. _new-features-17: New features '''''''''''' @@ -550,7 +573,7 @@ New features weights. - Better support for predicting on Pandas Series -.. _bug-fixes-19: +.. _bug-fixes-20: Bug fixes ''''''''' @@ -567,12 +590,12 @@ API Changes - ``_get_initial_value`` in parametric univariate models is renamed ``_create_initial_point`` -.. _section-25: +.. _section-26: 0.22.4 - 2019-09-04 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-17: +.. _new-features-18: New features '''''''''''' @@ -591,7 +614,7 @@ API changes - ``KaplanMeierFitter.survival_function_``\ ‘s’ index is no longer given the name “timeline”. -.. _bug-fixes-20: +.. _bug-fixes-21: Bug fixes ''''''''' @@ -599,12 +622,12 @@ Bug fixes - Fixed issue where ``concordance_index`` would never exit if NaNs in dataset. -.. _section-26: +.. _section-27: 0.22.3 - 2019-08-08 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-18: +.. _new-features-19: New features '''''''''''' @@ -628,7 +651,7 @@ API changes gains only in Cox models, and only a small fraction of the API was being used. -.. _bug-fixes-21: +.. _bug-fixes-22: Bug fixes ''''''''' @@ -640,19 +663,19 @@ Bug fixes - Fixed an error in the ``predict_percentile`` of ``LogLogisticAFTFitter``. New tests have been added around this. -.. _section-27: +.. _section-28: 0.22.2 - 2019-07-25 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-19: +.. _new-features-20: New features '''''''''''' - lifelines is now compatible with scipy>=1.3.0 -.. _bug-fixes-22: +.. _bug-fixes-23: Bug fixes ''''''''' @@ -663,12 +686,12 @@ Bug fixes errors when using the library. The correctly numpy has been pinned (to 1.14.0+) -.. _section-28: +.. _section-29: 0.22.1 - 2019-07-14 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-20: +.. _new-features-21: New features '''''''''''' @@ -696,7 +719,7 @@ API changes ``.print_summary`` includes confidence intervals for the exponential of the value. -.. _bug-fixes-23: +.. _bug-fixes-24: Bug fixes ''''''''' @@ -706,12 +729,12 @@ Bug fixes - fixed an overflow bug in ``KaplanMeierFitter`` confidence intervals - improvements in data validation for ``CoxTimeVaryingFitter`` -.. _section-29: +.. _section-30: 0.22.0 - 2019-07-03 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-21: +.. _new-features-22: New features '''''''''''' @@ -745,7 +768,7 @@ API changes could set ``fit_intercept`` to False and not have to set ``ancillary_df`` - now one must specify a DataFrame. -.. _bug-fixes-24: +.. _bug-fixes-25: Bug fixes ''''''''' @@ -754,21 +777,21 @@ Bug fixes is now exact instead of an approximation. - fixed a name error bug in ``CoxTimeVaryingFitter.plot`` -.. _section-30: +.. _section-31: 0.21.5 - 2019-06-22 ^^^^^^^^^^^^^^^^^^^ I’m skipping 0.21.4 version because of deployment issues. -.. _new-features-22: +.. _new-features-23: New features '''''''''''' - ``scoring_method`` now a kwarg on ``sklearn_adapter`` -.. _bug-fixes-25: +.. _bug-fixes-26: Bug fixes ''''''''' @@ -778,12 +801,12 @@ Bug fixes - fixed visual bug that misaligned x-axis ticks and at-risk counts. Thanks @christopherahern! -.. _section-31: +.. _section-32: 0.21.3 - 2019-06-04 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-23: +.. _new-features-24: New features '''''''''''' @@ -797,19 +820,19 @@ New features - ``CoxPHFitter.check_assumptions`` now accepts a ``columns`` parameter to specify only checking a subset of columns. -.. _bug-fixes-26: +.. _bug-fixes-27: Bug fixes ''''''''' - ``covariates_from_event_matrix`` handle nulls better -.. _section-32: +.. _section-33: 0.21.2 - 2019-05-16 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-24: +.. _new-features-25: New features '''''''''''' @@ -833,17 +856,17 @@ API changes - removing ``_compute_likelihood_ratio_test`` on regression models. Use ``log_likelihood_ratio_test`` now. -.. _bug-fixes-27: +.. _bug-fixes-28: Bug fixes ''''''''' -.. _section-33: +.. _section-34: 0.21.1 - 2019-04-26 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-25: +.. _new-features-26: New features '''''''''''' @@ -860,19 +883,19 @@ API changes - output of ``survival_table_from_events`` when collapsing rows to intervals now removes the “aggregate” column multi-index. -.. _bug-fixes-28: +.. _bug-fixes-29: Bug fixes ''''''''' - fixed bug in CoxTimeVaryingFitter when ax is provided, thanks @j-i-l! -.. _section-34: +.. _section-35: 0.21.0 - 2019-04-12 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-26: +.. _new-features-27: New features '''''''''''' @@ -897,7 +920,7 @@ API changes - ``entries`` property in multivariate parametric models has a new Series name: ``entry`` -.. _bug-fixes-29: +.. _bug-fixes-30: Bug fixes ''''''''' @@ -907,12 +930,12 @@ Bug fixes - Fixed an error that didn’t let users use Numpy arrays in prediction for AFT models -.. _section-35: +.. _section-36: 0.20.5 - 2019-04-08 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-27: +.. _new-features-28: New features '''''''''''' @@ -929,7 +952,7 @@ API changes - in ``AalenJohansenFitter``, the ``variance`` parameter is renamed to ``variance_`` to align with the usual lifelines convention. -.. _bug-fixes-30: +.. _bug-fixes-31: Bug fixes ''''''''' @@ -938,12 +961,12 @@ Bug fixes test when using strata. - Fixed some plotting bugs with ``AalenJohansenFitter`` -.. _section-36: +.. _section-37: 0.20.4 - 2019-03-27 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-28: +.. _new-features-29: New features '''''''''''' @@ -962,7 +985,7 @@ API changes - Pandas is now correctly pinned to >= 0.23.0. This was always the case, but not specified in setup.py correctly. -.. _bug-fixes-31: +.. _bug-fixes-32: Bug fixes ''''''''' @@ -971,12 +994,12 @@ Bug fixes - ``PiecewiseExponentialFitter`` is available with ``from lifelines import *``. -.. _section-37: +.. _section-38: 0.20.3 - 2019-03-23 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-29: +.. _new-features-30: New features '''''''''''' @@ -989,12 +1012,12 @@ New features ``plot_survival_function`` and ``confidence_interval_survival_function_``. -.. _section-38: +.. _section-39: 0.20.2 - 2019-03-21 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-30: +.. _new-features-31: New features '''''''''''' @@ -1018,7 +1041,7 @@ API changes @vpolimenov! - The ``C`` column in ``load_lcd`` dataset is renamed to ``E``. -.. _bug-fixes-32: +.. _bug-fixes-33: Bug fixes ''''''''' @@ -1034,7 +1057,7 @@ Bug fixes the q parameter was below the truncation limit. This should have been ``-np.inf`` -.. _section-39: +.. _section-40: 0.20.1 - 2019-03-16 ^^^^^^^^^^^^^^^^^^^ @@ -1058,7 +1081,7 @@ API changes This is no longer the case. A 0 will still be added if there is a duration (observed or not) at 0 occurs however. -.. _section-40: +.. _section-41: 0.20.0 - 2019-03-05 ^^^^^^^^^^^^^^^^^^^ @@ -1067,7 +1090,7 @@ API changes recent installs where Py3. - Updated minimum dependencies, specifically Matplotlib and Pandas. -.. _new-features-31: +.. _new-features-32: New features '''''''''''' @@ -1087,19 +1110,19 @@ API changes transposed now (previous parameters where columns, now parameters are rows). -.. _bug-fixes-33: +.. _bug-fixes-34: Bug fixes ''''''''' - Fixed a bug with plotting and ``check_assumptions``. -.. _section-41: +.. _section-42: 0.19.5 - 2019-02-26 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-32: +.. _new-features-33: New features '''''''''''' @@ -1109,24 +1132,24 @@ New features features or categorical variables. - Convergence improvements for AFT models. -.. _section-42: +.. _section-43: 0.19.4 - 2019-02-25 ^^^^^^^^^^^^^^^^^^^ -.. _bug-fixes-34: +.. _bug-fixes-35: Bug fixes ''''''''' - remove some bad print statements in ``CoxPHFitter``. -.. _section-43: +.. _section-44: 0.19.3 - 2019-02-25 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-33: +.. _new-features-34: New features '''''''''''' @@ -1138,12 +1161,12 @@ New features - Performance increase to ``print_summary`` in the ``CoxPHFitter`` and ``CoxTimeVaryingFitter`` model. -.. _section-44: +.. _section-45: 0.19.2 - 2019-02-22 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-34: +.. _new-features-35: New features '''''''''''' @@ -1151,7 +1174,7 @@ New features - ``ParametricUnivariateFitters``, like ``WeibullFitter``, have smoothed plots when plotting (vs stepped plots) -.. _bug-fixes-35: +.. _bug-fixes-36: Bug fixes ''''''''' @@ -1161,12 +1184,12 @@ Bug fixes - Univariate fitters are more flexiable and can allow 2-d and DataFrames as inputs. -.. _section-45: +.. _section-46: 0.19.1 - 2019-02-21 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-35: +.. _new-features-36: New features '''''''''''' @@ -1183,12 +1206,12 @@ API changes ``PiecewiseExponential`` to the same as ``ExponentialFitter`` (from ``\lambda * t`` to ``t / \lambda``). -.. _section-46: +.. _section-47: 0.19.0 - 2019-02-20 ^^^^^^^^^^^^^^^^^^^ -.. _new-features-36: +.. _new-features-37: New features '''''''''''' @@ -1227,7 +1250,7 @@ API changes means that the *default* for alpha is set to 0.05 in the latest lifelines, instead of 0.95 in previous versions. -.. _bug-fixes-36: +.. _bug-fixes-37: Bug Fixes ''''''''' @@ -1244,7 +1267,7 @@ Bug Fixes models. Thanks @airanmehr! - Fixed some Pandas <0.24 bugs. -.. _section-47: +.. _section-48: 0.18.6 - 2019-02-13 ^^^^^^^^^^^^^^^^^^^ @@ -1254,7 +1277,7 @@ Bug Fixes ``rank`` and ``km`` p-values now. - some performance improvements to ``qth_survival_time``. -.. _section-48: +.. _section-49: 0.18.5 - 2019-02-11 ^^^^^^^^^^^^^^^^^^^ @@ -1275,7 +1298,7 @@ Bug Fixes that can be used to turn off variance calculations since this can take a long time for large datasets. Thanks @pzivich! -.. _section-49: +.. _section-50: 0.18.4 - 2019-02-10 ^^^^^^^^^^^^^^^^^^^ @@ -1285,7 +1308,7 @@ Bug Fixes - adding left-truncation support to parametric univarite models with the ``entry`` kwarg in ``.fit`` -.. _section-50: +.. _section-51: 0.18.3 - 2019-02-07 ^^^^^^^^^^^^^^^^^^^ @@ -1295,7 +1318,7 @@ Bug Fixes warnings are more noticeable. - Improved some warning and error messages. -.. _section-51: +.. _section-52: 0.18.2 - 2019-02-05 ^^^^^^^^^^^^^^^^^^^ @@ -1311,7 +1334,7 @@ Bug Fixes Moved them all (most) to use ``autograd``. - ``LogNormalFitter`` no longer models ``log_sigma``. -.. _section-52: +.. _section-53: 0.18.1 - 2019-02-02 ^^^^^^^^^^^^^^^^^^^ @@ -1322,7 +1345,7 @@ Bug Fixes - use the ``autograd`` lib to help with gradients. - New ``LogLogisticFitter`` univariate fitter available. -.. _section-53: +.. _section-54: 0.18.0 - 2019-01-31 ^^^^^^^^^^^^^^^^^^^ @@ -1359,7 +1382,7 @@ Bug Fixes ``LinAlgError: Matrix is singular.`` and report back to the user advice. -.. _section-54: +.. _section-55: 0.17.5 - 2019-01-25 ^^^^^^^^^^^^^^^^^^^ @@ -1367,7 +1390,7 @@ Bug Fixes - more bugs in ``plot_covariate_groups`` fixed when using non-numeric strata. -.. _section-55: +.. _section-56: 0.17.4 -2019-01-25 ^^^^^^^^^^^^^^^^^^ @@ -1379,7 +1402,7 @@ Bug Fixes - ``groups`` is now called ``values`` in ``CoxPHFitter.plot_covariate_groups`` -.. _section-56: +.. _section-57: 0.17.3 - 2019-01-24 ^^^^^^^^^^^^^^^^^^^ @@ -1387,7 +1410,7 @@ Bug Fixes - Fix in ``compute_residuals`` when using ``schoenfeld`` and the minumum duration has only censored subjects. -.. _section-57: +.. _section-58: 0.17.2 2019-01-22 ^^^^^^^^^^^^^^^^^ @@ -1398,7 +1421,7 @@ Bug Fixes ``for`` loop. The downside is the code is more esoteric now. I’ve added comments as necessary though 🤞 -.. _section-58: +.. _section-59: 0.17.1 - 2019-01-20 ^^^^^^^^^^^^^^^^^^^ @@ -1415,7 +1438,7 @@ Bug Fixes - Fixes a Pandas performance warning in ``CoxTimeVaryingFitter``. - Performances improvements to ``CoxTimeVaryingFitter``. -.. _section-59: +.. _section-60: 0.17.0 - 2019-01-11 ^^^^^^^^^^^^^^^^^^^ @@ -1436,7 +1459,7 @@ Bug Fixes - some plotting improvemnts to ``plotting.plot_lifetimes`` -.. _section-60: +.. _section-61: 0.16.3 - 2019-01-03 ^^^^^^^^^^^^^^^^^^^ @@ -1444,7 +1467,7 @@ Bug Fixes - More ``CoxPHFitter`` performance improvements. Up to a 40% reduction vs 0.16.2 for some datasets. -.. _section-61: +.. _section-62: 0.16.2 - 2019-01-02 ^^^^^^^^^^^^^^^^^^^ @@ -1455,14 +1478,14 @@ Bug Fixes has lots of duplicate times. See https://github.com/CamDavidsonPilon/lifelines/issues/591 -.. _section-62: +.. _section-63: 0.16.1 - 2019-01-01 ^^^^^^^^^^^^^^^^^^^ - Fixed py2 division error in ``concordance`` method. -.. _section-63: +.. _section-64: 0.16.0 - 2019-01-01 ^^^^^^^^^^^^^^^^^^^ @@ -1498,7 +1521,7 @@ Bug Fixes ``lifelines.utils.to_episodic_format``. - ``CoxTimeVaryingFitter`` now accepts ``strata``. -.. _section-64: +.. _section-65: 0.15.4 ^^^^^^ @@ -1506,14 +1529,14 @@ Bug Fixes - bug fix for the Cox model likelihood ratio test when using non-trivial weights. -.. _section-65: +.. _section-66: 0.15.3 - 2018-12-18 ^^^^^^^^^^^^^^^^^^^ - Only allow matplotlib less than 3.0. -.. _section-66: +.. _section-67: 0.15.2 - 2018-11-23 ^^^^^^^^^^^^^^^^^^^ @@ -1524,7 +1547,7 @@ Bug Fixes - removed ``entry`` from ``ExponentialFitter`` and ``WeibullFitter`` as it was doing nothing. -.. _section-67: +.. _section-68: 0.15.1 - 2018-11-23 ^^^^^^^^^^^^^^^^^^^ @@ -1533,7 +1556,7 @@ Bug Fixes - Raise NotImplementedError if the ``robust`` flag is used in ``CoxTimeVaryingFitter`` - that’s not ready yet. -.. _section-68: +.. _section-69: 0.15.0 - 2018-11-22 ^^^^^^^^^^^^^^^^^^^ @@ -1604,7 +1627,7 @@ Bug Fixes When Estimating Risks in Pharmacoepidemiology” for a nice overview of the model. -.. _section-69: +.. _section-70: 0.14.6 - 2018-07-02 ^^^^^^^^^^^^^^^^^^^ @@ -1612,7 +1635,7 @@ Bug Fixes - fix for n > 2 groups in ``multivariate_logrank_test`` (again). - fix bug for when ``event_observed`` column was not boolean. -.. _section-70: +.. _section-71: 0.14.5 - 2018-06-29 ^^^^^^^^^^^^^^^^^^^ @@ -1620,7 +1643,7 @@ Bug Fixes - fix for n > 2 groups in ``multivariate_logrank_test`` - fix weights in KaplanMeierFitter when using a pandas Series. -.. _section-71: +.. _section-72: 0.14.4 - 2018-06-14 ^^^^^^^^^^^^^^^^^^^ @@ -1637,7 +1660,7 @@ Bug Fixes - New ``delay`` parameter in ``add_covariate_to_timeline`` - removed ``two_sided_z_test`` from ``statistics`` -.. _section-72: +.. _section-73: 0.14.3 - 2018-05-24 ^^^^^^^^^^^^^^^^^^^ @@ -1649,7 +1672,7 @@ Bug Fixes - adds a ``column`` argument to ``CoxTimeVaryingFitter`` and ``CoxPHFitter`` ``plot`` method to plot only a subset of columns. -.. _section-73: +.. _section-74: 0.14.2 - 2018-05-18 ^^^^^^^^^^^^^^^^^^^ @@ -1657,7 +1680,7 @@ Bug Fixes - some quality of life improvements for working with ``CoxTimeVaryingFitter`` including new ``predict_`` methods. -.. _section-74: +.. _section-75: 0.14.1 - 2018-04-01 ^^^^^^^^^^^^^^^^^^^ @@ -1675,7 +1698,7 @@ Bug Fixes faster completion of ``fit`` for large dataframes, and up to 10% faster for small dataframes. -.. _section-75: +.. _section-76: 0.14.0 - 2018-03-03 ^^^^^^^^^^^^^^^^^^^ @@ -1697,7 +1720,7 @@ Bug Fixes of a ``RuntimeWarning`` - New checks for complete separation in the dataset for regressions. -.. _section-76: +.. _section-77: 0.13.0 - 2017-12-22 ^^^^^^^^^^^^^^^^^^^ @@ -1726,7 +1749,7 @@ Bug Fixes group the same subjects together and give that observation a weight equal to the count. Altogether, this means a much faster regression. -.. _section-77: +.. _section-78: 0.12.0 ^^^^^^ @@ -1743,7 +1766,7 @@ Bug Fixes - Additional functionality to ``utils.survival_table_from_events`` to bin the index to make the resulting table more readable. -.. _section-78: +.. _section-79: 0.11.3 ^^^^^^ @@ -1755,7 +1778,7 @@ Bug Fixes observation or censorship. - More accurate prediction methods parametrics univariate models. -.. _section-79: +.. _section-80: 0.11.2 ^^^^^^ @@ -1763,14 +1786,14 @@ Bug Fixes - Changing liscense to valilla MIT. - Speed up ``NelsonAalenFitter.fit`` considerably. -.. _section-80: +.. _section-81: 0.11.1 - 2017-06-22 ^^^^^^^^^^^^^^^^^^^ - Python3 fix for ``CoxPHFitter.plot``. -.. _section-81: +.. _section-82: 0.11.0 - 2017-06-21 ^^^^^^^^^^^^^^^^^^^ @@ -1784,14 +1807,14 @@ Bug Fixes of a new ``loc`` kwarg. This is to align with Pandas deprecating ``ix`` -.. _section-82: +.. _section-83: 0.10.1 - 2017-06-05 ^^^^^^^^^^^^^^^^^^^ - fix in internal normalization for ``CoxPHFitter`` predict methods. -.. _section-83: +.. _section-84: 0.10.0 ^^^^^^ @@ -1806,7 +1829,7 @@ Bug Fixes mimic R’s ``basehaz`` API. - new ``predict_log_partial_hazards`` to ``CoxPHFitter`` -.. _section-84: +.. _section-85: 0.9.4 ^^^^^ @@ -1829,7 +1852,7 @@ Bug Fixes - performance improvements in ``CoxPHFitter`` - should see at least a 10% speed improvement in ``fit``. -.. _section-85: +.. _section-86: 0.9.2 ^^^^^ @@ -1838,7 +1861,7 @@ Bug Fixes - throw an error if no admissable pairs in the c-index calculation. Previously a NaN was returned. -.. _section-86: +.. _section-87: 0.9.1 ^^^^^ @@ -1846,7 +1869,7 @@ Bug Fixes - add two summary functions to Weibull and Exponential fitter, solves #224 -.. _section-87: +.. _section-88: 0.9.0 ^^^^^ @@ -1862,7 +1885,7 @@ Bug Fixes - Default predict method in ``k_fold_cross_validation`` is now ``predict_expectation`` -.. _section-88: +.. _section-89: 0.8.1 - 2015-08-01 ^^^^^^^^^^^^^^^^^^ @@ -1879,7 +1902,7 @@ Bug Fixes - scaling of smooth hazards in NelsonAalenFitter was off by a factor of 0.5. -.. _section-89: +.. _section-90: 0.8.0 ^^^^^ @@ -1898,7 +1921,7 @@ Bug Fixes ``lifelines.statistics. power_under_cph``. - fixed a bug when using KaplanMeierFitter for left-censored data. -.. _section-90: +.. _section-91: 0.7.1 ^^^^^ @@ -1917,7 +1940,7 @@ Bug Fixes - refactor each fitter into it’s own submodule. For now, the tests are still in the same file. This will also *not* break the API. -.. _section-91: +.. _section-92: 0.7.0 - 2015-03-01 ^^^^^^^^^^^^^^^^^^ @@ -1936,7 +1959,7 @@ Bug Fixes duration remaining until the death event, given survival up until time t. -.. _section-92: +.. _section-93: 0.6.1 ^^^^^ @@ -1948,7 +1971,7 @@ Bug Fixes your work is to sum up the survival function (for expected values or something similar), it’s more difficult to make a mistake. -.. _section-93: +.. _section-94: 0.6.0 - 2015-02-04 ^^^^^^^^^^^^^^^^^^ @@ -1971,7 +1994,7 @@ Bug Fixes - In ``KaplanMeierFitter``, ``epsilon`` has been renamed to ``precision``. -.. _section-94: +.. _section-95: 0.5.1 - 2014-12-24 ^^^^^^^^^^^^^^^^^^ @@ -1992,7 +2015,7 @@ Bug Fixes ``lifelines.plotting.add_at_risk_counts``. - Fix bug Epanechnikov kernel. -.. _section-95: +.. _section-96: 0.5.0 - 2014-12-07 ^^^^^^^^^^^^^^^^^^ @@ -2005,7 +2028,7 @@ Bug Fixes - add test for summary() - Alternate metrics can be used for ``k_fold_cross_validation``. -.. _section-96: +.. _section-97: 0.4.4 - 2014-11-27 ^^^^^^^^^^^^^^^^^^ @@ -2017,7 +2040,7 @@ Bug Fixes - Fixes bug in 1-d input not returning in CoxPHFitter - Lots of new tests. -.. _section-97: +.. _section-98: 0.4.3 - 2014-07-23 ^^^^^^^^^^^^^^^^^^ @@ -2038,7 +2061,7 @@ Bug Fixes - Adds option ``include_likelihood`` to CoxPHFitter fit method to save the final log-likelihood value. -.. _section-98: +.. _section-99: 0.4.2 - 2014-06-19 ^^^^^^^^^^^^^^^^^^ @@ -2058,7 +2081,7 @@ Bug Fixes from failing so often (this a stop-gap) - pep8 everything -.. _section-99: +.. _section-100: 0.4.1.1 ^^^^^^^ @@ -2071,7 +2094,7 @@ Bug Fixes - Adding more robust cross validation scheme based on issue #67. - fixing ``regression_dataset`` in ``datasets``. -.. _section-100: +.. _section-101: 0.4.1 - 2014-06-11 ^^^^^^^^^^^^^^^^^^ @@ -2090,7 +2113,7 @@ Bug Fixes - Adding a Changelog. - more sanitizing for the statistical tests =) -.. _section-101: +.. _section-102: 0.4.0 - 2014-06-08 ^^^^^^^^^^^^^^^^^^ diff --git a/lifelines/datasets/__init__.py b/lifelines/datasets/__init__.py index 9c41d05f4..c0673ca20 100644 --- a/lifelines/datasets/__init__.py +++ b/lifelines/datasets/__init__.py @@ -557,3 +557,14 @@ def load_c_botulinum_lag_phase(**kwargs): """ return _load_dataset("c_botulinum_lag_phase.csv", **kwargs) + + +def load_mice(**kwargs): + """ + A dataset of interval-censored observations of mice tumors in two different environments. + + References + ----------- + Hoel D. and Walburg, H.,(1972), Statistical analysis of survival experiments, The Annals of Statistics, 18, 1259-1294 + """ + return _load_dataset("mice.csv", **kwargs) diff --git a/lifelines/datasets/mice.csv b/lifelines/datasets/mice.csv new file mode 100644 index 000000000..b8f34ed9b --- /dev/null +++ b/lifelines/datasets/mice.csv @@ -0,0 +1,145 @@ +"","l","u","grp" +"1",0,381,"ce" +"2",0,477,"ce" +"3",0,485,"ce" +"4",0,515,"ce" +"5",0,539,"ce" +"6",0,563,"ce" +"7",0,565,"ce" +"8",0,582,"ce" +"9",0,603,"ce" +"10",0,616,"ce" +"11",0,624,"ce" +"12",0,650,"ce" +"13",0,651,"ce" +"14",0,656,"ce" +"15",0,659,"ce" +"16",0,672,"ce" +"17",0,679,"ce" +"18",0,698,"ce" +"19",0,702,"ce" +"20",0,709,"ce" +"21",0,723,"ce" +"22",0,731,"ce" +"23",0,775,"ce" +"24",0,779,"ce" +"25",0,795,"ce" +"26",0,811,"ce" +"27",0,839,"ce" +"28",45,Inf,"ce" +"29",198,Inf,"ce" +"30",215,Inf,"ce" +"31",217,Inf,"ce" +"32",257,Inf,"ce" +"33",262,Inf,"ce" +"34",266,Inf,"ce" +"35",371,Inf,"ce" +"36",431,Inf,"ce" +"37",447,Inf,"ce" +"38",454,Inf,"ce" +"39",459,Inf,"ce" +"40",475,Inf,"ce" +"41",479,Inf,"ce" +"42",484,Inf,"ce" +"43",500,Inf,"ce" +"44",502,Inf,"ce" +"45",503,Inf,"ce" +"46",505,Inf,"ce" +"47",508,Inf,"ce" +"48",516,Inf,"ce" +"49",531,Inf,"ce" +"50",541,Inf,"ce" +"51",553,Inf,"ce" +"52",556,Inf,"ce" +"53",570,Inf,"ce" +"54",572,Inf,"ce" +"55",575,Inf,"ce" +"56",577,Inf,"ce" +"57",585,Inf,"ce" +"58",588,Inf,"ce" +"59",594,Inf,"ce" +"60",600,Inf,"ce" +"61",601,Inf,"ce" +"62",608,Inf,"ce" +"63",614,Inf,"ce" +"64",616,Inf,"ce" +"65",632,Inf,"ce" +"66",632,Inf,"ce" +"67",638,Inf,"ce" +"68",642,Inf,"ce" +"69",642,Inf,"ce" +"70",642,Inf,"ce" +"71",644,Inf,"ce" +"72",644,Inf,"ce" +"73",647,Inf,"ce" +"74",647,Inf,"ce" +"75",653,Inf,"ce" +"76",659,Inf,"ce" +"77",660,Inf,"ce" +"78",662,Inf,"ce" +"79",663,Inf,"ce" +"80",667,Inf,"ce" +"81",667,Inf,"ce" +"82",673,Inf,"ce" +"83",673,Inf,"ce" +"84",677,Inf,"ce" +"85",689,Inf,"ce" +"86",693,Inf,"ce" +"87",718,Inf,"ce" +"88",720,Inf,"ce" +"89",721,Inf,"ce" +"90",728,Inf,"ce" +"91",760,Inf,"ce" +"92",762,Inf,"ce" +"93",773,Inf,"ce" +"94",777,Inf,"ce" +"95",815,Inf,"ce" +"96",886,Inf,"ce" +"97",0,546,"ge" +"98",0,609,"ge" +"99",0,692,"ge" +"100",0,692,"ge" +"101",0,710,"ge" +"102",0,752,"ge" +"103",0,773,"ge" +"104",0,781,"ge" +"105",0,782,"ge" +"106",0,789,"ge" +"107",0,808,"ge" +"108",0,810,"ge" +"109",0,814,"ge" +"110",0,842,"ge" +"111",0,846,"ge" +"112",0,851,"ge" +"113",0,871,"ge" +"114",0,873,"ge" +"115",0,876,"ge" +"116",0,888,"ge" +"117",0,888,"ge" +"118",0,890,"ge" +"119",0,894,"ge" +"120",0,896,"ge" +"121",0,911,"ge" +"122",0,913,"ge" +"123",0,914,"ge" +"124",0,914,"ge" +"125",0,916,"ge" +"126",0,921,"ge" +"127",0,921,"ge" +"128",0,926,"ge" +"129",0,936,"ge" +"130",0,945,"ge" +"131",0,1008,"ge" +"132",412,Inf,"ge" +"133",524,Inf,"ge" +"134",647,Inf,"ge" +"135",648,Inf,"ge" +"136",695,Inf,"ge" +"137",785,Inf,"ge" +"138",814,Inf,"ge" +"139",817,Inf,"ge" +"140",851,Inf,"ge" +"141",880,Inf,"ge" +"142",913,Inf,"ge" +"143",942,Inf,"ge" +"144",986,Inf,"ge" diff --git a/lifelines/fitters/__init__.py b/lifelines/fitters/__init__.py index 8457fb3be..3383afaa7 100644 --- a/lifelines/fitters/__init__.py +++ b/lifelines/fitters/__init__.py @@ -93,6 +93,7 @@ def _update_docstrings(self): self.__class__.divide.__doc__ = self.divide.__doc__.format(self._estimate_name, self._class_name) self.__class__.predict.__doc__ = self.predict.__doc__.format(self._class_name) self.__class__.plot.__doc__ = _plot_estimate.__doc__.format(self._class_name, self._estimate_name) + return def plot(self, **kwargs): """ @@ -882,6 +883,7 @@ def _fit( n = len(utils.coalesce(*Ts)) if event_observed is not None: + event_observed = np.asarray(event_observed) utils.check_nans_or_infs(event_observed) self.event_observed = np.asarray(event_observed, dtype=int) if event_observed is not None else np.ones(n) diff --git a/lifelines/fitters/coxph_fitter.py b/lifelines/fitters/coxph_fitter.py index fdcbcd75d..5c56ae25e 100644 --- a/lifelines/fitters/coxph_fitter.py +++ b/lifelines/fitters/coxph_fitter.py @@ -1965,7 +1965,7 @@ def score(self, df: pd.DataFrame, scoring_method: str = "log_likelihood") -> flo """ if self.baseline_estimation_method != "breslow": - return NotImplementedError("Only breslow implemented atm.") + raise NotImplementedError("Only breslow implemented atm.") df = df.copy() diff --git a/lifelines/fitters/kaplan_meier_fitter.py b/lifelines/fitters/kaplan_meier_fitter.py index 83c0991a5..9e36e69a9 100644 --- a/lifelines/fitters/kaplan_meier_fitter.py +++ b/lifelines/fitters/kaplan_meier_fitter.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -import functools import warnings import numpy as np import pandas as pd @@ -125,10 +124,10 @@ def fit_interval_censoring( label=None, alpha=None, ci_labels=None, - show_progress=False, entry=None, weights=None, - tol=1e-7, + tol: float = 1e-5, + show_progress: bool = False, ) -> "KaplanMeierFitter": """ Fit the model to a interval-censored dataset using non-parametric MLE. This estimator is @@ -168,6 +167,10 @@ def fit_interval_censoring( if providing a weighted dataset. For example, instead of providing every subject as a single element of `durations` and `event_observed`, one could weigh subject differently. + tol: float, optional + minimum difference in log likelihood changes for iterative algorithm. + show_progress: bool, optional + display information during fitting. Returns ------- @@ -203,12 +206,11 @@ def fit_interval_censoring( self._label = coalesce(label, self._label, "NPMLE_estimate") - results = npmle(self.lower_bound, self.upper_bound, verbose=show_progress) + results = npmle(self.lower_bound, self.upper_bound, verbose=show_progress, tol=tol) self.survival_function_ = reconstruct_survival_function(*results, self.timeline, label=self._label).loc[self.timeline] self.cumulative_density_ = 1 - self.survival_function_ self._median = median_survival_times(self.survival_function_) - self.percentile = functools.partial(qth_survival_time, model_or_survival_function=self.survival_function_) """ self.confidence_interval_ = npmle_compute_confidence_intervals(self.lower_bound, self.upper_bound, self.survival_function_, self.alpha) @@ -291,8 +293,11 @@ def _fit( self with new properties like ``survival_function_``, ``plot()``, ``median_survival_time_`` """ + durations = np.asarray(durations) self._check_values(durations) + if event_observed is not None: + event_observed = np.asarray(event_observed) self._check_values(event_observed) self._label = coalesce(label, self._label, "KM_estimate") @@ -345,7 +350,6 @@ def _fit( self.__estimate = getattr(self, primary_estimate_name) self.confidence_interval_ = self._bounds(cumulative_sq_[:, None], alpha, ci_labels) self._median = median_survival_times(self.survival_function_) - self.percentile = functools.partial(qth_survival_time, model_or_survival_function=self.survival_function_) self._cumulative_sq_ = cumulative_sq_ setattr(self, "confidence_interval_" + primary_estimate_name, self.confidence_interval_) diff --git a/lifelines/fitters/nelson_aalen_fitter.py b/lifelines/fitters/nelson_aalen_fitter.py index 7d12c030d..e0d9e3449 100644 --- a/lifelines/fitters/nelson_aalen_fitter.py +++ b/lifelines/fitters/nelson_aalen_fitter.py @@ -105,8 +105,10 @@ def fit( self, with new properties like ``cumulative_hazard_``. """ + durations = np.asarray(durations) check_nans_or_infs(durations) if event_observed is not None: + event_observed = np.asarray(event_observed) check_nans_or_infs(event_observed) if weights is not None: diff --git a/lifelines/fitters/npmle.py b/lifelines/fitters/npmle.py index dd2447507..5686fb5eb 100644 --- a/lifelines/fitters/npmle.py +++ b/lifelines/fitters/npmle.py @@ -13,6 +13,7 @@ from numpy.linalg import norm import pandas as pd from lifelines.utils import ConvergenceWarning +from typing import * interval = namedtuple("Interval", ["left", "right"]) @@ -26,7 +27,7 @@ def __init__(self): self.min = np.inf self.max = -np.inf - def add(self, value): + def add(self, value: float): if value > self.max: self.max = value if value < self.min: @@ -37,23 +38,45 @@ def __iter__(self): yield self.max -def E_step_M_step(observation_intervals, p_old, turnbull_interval_lookup, weights): +def temper(i: int, optimize) -> float: + if optimize: + return 0.9 * (2 * np.arctan(i / 100) / np.pi) + 1 + else: + return 1.0 + +def E_step_M_step(observation_intervals, p_old, turnbull_interval_lookup, weights, i, optimize) -> np.ndarray: + """ + See [1], but also modifications. + + References + ----------- + 1. Clifford Anderson-Bergman (2016): An efficient implementation of the + EMICM algorithm for the interval censored NPMLE, Journal of Computational and Graphical + Statistics, DOI: 10.1080/10618600.2016.1208616 + """ N = 0 - p_new = np.zeros_like(p_old) + m = np.zeros_like(p_old) + P = cumulative_sum(p_old) for observation_interval, w in zip(observation_intervals, weights): # find all turnbull intervals, t, that are contained in (ol, or). Call this set T # the denominator is sum of p_old[T] probabilities # the numerator is p_old[t] - min_, max_ = turnbull_interval_lookup[observation_interval] - p_new[min_ : max_ + 1] += w * p_old[min_ : max_ + 1] / p_old[min_ : max_ + 1].sum() + m[min_ : max_ + 1] += w / (P[max_ + 1] - P[min_]).sum() N += w - return p_new / N + p_new = p_old * (m / N) ** temper(i, optimize) + p_new /= p_new.sum() + return p_new + + +def cumulative_sum(p: np.ndarray) -> np.ndarray: + # return np.insert(p, 0, 0).cumsum() + return np.concatenate((np.zeros(1), p)).cumsum() -def create_turnbull_intervals(left, right): +def create_turnbull_intervals(left, right) -> List[interval]: """ obs are [] turnbulls are [] @@ -72,14 +95,16 @@ def create_turnbull_intervals(left, right): return intervals -def is_subset(query_interval, super_interval): +def is_subset(query_interval: interval, super_interval: interval) -> bool: """ assumes query_interval is [], and super_interval is (] """ return super_interval.left <= query_interval.left and query_interval.right <= super_interval.right -def create_turnbull_lookup(turnbull_intervals, observation_intervals): +def create_turnbull_lookup( + turnbull_intervals: List[interval], observation_intervals: List[interval] +) -> Dict[interval, List[interval]]: turnbull_lookup = defaultdict(min_max) @@ -95,27 +120,41 @@ def create_turnbull_lookup(turnbull_intervals, observation_intervals): return {o: list(s) for o, s in turnbull_lookup.items()} -def check_convergence(p_new, p_old, tol, i, verbose=False): +def check_convergence( + p_new: np.ndarray, + p_old: np.ndarray, + turnbull_lookup: Dict[interval, List[interval]], + weights: np.ndarray, + tol: float, + i: int, + verbose=False, +) -> bool: + old_ll = log_likelihood(p_old, turnbull_lookup, weights) + new_ll = log_likelihood(p_new, turnbull_lookup, weights) + delta = new_ll - old_ll if verbose: - print("Iteration %d: delta: %.6f" % (i, norm(p_new - p_old))) - if norm(p_new - p_old) < tol: + print("Iteration %d " % i) + print(" delta log-likelihood: %.10f" % delta) + print(" log-like: %.6f" % log_likelihood(p_new, turnbull_lookup, weights)) + if (delta < tol) and (delta >= 0): return True return False -def create_observation_intervals(obs): +def create_observation_intervals(obs) -> List[interval]: return [interval(l, r) for l, r in obs] -def odds(p): - return p / (1 - p) +def log_odds(p: np.ndarray) -> np.ndarray: + return np.log(p) - np.log(1 - p) -def probs(o): +def probs(log_odds: np.ndarray) -> np.ndarray: + o = np.exp(log_odds) return o / (o + 1) -def npmle(left, right, tol=1e-5, weights=None, verbose=False, max_iter=1e5): +def npmle(left, right, tol=1e-7, weights=None, verbose=False, max_iter=1e5, optimize=False, fit_method="em"): """ left and right are closed intervals. TODO: extend this to open-closed intervals. @@ -135,6 +174,54 @@ def npmle(left, right, tol=1e-5, weights=None, verbose=False, max_iter=1e5): observation_intervals = create_observation_intervals(unique_obs) turnbull_lookup = create_turnbull_lookup(turnbull_intervals, observation_intervals) + if fit_method == "em": + p = expectation_maximization_fit( + observation_intervals, turnbull_intervals, turnbull_lookup, weights, tol, max_iter, optimize, verbose + ) + elif fit_method == "scipy": + p = scipy_minimize_fit(turnbull_lookup, turnbull_intervals, weights, tol, verbose) + + return p, turnbull_intervals + + +def scipy_minimize_fit(turnbull_interval_lookup, turnbull_intervals, weights, tol, verbose): + import autograd.numpy as anp + from autograd import value_and_grad + from scipy.optimize import minimize + + def cumulative_sum(p): + return anp.concatenate((anp.zeros(1), p)).cumsum() + + def negative_log_likelihood(p, turnbull_interval_lookup, weights): + P = cumulative_sum(p) + ix = anp.array(list(turnbull_interval_lookup.values())) + return -(weights * anp.log(P[ix[:, 1] + 1] - P[ix[:, 0]])).sum() + + def con(p): + return p.sum() - 1 + + # initialize to equal weight + T = len(turnbull_intervals) + p = 1 / T * np.ones(T) + + cons = {"type": "eq", "fun": con} + results = minimize( + value_and_grad(negative_log_likelihood), + args=(turnbull_interval_lookup, weights), + x0=p, + bounds=[(0, 1)] * T, + jac=True, + constraints=cons, + tol=tol, + options={"disp": verbose}, + ) + return results.x + + +def expectation_maximization_fit( + observation_intervals, turnbull_intervals, turnbull_lookup, weights, tol, max_iter, optimize, verbose +): + # convergence init converged = False i = 0 @@ -144,18 +231,17 @@ def npmle(left, right, tol=1e-5, weights=None, verbose=False, max_iter=1e5): p = 1 / T * np.ones(T) while (not converged) and (i < max_iter): - p_new = E_step_M_step(observation_intervals, p, turnbull_lookup, weights) - converged = check_convergence(p_new, p, tol, i, verbose=verbose) + new_p = E_step_M_step(observation_intervals, p, turnbull_lookup, weights, i, optimize) + converged = check_convergence(new_p, p, turnbull_lookup, weights, tol, i, verbose=verbose) # find alpha that maximizes ll using a line search - best_alpha, best_p, best_ll = None, None, -np.inf - delta = odds(p_new) - odds(p) + best_p, best_ll = None, -np.inf + delta = log_odds(new_p) - log_odds(p) for alpha in np.array([1.0, 1.25, 1.95]): - p_temp = probs(odds(p) + alpha * delta) - ll_temp = log_likelihood(observation_intervals, p_temp, turnbull_lookup, weights) + p_temp = probs(log_odds(p) + alpha * delta) + ll_temp = log_likelihood(p_temp, turnbull_lookup, weights) if best_ll < ll_temp: best_ll = ll_temp - best_alpha = alpha best_p = p_temp p = best_p @@ -165,19 +251,18 @@ def npmle(left, right, tol=1e-5, weights=None, verbose=False, max_iter=1e5): if i >= max_iter: warnings.warn("Exceeded max iterations", ConvergenceWarning) - return p, turnbull_intervals - + return p -def log_likelihood(observation_intervals, p, turnbull_interval_lookup, weights): - ll = 0 - for observation_interval, w in zip(observation_intervals, weights): - min_, max_ = turnbull_interval_lookup[observation_interval] - ll += w * np.log(p[min_ : max_ + 1].sum()) - return ll +def log_likelihood(p: np.ndarray, turnbull_interval_lookup, weights) -> float: + P = cumulative_sum(p) + ix = np.array(list(turnbull_interval_lookup.values())) + return (weights * np.log(P[ix[:, 1] + 1] - P[ix[:, 0]])).sum() -def reconstruct_survival_function(probabilities, turnbull_intervals, timeline=None, label="NPMLE"): +def reconstruct_survival_function( + probabilities: np.ndarray, turnbull_intervals: List[interval], timeline=None, label="NPMLE" +) -> pd.DataFrame: if timeline is None: timeline = [] diff --git a/lifelines/tests/test_estimation.py b/lifelines/tests/test_estimation.py index 5ce38510f..39cfb0c61 100644 --- a/lifelines/tests/test_estimation.py +++ b/lifelines/tests/test_estimation.py @@ -528,7 +528,7 @@ def test_ci_is_not_all_nan(self, positive_sample_lifetimes, univariate_fitters): pass assert not (pd.isnull(fitter.confidence_interval_)).all().all() - def test_lists_as_input(self, positive_sample_lifetimes, univariate_fitters): + def test_lists_and_tuples_as_input(self, positive_sample_lifetimes, univariate_fitters): T, C = positive_sample_lifetimes for f in univariate_fitters: fitter = f() @@ -536,21 +536,29 @@ def test_lists_as_input(self, positive_sample_lifetimes, univariate_fitters): if isinstance(fitter, NelsonAalenFitter): with_array = fitter.fit(T, C).cumulative_hazard_ with_list = fitter.fit(list(T), list(C)).cumulative_hazard_ + with_tuple = fitter.fit(tuple(T), tuple(C)).cumulative_hazard_ assert_frame_equal(with_list, with_array) + assert_frame_equal(with_tuple, with_array) else: with_array = fitter.fit(T, C).survival_function_ with_list = fitter.fit(list(T), list(C)).survival_function_ + with_tuple = fitter.fit(tuple(T), tuple(C)).survival_function_ assert_frame_equal(with_list, with_array) + assert_frame_equal(with_tuple, with_array) if isinstance(fitter, ParametricUnivariateFitter): with_array = fitter.fit_interval_censoring(T, T + 1, (T == T + 1)).survival_function_ with_list = fitter.fit_interval_censoring(list(T), list(T + 1), list((T == T + 1))).survival_function_ + with_tuple = fitter.fit_interval_censoring(tuple(T), tuple(T + 1), tuple((T == T + 1))).survival_function_ assert_frame_equal(with_list, with_array) + assert_frame_equal(with_tuple, with_array) with_array = fitter.fit_left_censoring(T, C).survival_function_ with_list = fitter.fit_left_censoring(list(T), list(C)).survival_function_ + with_tuple = fitter.fit_left_censoring(tuple(T), tuple(C)).survival_function_ assert_frame_equal(with_list, with_array) + assert_frame_equal(with_tuple, with_array) def test_subtraction_function(self, positive_sample_lifetimes, univariate_fitters): T2 = np.arange(1, 50) diff --git a/lifelines/tests/test_npmle.py b/lifelines/tests/test_npmle.py index c7da5e2b6..7ec38125a 100644 --- a/lifelines/tests/test_npmle.py +++ b/lifelines/tests/test_npmle.py @@ -2,7 +2,9 @@ import pytest from lifelines.fitters.npmle import npmle, is_subset, create_turnbull_intervals, interval, reconstruct_survival_function from numpy import testing as npt +from lifelines.datasets import load_mice import numpy as np +import pandas as pd def test_is_subset(): @@ -33,13 +35,13 @@ def test_create_turnbull_intervals(): def test_npmle(): left, right = [1, 8, 8, 7, 7, 17, 37, 46, 46, 45], [7, 8, 10, 16, 14, np.inf, 44, np.inf, np.inf, np.inf] - npt.assert_allclose(npmle(left, right)[0], np.array([0.16667016, 0.33332984, 0.125, 0.375]), rtol=1e-4) + npt.assert_allclose(npmle(left, right, verbose=True)[0], np.array([0.16667016, 0.33332984, 0.125, 0.375]), rtol=1e-4) def test_npmle_with_weights_is_identical_if_uniform_weights(): left, right = [1, 8, 8, 7, 7, 17, 37, 46, 46, 45], [7, 8, 10, 16, 14, np.inf, 44, np.inf, np.inf, np.inf] weights = 2 * np.ones_like(right) - npt.assert_allclose(npmle(left, right)[0], np.array([0.16667016, 0.33332984, 0.125, 0.375]), rtol=1e-4) + npt.assert_allclose(npmle(left, right, verbose=True)[0], np.array([0.16667016, 0.33332984, 0.125, 0.375]), rtol=1e-4) def test_npmle_with_weights(): @@ -61,3 +63,17 @@ def test_sf_doesnt_return_nans(): npt.assert_allclose(results[0], [0.5, 0.5]) sf = reconstruct_survival_function(*results, timeline=[6, 7, 8, 16, 20]) assert not np.isnan(sf.values).any() + + +def test_mice_and_optimization_flag(): + df = load_mice() + results = npmle(df["l"], df["u"], verbose=True, optimize=True) + npt.assert_allclose(results[0][0], 1 - 0.8571429, rtol=1e-4) + npt.assert_allclose(results[0][-1], 0.166667, rtol=1e-4) + + +def test_mice_scipy(): + df = load_mice() + results = npmle(df["l"], df["u"], verbose=True, fit_method="scipy") + npt.assert_allclose(results[0][0], 1 - 0.8571429, rtol=1e-4) + npt.assert_allclose(results[0][-1], 0.166667, rtol=1e-4) diff --git a/lifelines/utils/__init__.py b/lifelines/utils/__init__.py index f5cf5e47b..49e412c3c 100644 --- a/lifelines/utils/__init__.py +++ b/lifelines/utils/__init__.py @@ -918,7 +918,7 @@ def _preprocess_inputs(durations, event_observed, timeline, entry, weights): if event_observed is None: event_observed = np.ones(n, dtype=int) else: - event_observed = np.asarray(event_observed).reshape((n,)).copy().astype(int) + event_observed = np.asarray(event_observed).reshape((n,)).astype(int) if entry is not None: entry = np.asarray(entry).reshape((n,)) diff --git a/lifelines/version.py b/lifelines/version.py index f20f10eee..f656938bc 100644 --- a/lifelines/version.py +++ b/lifelines/version.py @@ -1,4 +1,4 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals -__version__ = "0.24.8" +__version__ = "0.24.9"