Skip to content

Commit

Permalink
Merge pull request #99 from CamDavidsonPilon/test_improvements
Browse files Browse the repository at this point in the history
Fix dataset inclusion
  • Loading branch information
spacecowboy committed Nov 30, 2014
2 parents 51714eb + cc87af7 commit ee278b0
Show file tree
Hide file tree
Showing 16 changed files with 3,485 additions and 3,427 deletions.
File renamed without changes.
File renamed without changes.
49 changes: 49 additions & 0 deletions lifelines/datasets.py → lifelines/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import pandas as pd
import numpy as np
from io import StringIO
from pkg_resources import resource_filename


__all__ = ['generate_waltons_dataset',
'generate_regression_dataset',
Expand All @@ -10,6 +12,53 @@
'generate_rossi_dataset']


def load_dataset(filename, usecols=None):
    """
    Read one of the CSV files shipped under lifelines/datasets.

    Parameters:
      filename : name of the bundled file, for example "larynx.csv"
      usecols : list of columns in file to use (forwarded to pandas.read_csv)

    Returns : Pandas dataframe
    """
    # Ask pkg_resources for the path of the file inside the 'lifelines'
    # package, then hand that path to pandas.
    dataset_path = resource_filename('lifelines', 'datasets/' + filename)
    return pd.read_csv(dataset_path, usecols=usecols)


def load_canadian_senators(usecols=None):
    """
    Load the bundled "canadian_senators.csv" file through load_dataset.

    Parameters:
      usecols : list of columns in file to use
    Returns : Pandas dataframe
    """
    return load_dataset('canadian_senators.csv', usecols=usecols)


def load_dd(usecols=None):
    """
    Load the bundled "dd.csv" file through load_dataset.

    Parameters:
      usecols : list of columns in file to use
    Returns : Pandas dataframe
    """
    return load_dataset('dd.csv', usecols=usecols)


def load_kidney_transplant(usecols=None):
    """
    Load the bundled "kidney_transplant.csv" file through load_dataset.

    Parameters:
      usecols : list of columns in file to use
    Returns : Pandas dataframe
    """
    return load_dataset('kidney_transplant.csv', usecols=usecols)


def load_larynx(usecols=None):
    """
    Load the bundled "larynx.csv" file through load_dataset.

    Parameters:
      usecols : list of columns in file to use
    Returns : Pandas dataframe
    """
    return load_dataset('larynx.csv', usecols=usecols)


def load_lung(usecols=None):
    """
    Load the bundled "lung.csv" file through load_dataset.

    Parameters:
      usecols : list of columns in file to use
    Returns : Pandas dataframe
    """
    return load_dataset('lung.csv', usecols=usecols)


def load_panel_test(usecols=None):
    """
    Load the bundled "panel_test.csv" file through load_dataset.

    Parameters:
      usecols : list of columns in file to use
    Returns : Pandas dataframe
    """
    return load_dataset('panel_test.csv', usecols=usecols)


def load_psychiatric_patients(usecols=None):
    """
    Load the bundled "psychiatric_patients.csv" file through load_dataset.

    Parameters:
      usecols : list of columns in file to use
    Returns : Pandas dataframe
    """
    return load_dataset('psychiatric_patients.csv', usecols=usecols)


def load_static_test(usecols=None):
    """
    Load the bundled "static_test.csv" file through load_dataset.

    Parameters:
      usecols : list of columns in file to use
    Returns : Pandas dataframe
    """
    return load_dataset('static_test.csv', usecols=usecols)


def generate_lcd_dataset():
return {
'alluvial_fan': {
Expand Down
File renamed without changes.
File renamed without changes.
6,746 changes: 3,373 additions & 3,373 deletions datasets/divorce.dat → lifelines/datasets/divorce.dat

Large diffs are not rendered by default.

86 changes: 43 additions & 43 deletions datasets/gehan.dat → lifelines/datasets/gehan.dat
Original file line number Diff line number Diff line change
@@ -1,43 +1,43 @@
2 6 1
2 6 1
2 6 1
2 6 0
2 7 1
2 9 0
2 10 1
2 10 0
2 11 0
2 13 1
2 16 1
2 17 0
2 19 0
2 20 0
2 22 1
2 23 1
2 25 0
2 32 0
2 32 0
2 34 0
2 35 0
1 1 1
1 1 1
1 2 1
1 2 1
1 3 1
1 4 1
1 4 1
1 5 1
1 5 1
1 8 1
1 8 1
1 8 1
1 8 1
1 11 1
1 11 1
1 12 1
1 12 1
1 15 1
1 17 1
1 22 1
1 23 1
2 6 1
2 6 1
2 6 1
2 6 0
2 7 1
2 9 0
2 10 1
2 10 0
2 11 0
2 13 1
2 16 1
2 17 0
2 19 0
2 20 0
2 22 1
2 23 1
2 25 0
2 32 0
2 32 0
2 34 0
2 35 0
1 1 1
1 1 1
1 2 1
1 2 1
1 3 1
1 4 1
1 4 1
1 5 1
1 5 1
1 8 1
1 8 1
1 8 1
1 8 1
1 11 1
1 11 1
1 12 1
1 12 1
1 15 1
1 17 1
1 22 1
1 23 1

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
6 changes: 6 additions & 0 deletions lifelines/tests/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
import unittest
from . import test_suite


# Only start the unittest runner when this file is the program entry point;
# the tests are collected from the sibling test_suite module.
if __name__ == "__main__":
    unittest.main(module=test_suite)
21 changes: 12 additions & 9 deletions lifelines/tests/test_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,15 @@
from ..generate_datasets import *
from ..plotting import plot_lifetimes
from ..utils import *
from ..datasets import generate_lcd_dataset, generate_rossi_dataset, \
generate_waltons_dataset, generate_regression_dataset
from ..datasets import (generate_lcd_dataset, generate_rossi_dataset,
generate_waltons_dataset, generate_regression_dataset,
load_larynx, load_panel_test, load_kidney_transplant)


class MiscTests(unittest.TestCase):

def test_unnormalize(self):
df = pd.read_csv('./datasets/larynx.csv')
df = load_larynx()
m = df.mean(0)
s = df.std(0)

Expand All @@ -45,7 +46,7 @@ def test_unnormalize(self):
npt.assert_almost_equal(df.values, unnormalize(ndf, m, s).values)

def test_normalize(self):
df = pd.read_csv('./datasets/larynx.csv')
df = load_larynx()
n, d = df.shape
npt.assert_almost_equal(normalize(df).mean(0).values, np.zeros(d))
npt.assert_almost_equal(normalize(df).std(0).values, np.ones(d))
Expand Down Expand Up @@ -574,7 +575,7 @@ def test_tall_data_points(self):

@unittest.skipUnless("DISPLAY" in os.environ, "requires display")
def test_aaf_panel_dataset(self):
panel_dataset = pd.read_csv('./datasets/panel_test.csv')
panel_dataset = load_panel_test()
self.aaf.fit(panel_dataset, id_col='id', duration_col='t', event_col='E')
self.aaf.plot()
return
Expand Down Expand Up @@ -694,7 +695,7 @@ def test_predict_methods_in_regression_return_same_types(self):
self.assertEqual(type(getattr(self.aaf, fit_method)(x)), type(getattr(self.cph, fit_method)(x)))

def test_duration_vector_can_be_normalized(self):
df = pd.read_csv('./datasets/kidney_transplant.csv')
df = load_kidney_transplant()
t = df['time']
normalized_df = df.copy()
normalized_df['time'] = (normalized_df['time'] - t.mean()) / t.std()
Expand Down Expand Up @@ -1109,7 +1110,9 @@ def test_output_against_R(self):

def test_coef_output_against_Survival_Analysis_by_John_Klein_and_Melvin_Moeschberger(self):
# see example 8.3 in Survival Analysis by John P. Klein and Melvin L. Moeschberger, Second Edition
df = pd.read_csv('./datasets/kidney_transplant.csv', usecols=['time', 'death', 'black_male', 'white_male', 'black_female'])
df = load_kidney_transplant(usecols=['time', 'death',
'black_male', 'white_male',
'black_female'])
cf = CoxPHFitter(normalize=False)
cf.fit(df, duration_col='time', event_col='death')

Expand All @@ -1120,7 +1123,7 @@ def test_coef_output_against_Survival_Analysis_by_John_Klein_and_Melvin_Moeschbe

def test_se_against_Survival_Analysis_by_John_Klein_and_Melvin_Moeschberger(self):
# see table 8.1 in Survival Analysis by John P. Klein and Melvin L. Moeschberger, Second Edition
df = pd.read_csv('./datasets/larynx.csv')
df = load_larynx()
cf = CoxPHFitter(normalize=False)
cf.fit(df, duration_col='time', event_col='death')

Expand All @@ -1131,7 +1134,7 @@ def test_se_against_Survival_Analysis_by_John_Klein_and_Melvin_Moeschberger(self

def test_p_value_against_Survival_Analysis_by_John_Klein_and_Melvin_Moeschberger(self):
# see table 8.1 in Survival Analysis by John P. Klein and Melvin L. Moeschberger, Second Edition
df = pd.read_csv('./datasets/larynx.csv')
df = load_larynx()
cf = CoxPHFitter()
cf.fit(df, duration_col='time', event_col='death')

Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def read(fname):
license="MIT",
keywords="survival analysis statistics data analysis",
url="https://github.com/CamDavidsonPilon/lifelines",
packages=['lifelines', 'lifelines.tests'],
packages=['lifelines', 'lifelines.datasets', 'lifelines.tests'],
long_description=read('README.txt'),
classifiers=[
"Development Status :: 4 - Beta",
Expand All @@ -51,7 +51,7 @@ def read(fname):
"../LICENSE",
"../MANIFEST.in",
"../*.ipynb",
"../datasets/*",
"datasets/*",
]
},
ext_modules=[ext_fstat]
Expand Down

0 comments on commit ee278b0

Please sign in to comment.