Skip to content

Commit

Permalink
remove errors and query optimization
Browse files Browse the repository at this point in the history
  • Loading branch information
Magdalena5 committed Oct 7, 2021
1 parent 9137a65 commit 9e81b6f
Show file tree
Hide file tree
Showing 11 changed files with 146 additions and 11,392 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ Not recommended for pure deployment.
* Develop


## Data Import ### [(detailed documentation)](https://github.com/dieterich-lab/medex/tree/time-series/dataset_examples/Data_import.md)
### Data Import [(detailed documentation)](https://github.com/dieterich-lab/medex/tree/time-series/dataset_examples/Data_import.md)
* Database imports run every night at 5:05 and at startup.
* The database is only updated if there is new data to import.
* To add new data, place a new `entities.csv` and `dataset.csv` in the `./import` folder.
Expand Down
11,280 changes: 0 additions & 11,280 deletions dataset_examples/dataset_test.csv

This file was deleted.

6 changes: 5 additions & 1 deletion modules/import_dataset_postgre.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def load_data(entities, dataset, header, rdb):
if 'Visit' in header:
line = [i] + row[0:6] + [";".join([str(x) for x in row[6:]])]
else:
line = [i] + row[0:1] + [1] + row[1:5] + [";".join([str(x) for x in row[5:]])]
line = [i] + row[0:2] + [1] + row[2:5] + [";".join([str(x) for x in row[5:]])]
if len(line) < 6:
print("This line doesn't have appropriate format:", line)
else:
Expand Down Expand Up @@ -144,6 +144,8 @@ def alter_table(rdb):

sql12 = """CREATE INDEX IF NOT EXISTS "Key_index_numerical" ON examination_numerical ("Key")"""
sql13 = """CREATE INDEX IF NOT EXISTS "Key_index_categorical" ON examination_categorical ("Key")"""
sql15 = """CREATE INDEX IF NOT EXISTS "ID_index_numerical" ON examination_numerical ("Name_ID")"""
sql16 = """CREATE INDEX IF NOT EXISTS "ID_index_categorical" ON examination_categorical ("Name_ID")"""
sql14 = """CREATE EXTENSION IF NOT EXISTS tablefunc"""

try:
Expand All @@ -168,6 +170,8 @@ def alter_table(rdb):
cur.execute(sql12)
cur.execute(sql13)
cur.execute(sql14)
cur.execute(sql15)
cur.execute(sql16)
rdb.commit()
except Exception:
return print("Problem with connection with database")
Expand Down
3 changes: 2 additions & 1 deletion modules/import_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,12 @@ def start_import(rdb):
return print("Data set not changed", file=sys.stderr)
else:
if not os.path.isfile(header):
header = ['Name_ID', 'measurement']
header = ['Name_ID','Case_ID', 'measurement']
else:
with open(header, 'r') as in_file:
for row in in_file:
header = row.replace("\n", "").split(",")
header = header[0:3]
# use function from import_dataset_postgre.py to create tables in database
print("Start create tables")
idp.create_table(rdb)
Expand Down
181 changes: 98 additions & 83 deletions modules/load_data_postgre.py

Large diffs are not rendered by default.

12 changes: 7 additions & 5 deletions url_handlers/boxplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,21 +46,23 @@ def post_boxplots():

# handling errors and load data from database
error = None
if not measurement:
if measurement == "Search entity":
error = "Please select number of {}".format(measurement_name)
elif numeric_entities == "Search entity" or categorical_entities == "Search entity":
error = "Please select entity"
elif not subcategory_entities:
error = "Please select subcategory"
if not error:
df, error = ps.get_num_cat_values(numeric_entities, categorical_entities, subcategory_entities, measurement,case_ids,
elif not error:
print(subcategory_entities)
df, error = ps.get_num_cat_values(numeric_entities, categorical_entities, subcategory_entities, measurement, case_ids,
categorical_filter, categorical_names, numerical_filter_name, from1, to1,
measurement_filter, date, rdb)
df = filtering.checking_for_block(block, df, Name_ID, measurement_name)
df = df.rename(columns={"Name_ID": "{}".format(Name_ID), "measurement": "{}".format(measurement_name)})
numeric_entities_unit, error = ps.get_unit(numeric_entities, rdb)
if numeric_entities_unit:
numeric_entities_unit = numeric_entities + ' (' + numeric_entities_unit + ')'
df.columns = [Name_ID,measurement_name, numeric_entities_unit,categorical_entities]
print(df)
df.columns = [Name_ID,measurement_name, numeric_entities_unit,categorical_entities]
else:
numeric_entities_unit = numeric_entities
if not error:
Expand Down
3 changes: 3 additions & 0 deletions url_handlers/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def get_data():
numerical_filter = filtering.check_for_numerical_filter_get()
categorical_filter, categorical_names = filtering.check_for_filter_get()
return render_template('data.html',
block=block,
all_entities=all_entities,
name=measurement_name,
start_date=session.get('start_date'),
Expand Down Expand Up @@ -62,6 +63,7 @@ def post_data():
if error:
return render_template('data.html',
error=error,
block=block,
all_entities=all_entities,
all_measurement=all_measurement,
name=measurement_name,
Expand Down Expand Up @@ -102,6 +104,7 @@ def post_data():

return render_template('data.html',
error=error,
block=block,
all_entities=all_entities,
all_measurement=all_measurement,
measurement=measurement,
Expand Down
1 change: 1 addition & 0 deletions url_handlers/filtering.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def check_for_filter_post():
session['categorical_filter'] = categorical_filter
session['categorical_names'] = categorical_names
categorical_filter_zip = None
print(categorical_filter)
if categorical_filter is not None:
categorical_filter_zip = zip(categorical_names, categorical_filter)

Expand Down
1 change: 1 addition & 0 deletions url_handlers/histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def post_statistics():
numeric_entities_unit, error = ps.get_unit(numeric_entities, rdb)
if numeric_entities_unit:
numeric_entities_unit = numeric_entities + ' (' + numeric_entities_unit + ')'
print(df)
df.columns = [Name_ID,measurement_name, numeric_entities_unit,categorical_entities]
else:
numeric_entities_unit = numeric_entities
Expand Down
3 changes: 2 additions & 1 deletion url_handlers/scatter_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def post_plots():
categorical_filter, categorical_names, numerical_filter_name,
from1, to1, measurement_filter, date, rdb)


x_unit, error = ps.get_unit(x_axis, rdb)
y_unit, error = ps.get_unit(y_axis, rdb)
if x_unit and y_unit:
Expand All @@ -92,7 +93,7 @@ def post_plots():
from1, to1, measurement_filter, date, rdb)
if not error:

categorical_df = numeric_df.merge(df, on="Patient_ID").dropna()
categorical_df = numeric_df.merge(df, on="Name_ID").dropna()
categorical_df = categorical_df.sort_values(by=[categorical_entities])
categorical_df = categorical_df.rename(
columns={"Name_ID": "{}".format(Name_ID), "measurement": "{}".format(measurement_name)})
Expand Down
46 changes: 26 additions & 20 deletions webserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,27 @@ def check_for_env(key: str, default=None, cast=None):
block = 'block'


# data store for filters and download
class DataStore():
# Plain attribute holder: the module creates one instance of it as `data`,
# and the handlers in this file read and assign its attributes directly.

# Case IDs assigned by get_cases() from the 'cases_ids' entry of the JSON reply.
# message_count() sets case_display to 'block' when this is truthy, else 'none'.
case_ids = []
# CSV text with a single "Case_ID" column, built in get_cases() from the same
# case IDs; read by download() when the requested filename is 'case_ids.csv'.
table_case_ids = None

# for table browser server side
# NOTE(review): the code that fills the fields below is not visible here —
# confirm their expected types against the table-browser handlers.
table_schema = None
table_browser_column = None
# NOTE(review): this attribute name shadows the builtin `dict` within the class body.
dict = None
table_browser_entities = None
table_browser_what_table = None
table_browser_column2 = None




table_builder = TableBuilder()
data = DataStore()


# favicon
@app.route('/favicon.ico')
def favicon():
Expand All @@ -73,8 +94,8 @@ def favicon():
# information about database
@app.context_processor
def message_count():
case_id = session.get('case_ids')
if case_id != None:
case_id = data.case_ids
if case_id :
case_display = 'block'
else:
case_display = 'none'
Expand Down Expand Up @@ -106,21 +127,6 @@ def message_count():
case_display=case_display)


# data store for filters and download
class DataStore():

# for table browser server side
table_schema = None
table_browser_column = None
dict = None
table_browser_entities = None
table_browser_what_table = None
table_browser_column2 = None


table_builder = TableBuilder()
data = DataStore()

# Urls in the 'url_handlers' directory (one file for each new url)
# import a Blueprint
from url_handlers.data import data_page
Expand Down Expand Up @@ -167,8 +173,8 @@ def get_cases():
session_id_json = {"session_id": "{}".format(session_id)}
cases_get = requests.post(EXPRESS_MEDEX_MEDDUSA_URL, json=session_id_json)
case_ids = cases_get.json()
session['case_ids'] = case_ids['cases_ids']
session['table_case_ids'] = pd.DataFrame(case_ids['cases_ids'], columns=["Case_ID"]).to_csv(index=False)
data.case_ids = case_ids['cases_ids']
data.table_case_ids = pd.DataFrame(case_ids['cases_ids'], columns=["Case_ID"]).to_csv(index=False)

return redirect('/data')

Expand All @@ -179,7 +185,7 @@ def download(filename):
if filename == 'data.csv':
csv = data.csv
elif filename == 'case_ids.csv':
csv = session.get('table_case_ids')
csv = data.table_case_ids
# Create a string buffer
buf_str = io.StringIO(csv)

Expand Down

0 comments on commit 9e81b6f

Please sign in to comment.