- {% if error %}
-
- {% endif %}
-
+
+ Plot data
+
+
+{% if plot_series %}
+
+{% endif %}
+{% endblock %}
\ No newline at end of file
diff --git a/templates/show_entities.html b/templates/show_entities.html
deleted file mode 100644
index 06211224..00000000
--- a/templates/show_entities.html
+++ /dev/null
@@ -1,40 +0,0 @@
-{% extends "layout.html" %}
-{% block body %}
-
-
- {{ num_patients }}
-
-{% endblock %}
diff --git a/test.txt b/test.txt
deleted file mode 100644
index 7fbc42f8..00000000
--- a/test.txt
+++ /dev/null
@@ -1 +0,0 @@
-abc.abc.abc
diff --git a/url_handlers/barchart.py b/url_handlers/barchart.py
new file mode 100644
index 00000000..4af3d4b5
--- /dev/null
+++ b/url_handlers/barchart.py
@@ -0,0 +1,97 @@
+from flask import Blueprint, render_template, request
+import collections
+import pandas as pd
+import json
+import plotly
+import plotly.graph_objs as go
+
+import data_warehouse.redis_rwh as rwh
+
+barchart_page = Blueprint('barchart', __name__,
+ template_folder='templates')
+
+
+@barchart_page.route('/barchart', methods=['GET'])
+def get_statistics():
+    """Render the empty bar chart form with the categorical-only entity names."""
+    # Deferred import: has to stay inside the function (presumably to avoid a
+    # circular import with ``webserver`` - not visible from here).
+    from webserver import get_db
+    connection = get_db()
+    numeric_names = set(rwh.get_numeric_entities(connection))
+    categorical_names = set(rwh.get_categorical_entities(connection))
+    return render_template('barchart.html',
+                           numeric_tab=True,
+                           all_categorical_entities=sorted(categorical_names - numeric_names))
+
+
+@barchart_page.route('/barchart', methods=['POST'])
+def post_statistics():
+    """Render stacked bar charts of value counts for the selected entities.
+
+    Reads the ``categorical_entities`` list from the submitted form, fetches
+    the joined values via ``rwh.get_joined_categorical_values`` and renders
+    ``barchart.html`` with the plot data, or with an error message when
+    nothing was selected or the lookup reported an error.
+    """
+    # Deferred import: has to stay inside the function (presumably to avoid a
+    # circular import with ``webserver`` - not visible from here).
+    from webserver import get_db
+    rdb = get_db()
+    all_categorical_entities = rwh.get_categorical_entities(rdb)
+    all_numeric_entities = rwh.get_numeric_entities(rdb)
+    all_categorical_only_entities = sorted(set(all_categorical_entities) - set(all_numeric_entities))
+
+    selected_entities = request.form.getlist('categorical_entities')
+    categorical_df = None
+    if not selected_entities:
+        error = "Please select entities"
+    else:
+        categorical_df, error = rwh.get_joined_categorical_values(selected_entities, rdb)
+        # Name the requested entities in the message. Joining over
+        # ``categorical_df`` listed frame columns instead and would raise if
+        # the helper hands back no frame on error (its contract is not shown).
+        error = "No data based on the selected entities ( " + ", ".join(selected_entities) + " ) " if error else None
+
+    if error:
+        return render_template('barchart.html',
+                               categorical_tab=True,
+                               all_categorical_entities=all_categorical_only_entities,
+                               selected_c_entities=selected_entities,
+                               error=error
+                               )
+
+    entity_values = {}
+    plot_series = []
+    bars = []
+    for entity in selected_entities:
+        column = categorical_df[entity]
+        counter = collections.Counter(column)
+        key_c = list(counter.keys())
+        values_c = list(counter.values())
+        bars.append(go.Bar(x=key_c, y=values_c, name=entity, width=0.3))
+        plot_series.append({
+            'x': key_c,
+            'y': values_c,
+            'name': entity,
+            'type': "bar",
+            'width': 0.3
+        })
+        # Table data: occurrences of each distinct value, missing values dropped.
+        entity_values[entity] = dict(collections.Counter(column.dropna()))
+
+    layout = go.Layout(
+        barmode='stack',
+        template='plotly_white'
+    )
+    figure = go.Figure(data=bars, layout=layout)
+    graphJSON = json.dumps(figure, cls=plotly.utils.PlotlyJSONEncoder)
+
+    return render_template('barchart.html',
+                           categorical_tab=True,
+                           all_categorical_entities=all_categorical_only_entities,
+                           plot=graphJSON,
+                           entity_values=entity_values,
+                           selected_c_entities=selected_entities,
+                           plot_series=plot_series
+                           )
diff --git a/url_handlers/basic_stats.py b/url_handlers/basic_stats.py
index b790137b..a44b06ca 100644
--- a/url_handlers/basic_stats.py
+++ b/url_handlers/basic_stats.py
@@ -1,14 +1,19 @@
from flask import Blueprint, render_template, request
-
import data_warehouse.redis_rwh as rwh
basic_stats_page = Blueprint('basic_stats', __name__,
template_folder='basic_stats')
+
@basic_stats_page.route('/basic_stats', methods=['GET'])
def get_statistics():
- # this import has to be here!!
+ """
+
+ Returns
+ -------
+
+ """
from webserver import get_db
rdb = get_db()
all_numeric_entities = rwh.get_numeric_entities(rdb)
@@ -23,6 +28,13 @@ def get_statistics():
@basic_stats_page.route('/basic_stats', methods=['POST'])
def get_basic_stats():
+ """
+
+ Returns
+ -------
+
+ """
+
from webserver import get_db
rdb = get_db()
all_numeric_entities = rwh.get_numeric_entities(rdb)
@@ -31,8 +43,12 @@ def get_basic_stats():
if 'basic_stats' in request.form:
numeric_entities = request.form.getlist('numeric_entities')
- numeric_df, error = rwh.get_joined_numeric_values(numeric_entities, rdb) if numeric_entities else (None,"Please select numeric entities")
-
+ error = None
+ if numeric_entities:
+ numeric_df, error = rwh.get_joined_numeric_values(numeric_entities, rdb)
+ error = "The selected entities (" + ", ".join(numeric_entities) + ") do not contain any values. " if error else None
+ else:
+ error = "Please select numeric entities"
if error:
return render_template('basic_stats/basic_stats.html',
numeric_tab=True,
@@ -90,8 +106,12 @@ def get_basic_stats():
if 'basic_stats_c' in request.form:
categorical_entities = request.form.getlist('categorical_entities')
# if not categorical_entities:
- categorical_df, error = rwh.get_joined_categorical_values(categorical_entities, rdb) if categorical_entities else (None, "Please select entities")
-
+ error = None
+ if categorical_entities:
+ categorical_df, error = rwh.get_joined_categorical_values(categorical_entities, rdb)
+ error = "No data based on the selected entities ( " + ", ".join(categorical_entities) + " ) " if error else None
+ else:
+ error = "Please select entities"
if error:
return render_template('basic_stats/basic_stats.html',
categorical_tab=True,
@@ -105,8 +125,6 @@ def get_basic_stats():
basic_stats_c[entity] = { }
# if entity in categorical_df.columns:
count = categorical_df[categorical_df.columns.intersection([entity])].count()[entity]
- # else:
- # count = 0
basic_stats_c[entity]['count'] = count
return render_template('basic_stats/basic_stats.html',
@@ -115,3 +133,4 @@ def get_basic_stats():
all_numeric_entities=all_numeric_entities,
selected_c_entities=categorical_entities,
basic_stats_c=basic_stats_c)
+
diff --git a/url_handlers/boxplot.py b/url_handlers/boxplot.py
index 068cf656..a52a9ee5 100644
--- a/url_handlers/boxplot.py
+++ b/url_handlers/boxplot.py
@@ -1,6 +1,8 @@
from flask import Blueprint, render_template, request
import pandas as pd
-
+import json
+import plotly
+import plotly.graph_objs as go
import data_warehouse.redis_rwh as rwh
boxplot_page = Blueprint('boxplot', __name__,
@@ -31,7 +33,7 @@ def post_boxplots():
error = None
if not entity or not group_by or entity == "Choose entity" or group_by == "Choose entity":
- error = "Please select entity and group_by"
+ error = "Please select entity and group by"
# get joined numerical and categorical values
if not error:
@@ -46,10 +48,12 @@ def post_boxplots():
selected_entity=entity,
group_by=group_by,
)
+
+
merged_df = pd.merge(numeric_df, categorical_df, how='inner', on='patient_id')
min_val = numeric_df[entity].min()
max_val = numeric_df[entity].max()
-
+ data =[]
groups = set(categorical_df[group_by].values.tolist())
plot_series = []
for group in sorted(groups):
@@ -58,19 +62,21 @@ def post_boxplots():
values = df[entity].values.tolist()
# print(entity, group, values[:10])
if (values):
+ data.append(go.Box(y=values, name =group))
plot_series.append({
- 'y' : values,
+ 'y': values,
'type': "box",
# 'opacity': 0.5,
'name': group,
})
-
+ graphJSON = json.dumps(data, cls=plotly.utils.PlotlyJSONEncoder)
return render_template('boxplot.html',
categorical_entities=all_categorical_entities,
numeric_entities=all_numeric_entities,
selected_entity=entity,
group_by=group_by,
plot_series=plot_series,
+ plot = graphJSON,
min_val=min_val,
max_val=max_val,
)
diff --git a/url_handlers/clustering.py b/url_handlers/clustering.py
deleted file mode 100644
index 57ce245c..00000000
--- a/url_handlers/clustering.py
+++ /dev/null
@@ -1,185 +0,0 @@
-from flask import Blueprint, render_template, request
-import numpy as np
-
-import data_warehouse.redis_rwh as rwh
-import data_warehouse.data_warehouse_utils as dwu
-
-clustering_page = Blueprint('clustering', __name__,
- template_folder='clustering')
-
-
-@clustering_page.route('/clustering', methods=['GET'])
-def cluster():
- # this import has to be here!!
- from webserver import get_db
- rdb = get_db()
- all_numeric_entities = rwh.get_numeric_entities(rdb)
- all_categorical_entities = rwh.get_categorical_entities(rdb)
- all_categorical_only_entities = sorted(set(all_categorical_entities) - set(all_numeric_entities))
-
- # numeric_df = rwh.get_joined_numeric_values(all_numeric_entities, rdb)
- #
- # min_values = numeric_df.min()
- # max_values = numeric_df.max()
-
- min_max_values = { }
- # for entity in all_numeric_entities:
- # min_max_values[entity] = {
- # 'min': min_values[entity],
- # 'max': max_values[entity],
- # 'step': (max_values[entity] - min_values[entity]) / 100.0
- # }
- return render_template('clustering/clustering.html',
- numeric_tab=True,
- all_numeric_entities=all_numeric_entities,
- all_categorical_entities=all_categorical_only_entities,
- min_max_values=min_max_values,
- )
-
-
-@clustering_page.route('/clustering', methods=['POST'])
-def post_clustering():
- # this import has to be here!!
- from webserver import get_db
- rdb = get_db()
- all_numeric_entities = rwh.get_numeric_entities(rdb)
- all_categorical_entities = rwh.get_categorical_entities(rdb)
- all_categorical_only_entities = sorted(set(all_categorical_entities) - set(all_numeric_entities))
- min_max_values = { }
-
- if 'cluster_numeric' in request.form:
- # transforming back underscores to dots
- numeric_entities_with_underscore = request.form.getlist('numeric_entities')
- numeric_entities = [entity.replace('__', '.') for entity in numeric_entities_with_underscore]
- if not numeric_entities:
- error = "Please select entities"
- return render_template('clustering/clustering.html',
- numeric_tab=True,
- all_numeric_entities=all_numeric_entities,
- all_categorical_entities=all_categorical_only_entities,
- min_max_values=min_max_values,
- error=error,
- )
-
- numeric_standardize = request.form['n_standardize'] == "yes"
- numeric_missing = request.form['n_missing']
- min_max_filter = { }
- for entity in numeric_entities:
- min_max_entity = 'min_max_{}'.format(entity.replace('.', '__'))
- if min_max_entity in request.form:
- min_value, max_value = list(eval(request.form.get(min_max_entity)))
- min_max_filter[entity] = min_value, max_value
-
- min_max_values[entity] = {
- 'min' : min_value,
- 'max' : max_value,
- 'step': (max_value - min_value) / float(100),
- }
-
- if any([entity for entity in numeric_entities]):
- np.random.seed(8675309) # what is this number?
- cluster_data, cluster_labels, df, error = dwu.cluster_numeric_fields(
- numeric_entities,
- rdb,
- standardize=numeric_standardize,
- missing=numeric_missing,
- min_max_filter=min_max_filter,
- )
- if error:
- return render_template('clustering/clustering.html',
- numeric_tab=True,
- selected_n_entities=numeric_entities,
- all_numeric_entities=all_numeric_entities,
- all_categorical_entities=all_categorical_only_entities,
- min_max_values=min_max_values,
- selected_min_max=min_max_filter,
- error=error,
- )
- table_data = { }
- plot_data = []
- for cluster in sorted(cluster_labels.keys()):
- patient_count = cluster_labels[cluster]
- patient_percent = "{:.0%}".format(cluster_data.weights_[cluster])
-
- table_data[cluster] = { }
- table_data[cluster]['patient_count'] = patient_count
- table_data[cluster]['patient_percent'] = patient_percent
- for i, entity in enumerate(numeric_entities):
- mean_value = "{0:.2f}".format(cluster_data.means_[cluster][i])
- table_data[cluster][entity] = mean_value
- # filter by cluster
- entity_series = df[df.cluster == cluster][numeric_entities].dropna().values.round(2).tolist()
- plot_data.append({ "name": "Cluster {}".format(cluster), "data": entity_series })
- any_present = df.shape[0]
- all_present = df.dropna().shape[0]
- return render_template('clustering/clustering.html',
- numeric_tab=True,
- selected_n_entities=numeric_entities,
- all_numeric_entities=all_numeric_entities,
- all_categorical_entities=all_categorical_only_entities,
- any_present=any_present,
- all_present=all_present,
- table_data=table_data,
- plot_data=plot_data,
- min_max_values=min_max_values,
- selected_min_max=min_max_filter,
- )
-
- elif 'cluster_categorical' in request.form:
- eps = float(request.form['eps'])
- min_samples = int(request.form['min_samples'])
- categorical_entities = request.form.getlist('categorical_entities')
- if not categorical_entities:
- error = "Please select entities"
- return render_template('clustering/clustering.html',
- numeric_tab=True,
- all_numeric_entities=all_numeric_entities,
- all_categorical_entities=all_categorical_only_entities,
- min_max_values=min_max_values,
- error=error,
- )
-
- if any([entity for entity in categorical_entities]):
- eps = eps
- min_samples = min_samples
- np.random.seed(8675309)
- cluster_info = dwu.cluster_categorical_entities(
- categorical_entities,
- rdb,
- eps=eps,
- min_samples=min_samples
- )
-
- ccv, cat_rep_np, category_values, categorical_label_uses, cat_df, error = cluster_info
- if error:
- return render_template('clustering/clustering.html',
- categorical_tab=True,
- all_numeric_entities=all_numeric_entities,
- all_categorical_entities=all_categorical_only_entities,
- selected_c_entities=categorical_entities,
- c_cluster_info=cluster_info,
- ccv=cvv_dict,
- min_max_values=min_max_values,
- error=error
- )
- any_present = cat_df.shape[0]
- all_present = cat_df.dropna().shape[0]
-
- # df to dict
- cvv_dict = { }
- for key, value in ccv.items():
- normal_value = value.to_dict()
- cvv_dict[key] = normal_value
-
- return render_template('clustering/clustering.html',
- categorical_tab=True,
- all_numeric_entities=all_numeric_entities,
- all_categorical_entities=all_categorical_only_entities,
- selected_c_entities=categorical_entities,
- c_cluster_info=cluster_info,
- all_present=all_present,
- any_present=any_present,
- ccv=cvv_dict,
- min_max_values=min_max_values,
- )
- # heat_map_data=data)
diff --git a/url_handlers/clustering_pl.py b/url_handlers/clustering_pl.py
new file mode 100644
index 00000000..ac299156
--- /dev/null
+++ b/url_handlers/clustering_pl.py
@@ -0,0 +1,122 @@
+from flask import Blueprint, render_template, request
+import numpy as np
+
+import data_warehouse.redis_rwh as rwh
+import data_warehouse.data_warehouse_utils as dwu
+
+clustering_plot_page = Blueprint('clustering_pl', __name__,
+ template_folder='clustering_pl')
+
+
+@clustering_plot_page.route('/clustering_pl', methods=['GET'])
+def cluster():
+    """Render the clustering form before anything has been submitted.
+
+    The template gets every numeric entity name and an empty
+    ``min_max_values`` mapping.
+    """
+    # Deferred import: has to stay inside the function (presumably to avoid a
+    # circular import with ``webserver`` - not visible from here).
+    from webserver import get_db
+    context = {
+        'numeric_tab': True,
+        'all_numeric_entities': rwh.get_numeric_entities(get_db()),
+        'min_max_values': {},
+    }
+    return render_template('clustering_pl.html', **context)
+
+
+@clustering_plot_page.route('/clustering_pl', methods=['POST'])
+def post_clustering():
+    """Cluster patients on the selected numeric entities and render the result.
+
+    Form fields read: ``numeric_entities``, ``n_standardize``, ``n_missing`` and
+    optional ``min_max_<entity>`` ranges; clustering is delegated to
+    ``dwu.cluster_numeric_fields``.
+    """
+    import ast  # function-scope import; the file's import block is left alone
+    # Deferred import: it has to stay inside the function.
+    from webserver import get_db
+    rdb = get_db()
+    all_numeric_entities = rwh.get_numeric_entities(rdb)
+    min_max_values = {}
+
+    # The form carries '__' where entity names have '.'; map them back. Empty
+    # names are dropped so an all-empty selection is reported as an error.
+    numeric_entities = [entity.replace('__', '.')
+                        for entity in request.form.getlist('numeric_entities') if entity]
+    if not numeric_entities:
+        return render_template('clustering_pl.html',
+                               numeric_tab=True,
+                               all_numeric_entities=all_numeric_entities,
+                               min_max_values=min_max_values,
+                               error="Please select entities")
+
+    numeric_standardize = request.form['n_standardize'] == "yes"
+    numeric_missing = request.form['n_missing']
+    min_max_filter = {}
+    for entity in numeric_entities:
+        min_max_entity = 'min_max_{}'.format(entity.replace('.', '__'))
+        if min_max_entity in request.form:
+            # SECURITY: this field comes from the client and used to go through
+            # eval(). literal_eval parses Python literals only, so a numeric
+            # pair still unpacks while arbitrary expressions are rejected.
+            min_value, max_value = ast.literal_eval(request.form.get(min_max_entity))
+            min_max_filter[entity] = min_value, max_value
+            min_max_values[entity] = {
+                'min': min_value,
+                'max': max_value,
+                'step': (max_value - min_value) / float(100),
+            }
+    np.random.seed(8675309)  # fixed seed; presumably for reproducible clusters
+    cluster_data, cluster_labels, df, error = dwu.cluster_numeric_fields(
+        numeric_entities,
+        rdb,
+        standardize=numeric_standardize,
+        missing=numeric_missing,
+        min_max_filter=min_max_filter,
+    )
+    if error:
+        return render_template('clustering_pl.html',
+                               numeric_tab=True,
+                               selected_n_entities=numeric_entities,
+                               all_numeric_entities=all_numeric_entities,
+                               min_max_values=min_max_values,
+                               selected_min_max=min_max_filter,
+                               error=error)
+
+    table_data = {}
+    plot_data = []
+    for cluster in sorted(cluster_labels.keys()):
+        members = df[df.cluster == cluster]
+        table_data[cluster] = {
+            'patient_count': cluster_labels[cluster],
+            'patient_percent': "{:.0%}".format(cluster_data.weights_[cluster]),
+        }
+        for i, entity in enumerate(numeric_entities):
+            table_data[cluster][entity] = "{0:.2f}".format(cluster_data.means_[cluster][i])
+        entity_series = members[numeric_entities].dropna().values.round(2).tolist()
+        plot_data.append({"name": "Cluster {}".format(cluster), "data": entity_series})
+        # The scatter trace needs two entities; indexing numeric_entities[1]
+        # unconditionally raised IndexError when only one was selected.
+        if len(numeric_entities) > 1:
+            plot_data.append({
+                'x': list(members[numeric_entities[0]]),
+                'y': list(members[numeric_entities[1]]),
+                'mode': 'markers',
+                'type': 'scatter',
+                "name": "Cluster {}".format(cluster),
+            })
+
+    return render_template('clustering_pl.html',
+                           numeric_tab=True,
+                           selected_n_entities=numeric_entities,
+                           all_numeric_entities=all_numeric_entities,
+                           any_present=df.shape[0],
+                           all_present=df.dropna().shape[0],
+                           table_data=table_data,
+                           plot_data=plot_data,
+                           min_max_values=min_max_values,
+                           selected_min_max=min_max_filter)
+
+
diff --git a/url_handlers/coplots.py b/url_handlers/coplots_pl.py
similarity index 69%
rename from url_handlers/coplots.py
rename to url_handlers/coplots_pl.py
index 65d872a2..e7cc9983 100644
--- a/url_handlers/coplots.py
+++ b/url_handlers/coplots_pl.py
@@ -3,11 +3,11 @@
import data_warehouse.redis_rwh as rwh
-coplots_page = Blueprint('coplots', __name__,
+coplots_plot_page = Blueprint('coplots_pl', __name__,
template_folder='templates')
-@coplots_page.route('/coplots', methods=['GET'])
+@coplots_plot_page.route('/coplots_pl', methods=['GET'])
def get_coplots():
# this import has to be here!!
from webserver import get_db
@@ -16,12 +16,12 @@ def get_coplots():
all_categorical_entities = rwh.get_categorical_entities(rdb)
all_categorical_only_entities = sorted(set(all_categorical_entities) - set(all_numeric_entities))
- return render_template('coplots.html',
+ return render_template('coplots_pl.html',
all_numeric_entities=all_numeric_entities,
categorical_entities=all_categorical_only_entities)
-@coplots_page.route('/coplots', methods=['POST'])
+@coplots_plot_page.route('/coplots_pl', methods=['POST'])
def post_coplots():
# this import has to be here!!
from webserver import get_db
@@ -30,7 +30,6 @@ def post_coplots():
all_categorical_entities = rwh.get_categorical_entities(rdb)
all_categorical_only_entities = sorted(set(all_categorical_entities) - set(all_numeric_entities))
- plot_series = []
category1 = request.form.get('category1')
category2 = request.form.get('category2')
x_axis = request.form.get('x_axis')
@@ -60,11 +59,11 @@ def post_coplots():
# get joined categorical values
if not error_message:
categorical_df, error_message = rwh.get_joined_categorical_values([category1, category2], rdb)
- numeric_df, error_message = rwh.get_joined_numeric_values([x_axis, y_axis], rdb) if not error_message else (None, error_message)
+ numeric_df, error_message = rwh.get_joined_numeric_values([x_axis, y_axis], rdb) if not error_message else (None, error_message)  # must stay in error_message: the check below reads only that name
error_message = "No data based on the selected options" if error_message else None
if error_message:
- return render_template('coplots.html',
+ return render_template('coplots_pl.html',
all_numeric_entities=all_numeric_entities,
categorical_entities=all_categorical_only_entities,
error=error_message,
@@ -83,35 +82,64 @@ def post_coplots():
numeric_df = numeric_df.dropna()
merged_df = pd.merge(numeric_df, categorical_df, how='inner', on='patient_id')
+
+
x_min = merged_df[x_axis].min() if not select_scale else selected_x_min
x_max = merged_df[x_axis].max() if not select_scale else selected_x_max
y_min = merged_df[y_axis].min() if not select_scale else selected_y_min
y_max = merged_df[y_axis].max() if not select_scale else selected_y_max
+
category1_values = merged_df[category1].unique()
category2_values = merged_df[category2].unique()
- if how_to_plot == 'single_plot':
- plot_series = []
- elif how_to_plot == 'multiple_plots':
- plot_series = { }
- for cat1_value in category1_values:
- for cat2_value in category2_values:
+
+
+ count=0
+ plot_series=[]
+ plot_series2 = []
+ layout ={}
+ for i,cat1_value in enumerate(category1_values):
+ for j,cat2_value in enumerate(category2_values):
+ count += 1
df = merged_df.loc[(merged_df[category1] == cat1_value) & (merged_df[category2] == cat2_value)].dropna()
df.columns = ['patient_id', 'x', 'y', 'cat1', 'cat2']
- series = {
- 'name' : '{}_{}'.format(cat1_value, cat2_value),
- 'turboThreshold': len(df),
- 'data' : list(df.T.to_dict().values()),
- 'cat1' : cat1_value,
- 'cat2' : cat2_value,
- 'series_length' : len(df),
- }
- if how_to_plot == 'single_plot':
- plot_series.append(series)
- elif how_to_plot == 'multiple_plots':
- plot_series['{}_{}'.format(cat1_value, cat2_value)] = series
-
- return render_template('coplots.html',
+ plot_series.append({
+ 'x': list(df['x']),
+ 'y': list(df['y']),
+ 'mode': 'markers',
+ 'type': 'scatter',
+ 'xaxis': 'x{}'.format(count),
+ 'yaxis': 'y{}'.format(count),
+ 'name': '{}: {}
{}: {}'.format(category1,cat1_value,category2 ,cat2_value),
+ 'text': list(df['patient_id'])
+ })
+ plot_series2.append({
+ 'x': list(df['x']),
+ 'y': list(df['y']),
+ 'mode': 'markers',
+ 'type': 'scatter',
+ 'name': '{}: {}
{}: {}'.format(category1, cat1_value, category2, cat2_value),
+ 'text': list(df['patient_id'])
+ }
+ )
+ layout.update({
+ 'xaxis{}'.format(count): {
+ 'title': {
+ 'text': x_axis,
+ }
+ },
+ 'yaxis{}'.format(count): {
+ 'title': {
+ 'text': y_axis,
+ }
+ },})
+
+
+ layout.update(title='Compare values of
' + x_axis + ' and
' + y_axis + ' ')
+ layout['grid'] = {'rows': len(category1_values), 'columns': len(category2_values), 'pattern': 'independent'}
+
+
+ return render_template('coplots_pl.html',
all_numeric_entities=all_numeric_entities,
categorical_entities=all_categorical_entities,
category1=category1,
@@ -120,8 +148,10 @@ def post_coplots():
cat2_values=list(category2_values),
x_axis=x_axis,
y_axis=y_axis,
+ layout =layout,
how_to_plot=how_to_plot,
plot_series=plot_series,
+ plot_series2=plot_series2,
select_scale=select_scale,
x_min=x_min,
x_max=x_max,
diff --git a/url_handlers/data_management.py b/url_handlers/data_management.py
deleted file mode 100644
index a55076bc..00000000
--- a/url_handlers/data_management.py
+++ /dev/null
@@ -1,103 +0,0 @@
-import collections
-import time
-import datetime
-
-from flask import Blueprint, render_template, request
-import csv
-import misc.utils as utils
-
-from data_warehouse.redis_rwh import get_connection
-
-data_management_page = Blueprint('data_management_page', __name__,
- template_folder='flaskr/templates')
-
-
-# entities = {
-# 'diagnostik.hauptdiagnose.tumortyp': 'String',
-# 'diagnostik.labor.bare_nuclei': 'String',
-# 'diagnostik.labor.bland_chromatin': 'String',
-# 'diagnostik.labor.clump_thickness': 'String',
-# 'diagnostik.labor.marginal_adhesion': 'String',
-# 'diagnostik.labor.mitosis': 'String',
-# 'diagnostik.labor.normal_nucleoli': 'String',
-# 'diagnostik.labor.single_epithelial_cell_size': 'Integer',
-# 'diagnostik.labor.uniformity_of_cell_shape': 'String',
-# 'diagnostik.labor.uniformity_of_cell_size': 'String',
-# }
-
-@data_management_page.route('/data_management', methods=['POST'])
-def manage_data():
- # example of 1 line in csv file:
- # fe189ec785f674f8d19f3af063c68413, 7dd8eb5937291aedc698e65581be76ca, 2018-05-22, 10:05:00, diagnostik.labor.uniformity_of_cell_size, 10
- csv_file = request.files['csv_file']
- csv_lines = csv_file.stream.read().decode("utf-8").split('\n')
- csv_data = list(csv.reader(csv_lines, delimiter=','))
-
- # example of entities file (also in csv format):
- # diagnostik.hauptdiagnose.tumortyp, String
- # diagnostik.labor.single_epithelial_cell_size, Integer
- entities_file = request.files['entities_file']
- entities_lines = entities_file.stream.read().decode("utf-8").split('\n')
- entities = dict(csv.DictReader(entities_lines, delimiter=','))
-
- # I don't like to hardcode it, we should create a config file
- redis_connection = get_connection('localhost', '6379')
- pong = redis_connection.ping()
-
- update_message = pong
- number_keys = set()
- category_keys = set()
- date_keys = set()
- category_values = collections.defaultdict(set)
-
- redis_commands = []
-
- for line in csv_data:
- (patient_id, quarter_id, date_stamp, time_stamp, key, value) = line
- float_value = utils.try_parse_float(value)
- entity_type = entities.get(key)
-
- if float_value is not None:
- redis_connection.zadd(key, float_value, patient_id)
- redis_commands.append("ZADD {} {} {}".format(key, float_value, patient_id))
- number_keys.add(key)
-
- elif entity_type == 'String':
- kv = "{}.{}".format(key, value)
- redis_connection.sadd(kv, patient_id)
- redis_commands.append("SADD {} {}".format(kv, patient_id))
-
- category_keys.add(key)
- category_values[key].add(value)
-
- elif entity_type == 'null':
- value = time.mktime(datetime.datetime.strptime(date_stamp, "%Y-%m-%d").timetuple())
-
- value = max(value, 0)
- redis_connection.zadd(key, value, patient_id)
- redis_commands.append("ZADD {} {} {}".format(key, value, patient_id))
-
- date_keys.add(key)
-
- for number_key in number_keys:
- redis_connection.sadd("number_keys", number_key)
-
- for date_key in date_keys:
- redis_connection.sadd("date_keys", date_key)
-
- for category_key in category_keys:
- redis_connection.sadd("category_keys", category_key)
-
- for category, cv in category_values.items():
- category_key = "{}_values".format(category)
- for category_value in cv:
- redis_connection.sadd(category_key, category_value)
-
- # if we got to this point, we did not get any exceptions
- update_message = 'Successfully updated'
- return render_template('data_management.html', update_message=update_message)
-
-
-@data_management_page.route('/data_management', methods=['GET'])
-def get_manage_data():
- return render_template('data_management.html')
diff --git a/url_handlers/heatmap.py b/url_handlers/heatmap.py
new file mode 100644
index 00000000..b041eaad
--- /dev/null
+++ b/url_handlers/heatmap.py
@@ -0,0 +1,96 @@
+from flask import Blueprint, render_template, request, jsonify
+import pandas as pd
+from scipy.stats import pearsonr
+
+import data_warehouse.redis_rwh as rwh
+
+heatmap_plot_page = Blueprint('heatmap', __name__,
+                              template_folder='templates')  # same folder name as the other blueprints
+
+
+@heatmap_plot_page.route('/heatmap', methods=['GET'])
+def get_plots():
+    """Render the empty heatmap form with the selectable entity names."""
+    # Deferred import: has to stay inside the function (presumably to avoid a
+    # circular import with ``webserver`` - not visible from here).
+    from webserver import get_db
+    connection = get_db()
+    numeric_names = rwh.get_numeric_entities(connection)
+    categorical_names = rwh.get_categorical_entities(connection)
+    return render_template('heatmap.html',
+                           numeric_tab=True,
+                           all_numeric_entities=numeric_names,
+                           all_categorical_entities=sorted(set(categorical_names) - set(numeric_names)))
+
+
+@heatmap_plot_page.route('/heatmap', methods=['POST'])
+# @login_required
+def post_plots():
+    """Render a heatmap of pairwise Pearson correlations of the selected entities.
+
+    Reads the ``numeric_entities`` form list, loads the joined values with
+    ``rwh.get_joined_numeric_values`` and, for every pair of entities,
+    correlates the rows where both values are present. Renders
+    ``heatmap.html`` with one plotly ``heatmap`` trace, or with an error.
+    """
+    # Deferred import: has to stay inside the function (presumably to avoid a
+    # circular import with ``webserver`` - not visible from here).
+    from webserver import get_db
+    rdb = get_db()
+    all_numeric_entities = rwh.get_numeric_entities(rdb)
+    all_categorical_entities = rwh.get_categorical_entities(rdb)
+    all_categorical_only_entities = sorted(set(all_categorical_entities) - set(all_numeric_entities))
+
+    selected_entities = request.form.getlist('numeric_entities')
+    if not selected_entities:
+        return render_template('heatmap.html',
+                               numeric_tab=True,
+                               all_numeric_entities=all_numeric_entities,
+                               all_categorical_entities=all_categorical_only_entities,
+                               error="Please select entities")
+
+    numeric_df, err = rwh.get_joined_numeric_values(selected_entities, rdb)
+    if err:
+        return render_template('heatmap.html',
+                               error=err,
+                               numeric_tab=True,
+                               all_numeric_entities=all_numeric_entities,
+                               all_categorical_entities=all_categorical_only_entities)
+
+    # Keep only the selected columns (drops the patient id column).
+    numeric_df = numeric_df[selected_entities]
+
+    # Build the matrix as plain nested lists. The original filled an empty
+    # DataFrame through chained indexing (``frame[r][c] = value``), which pandas
+    # does not guarantee to write through, and also computed p-values that
+    # were never passed to the template.
+    corr_values = []
+    for r in selected_entities:
+        row = []
+        for c in selected_entities:
+            # NaNs are dropped per pair, so each cell uses every patient that
+            # has both values.
+            pair = numeric_df[[r]].dropna() if c == r else numeric_df[[r, c]].dropna()
+            if len(pair) < 2:
+                # pearsonr needs at least two observations; mark the cell empty.
+                row.append(float('nan'))
+            else:
+                coefficient, _ = pearsonr(pair[r], pair[c])
+                row.append(round(float(coefficient), 2))
+        corr_values.append(row)
+
+    plot_series = [{
+        'z': corr_values,
+        'x': selected_entities,
+        'y': selected_entities,
+        'type': "heatmap"
+    }]
+
+    return render_template('heatmap.html',
+                           numeric_tab=True,
+                           all_numeric_entities=all_numeric_entities,
+                           all_categorical_entities=all_categorical_only_entities,
+                           selected_n_entities=selected_entities,
+                           plot_series=plot_series
+                           )
+
diff --git a/url_handlers/histogram.py b/url_handlers/histogram.py
index fabe4e8f..ec15fc73 100644
--- a/url_handlers/histogram.py
+++ b/url_handlers/histogram.py
@@ -1,6 +1,6 @@
from flask import Blueprint, render_template, request
import pandas as pd
-
+import numpy as np
import data_warehouse.redis_rwh as rwh
histogram_page = Blueprint('histogram', __name__,
@@ -34,37 +34,58 @@ def post_statistics():
if not entity or not group_by or entity == "Choose entity" or group_by == "Choose entity":
error = "Please select entity and group_by"
+ if number_of_bins.isdigit():
+ int_number_of_bins = int(number_of_bins)
+ if int_number_of_bins < 2:
+ error = "Nuber of bins need to be bigger then 1"
+
# get joined numerical and categorical values
if not error:
numeric_df, error = rwh.get_joined_numeric_values([entity], rdb)
categorical_df, error = rwh.get_joined_categorical_values([group_by], rdb) if not error else (None, error)
if error:
- return render_template('histogram.html', categorical_entities=all_categorical_entities,
- numeric_entities=all_numeric_entities, selected_entity=entity, group_by=group_by,
+ return render_template('histogram.html',
+ categorical_entities=all_categorical_entities,
+ numeric_entities=all_numeric_entities,
+ selected_entity=entity,
+ group_by=group_by,
error=error, )
+
merged_df = pd.merge(numeric_df, categorical_df, how='inner', on='patient_id')
min_val = numeric_df[entity].min()
max_val = numeric_df[entity].max()
count = categorical_df[group_by].count()
adjusted_bins = (max_val - min_val)
+
+
if number_of_bins.isdigit():
int_number_of_bins = int(number_of_bins)
- if int_number_of_bins > 0:
+ if int_number_of_bins > 1:
int_number_of_bins = int(number_of_bins)
- bin_numbers = (adjusted_bins / int_number_of_bins)
+ bin_numbers = (adjusted_bins / int_number_of_bins)
elif number_of_bins == "":
bin_numbers = (adjusted_bins / 20)
else:
error = "You have entered non-integer or negetive value. Please use positive integer"
return render_template('histogram.html', categorical_entities=all_categorical_entities,
numeric_entities=all_numeric_entities,
- error=error, )
+ error=error, )
+
+
groups = set(categorical_df[group_by].values.tolist())
plot_series = []
+ table_data={}
for group in groups:
df = merged_df.loc[merged_df[group_by] == group]
values = df[entity].values.tolist()
+ if number_of_bins.isdigit():
+ hist=np.histogram(values, bins=int(number_of_bins),range = (min_val,max_val))
+ else:
+ hist = np.histogram(values, bins=20, range=(min_val, max_val))
+ table_data[group]={}
+ table_data[group]['count'] =hist[0]
+ table_data[group]['bin'] = hist[1]
if (values):
plot_series.append({
'x' : values,
@@ -77,12 +98,13 @@ def post_statistics():
'start': min_val
}
})
-
return render_template('histogram.html',
categorical_entities=all_categorical_entities,
numeric_entities=all_numeric_entities,
selected_entity=entity,
group_by=group_by,
+ group =groups,
+ table_data=table_data,
plot_series=plot_series,
min_val=min_val,
max_val=max_val,
diff --git a/url_handlers/login.py b/url_handlers/login.py
deleted file mode 100644
index a566c473..00000000
--- a/url_handlers/login.py
+++ /dev/null
@@ -1,59 +0,0 @@
-from flask import Blueprint, render_template, request, redirect
-from flask_login import login_user, UserMixin, logout_user
-
-from passlib.hash import sha256_crypt
-
-
-login_page = Blueprint('login_page', __name__)
-
-
-# creating a custom User class
-class User(UserMixin):
- def __init__(self, id, email, password):
- self.id = id
- self.email = email
- self.password = password
-
- @classmethod
- def get_user(self, email, password):
- from webserver import get_db
- rdb = get_db()
- encrypted_password = rdb.hget('users', email) or ''
- if sha256_crypt.verify(password, encrypted_password):
- # todo: how to store ids??
- # for now we use email as an id
- return User(email, email, password)
- return None
-
- @classmethod
- def get_by_id(self, id):
- # todo: implement user by id
- # for now get user by email and use email as an id
- from webserver import get_db
- rdb = get_db()
- password = rdb.hget('users', id) or ''
- if password:
- return User(id, id, password)
- return None
-
-
-@login_page.route('/login', methods=['GET', 'POST'])
-def login():
- error = None
- if request.method == 'POST':
- email = request.form['email']
- password = request.form['password']
- user = User.get_user(email, password)
- if user is not None:
- login_user(user)
- return redirect('/basic_stats')
- else:
- error = "Incorrect username or password"
- return render_template('login_page.html',
- error=error)
-
-
-@login_page.route('/logout', methods=['GET', 'POST'])
-def logout():
- logout_user()
- return redirect('/login')
diff --git a/url_handlers/logout.py b/url_handlers/logout.py
new file mode 100644
index 00000000..20596416
--- /dev/null
+++ b/url_handlers/logout.py
@@ -0,0 +1,8 @@
+from flask import Blueprint, render_template
+
+logout_page = Blueprint('logout', __name__,
+ template_folder='logout')
+
+@logout_page.route('/logout', methods=['GET', 'POST'])
+def logout():
+ return render_template('logout.html')
diff --git a/url_handlers/plots.py b/url_handlers/plots.py
index 313a5a86..cb3a91e1 100644
--- a/url_handlers/plots.py
+++ b/url_handlers/plots.py
@@ -34,8 +34,6 @@ def post_plots():
all_categorical_entities = rwh.get_categorical_entities(rdb)
all_categorical_only_entities = sorted(set(all_categorical_entities) - set(all_numeric_entities))
- plot_series = []
-
if 'plot_numeric' in request.form:
plot_type = request.form['plot_type']
error = None
@@ -49,19 +47,17 @@ def post_plots():
elif plot_type == 'scatter_plot_n':
x_axis = request.form.get('x_axis')
y_axis = request.form.get('y_axis')
- category = request.form.get('category')
- add_group_by = request.form.get('add_group_by') is not None
- add_separate_regression = request.form.get('add_separate_regression') is not None
-
- # Check for input errors
if not x_axis or not y_axis or x_axis == "Choose entity" or y_axis == "Choose entity":
error = "Please select x_axis and y_axis"
- elif x_axis == y_axis:
+
+ if x_axis == y_axis:
error = "You can't compare the same entity"
- elif add_group_by and category == "Choose entity":
- error = "Please select a categorical value to group by"
+ return render_template('plots/plots.html',
+ error=error,
+ numeric_tab=True,
+ all_numeric_entities=all_numeric_entities,
+ all_categorical_entities=all_categorical_only_entities)
- categorical_df, err = rwh.get_joined_categorical_values([category], rdb)
numeric_df, err = rwh.get_joined_numeric_values([x_axis, y_axis], rdb) if not error else (None, error)
error = err if not error else error
@@ -71,56 +67,22 @@ def post_plots():
numeric_tab=True,
x_axis=x_axis,
y_axis=y_axis,
- category=category,
plot_type=plot_type,
all_numeric_entities=all_numeric_entities,
- all_categorical_entities=all_categorical_only_entities,
- add_group_by=add_group_by,
- add_separate_regression=add_separate_regression)
-
- if not add_group_by:
- plot_series = []
- category_values = []
- # change columns order and drop NaN values (this will show only the patients with both values)
- numeric_df = numeric_df.dropna()[[x_axis, y_axis, 'patient_id']]
- # rename columns
- numeric_df.columns = ['x', 'y', 'patient_id']
- # data_to_plot = list(numeric_df.T.to_dict().values())
- plot_series = list(numeric_df.T.to_dict().values())
- # data_to_plot = numeric_df.values.tolist()
- else:
- numeric_df = numeric_df.dropna()
- categorical_df = categorical_df.dropna()
- merged_df = pd.merge(numeric_df, categorical_df, how='inner', on='patient_id')
-
- # category = merged_df[category] if not add_group_by else category
-
- category_values = merged_df[category].unique()
-
- plot_series = []
- for cat_value in category_values:
- df = merged_df.loc[(merged_df[category] == cat_value)].dropna()
- df.columns = ['patient_id', 'x', 'y', 'cat']
- series = {
- 'name' : cat_value,
- 'turboThreshold': len(df),
- 'data' : list(df.T.to_dict().values()),
- 'cat' : cat_value,
- 'series_length' : len(df),
- }
- plot_series.append(series)
+ all_categorical_entities=all_categorical_only_entities)
+ # change columns order and drop NaN values (this will show only the patients with both values)
+ numeric_df = numeric_df.dropna()[[x_axis, y_axis, 'patient_id']]
+ # rename columns
+ numeric_df.columns = ['x', 'y', 'patient_id']
+ data_to_plot = list(numeric_df.T.to_dict().values())
+ # data_to_plot = numeric_df.values.tolist()
return render_template('plots/plots.html',
numeric_tab=True,
all_numeric_entities=all_numeric_entities,
all_categorical_entities=all_categorical_only_entities,
x_axis=x_axis,
y_axis=y_axis,
- category=category,
- cat_values=list(category_values),
- add_group_by=add_group_by,
- add_separate_regression=add_separate_regression,
- plot_series=plot_series,
- # plot_data=data_to_plot,
+ plot_data=data_to_plot,
plot_type=plot_type)
elif plot_type == 'heat_map_n':
selected_entities = request.form.getlist('numeric_entities')
@@ -202,7 +164,6 @@ def post_plots():
if 'plot_categorical' in request.form:
plot_type = request.form.get('plot_type')
selected_entities = request.form.getlist('categorical_entities')
-
categorical_df, error = rwh.get_joined_categorical_values(selected_entities, rdb) if selected_entities else (None, "Please select entities")
@@ -292,4 +253,4 @@ def get_min_max(entity):
'max' : max_val,
'step': float(max_val - min_val) / 100.0
}
- return jsonify(min_max_values)
+ return jsonify(min_max_values)
\ No newline at end of file
diff --git a/url_handlers/scatter_plot.py b/url_handlers/scatter_plot.py
new file mode 100644
index 00000000..796d5b0a
--- /dev/null
+++ b/url_handlers/scatter_plot.py
@@ -0,0 +1,172 @@
+from flask import Blueprint, render_template, request, jsonify
+import numpy as np
+import pandas as pd
+import json
+
+
+import data_warehouse.redis_rwh as rwh
+
+scatter_plot_page = Blueprint('scatter_plot', __name__,
+                              template_folder='templates')
+
+
+@scatter_plot_page.route('/scatter_plot', methods=['GET'])
+def get_plots():
+    """Show the scatter-plot form listing the selectable entities."""
+    # webserver imports this module, so importing it at the top would be circular
+    from webserver import get_db
+    db = get_db()
+    numeric = rwh.get_numeric_entities(db)
+    # entities that also have numeric values are not offered as categories
+    categorical_only = sorted(set(rwh.get_categorical_entities(db)) - set(numeric))
+    context = dict(numeric_tab=True,
+                   all_numeric_entities=numeric,
+                   all_categorical_entities=categorical_only)
+    return render_template('scatter_plot.html', **context)
+
+
+@scatter_plot_page.route('/scatter_plot', methods=['POST'])
+def post_plots():
+    """Render the scatter plot requested through the POSTed form.
+
+    Reads ``x_axis``, ``y_axis``, ``category``, ``add_group_by`` and
+    ``add_separate_regression`` from the form, loads the values via ``rwh``
+    and renders ``scatter_plot.html`` with either an ``error`` message or
+    ``plot_series``: a marker trace plus a fitted line for every group.
+
+    An error is reported when an axis is missing, both axes are the same
+    entity, grouping is requested without a category, or no rows are left
+    to plot.
+    """
+    # webserver imports this module, so the import has to stay local
+    from webserver import get_db
+    rdb = get_db()
+    all_numeric_entities = rwh.get_numeric_entities(rdb)
+    all_categorical_entities = rwh.get_categorical_entities(rdb)
+    all_categorical_only_entities = sorted(set(all_categorical_entities) - set(all_numeric_entities))
+
+    y_axis = request.form.get('y_axis')
+    x_axis = request.form.get('x_axis')
+    category = request.form.get('category')
+    add_group_by = request.form.get('add_group_by') is not None
+    add_separate_regression = request.form.get('add_separate_regression') is not None
+
+    error = None
+    categorical_df = None
+    if not x_axis or not y_axis or x_axis == "Choose entity" or y_axis == "Choose entity":
+        error = "Please select x_axis and y_axis"
+    elif x_axis == y_axis:
+        error = "You can't compare the same entity"
+    elif add_group_by and (not category or category == "Choose entity"):
+        # a missing category would otherwise leave categorical_df unset below
+        error = "Please select a categorical value to group by"
+    elif add_group_by:
+        categorical_df, error = rwh.get_joined_categorical_values([category], rdb)
+        error = "No data based on the selected entities ( " + ", ".join([category]) + " ) " if error else None
+
+    numeric_df, error = rwh.get_joined_numeric_values([x_axis, y_axis], rdb) if not error else (None, error)
+
+    plot_series = []
+    if not error:
+        if add_group_by:
+            merged_df = pd.merge(numeric_df.dropna(), categorical_df.dropna(), how='inner', on='patient_id')
+            groups = [(value, merged_df.loc[merged_df[category] == value])
+                      for value in merged_df[category].unique()]
+        else:
+            # dropna keeps only the patients that have both values
+            groups = [('Patients', numeric_df.dropna())]
+        for idx, (label, df) in enumerate(groups):
+            # wrap around the palette so more than ten groups cannot raise IndexError
+            colour = _PALETTE[idx % len(_PALETTE)] if add_group_by else None
+            # columns are picked by name instead of relying on their order
+            plot_series.extend(_group_traces(label, df[x_axis], df[y_axis], df['patient_id'], colour))
+        if not plot_series:
+            error = "No data based on the selected entities ( " + ", ".join([x_axis, y_axis]) + " ) "
+
+    if error:
+        return render_template('scatter_plot.html',
+                               error=error,
+                               numeric_tab=True,
+                               x_axis=x_axis,
+                               y_axis=y_axis,
+                               category=category,
+                               all_numeric_entities=all_numeric_entities,
+                               all_categorical_entities=all_categorical_only_entities,
+                               add_group_by=add_group_by,
+                               add_separate_regression=add_separate_regression)
+
+    return render_template('scatter_plot.html',
+                           numeric_tab=True,
+                           all_numeric_entities=all_numeric_entities,
+                           all_categorical_entities=all_categorical_only_entities,
+                           x_axis=x_axis,
+                           y_axis=y_axis,
+                           add_group_by=add_group_by,
+                           add_separate_regression=add_separate_regression,
+                           plot_series=plot_series)
+
+
+# fixed colours for the grouped traces, used in this order
+_PALETTE = ['rgb(31, 119, 180)', 'rgb(255, 127, 14)',
+            'rgb(44, 160, 44)', 'rgb(214, 39, 40)',
+            'rgb(148, 103, 189)', 'rgb(140, 86, 75)',
+            'rgb(227, 119, 194)', 'rgb(127, 127, 127)',
+            'rgb(188, 189, 34)', 'rgb(23, 190, 207)']
+
+
+def _group_traces(label, xs, ys, patient_ids, colour=None):
+    """Return the marker trace of one group plus, if it can be fitted, its regression line.
+
+    ``xs``, ``ys`` and ``patient_ids`` are the aligned values of the group and
+    ``label`` is its legend name.  ``colour`` is None for the ungrouped plot,
+    which keeps the uncoloured traces and the regression name without a
+    group label.  An empty group yields no traces; a single point yields
+    only the marker trace.
+    """
+    if len(xs) == 0:
+        return []
+    markers = {
+        'x': list(xs),
+        'y': list(ys),
+        'mode': 'markers',
+        'type': 'scatter',
+        'name': label,
+        'text': list(patient_ids),
+    }
+    if colour is not None:
+        markers['marker'] = {'color': colour}
+
+    # a straight line needs at least two points; np.polyfit raises on empty input
+    if len(xs) < 2:
+        return [markers]
+    m, b = np.polyfit(np.array(xs), np.array(ys), 1)
+    line = {
+        'x': list(xs),
+        'y': list(np.array(xs) * m + b),
+        'type': 'scatter',
+    }
+    if colour is None:
+        line['name'] = 'Linear regression:<br>(y={0:.2f}x + {1:.2f})'.format(m, b)
+    else:
+        line['name'] = 'Linear regression {0}:<br>(y={1:.2f}x + {2:.2f})'.format(label, m, b)
+        line['mode'] = 'lines'
+        line['line'] = {'color': colour}
+    return [markers, line]
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/webserver.py b/webserver.py
index a7405fb6..0b34dce5 100644
--- a/webserver.py
+++ b/webserver.py
@@ -1,94 +1,46 @@
-import misc.mpl_utils as mpl_utils
-import data_warehouse.data_warehouse_utils as dwu
-import misc.utils as utils
-
-from flask import Flask, session, g, redirect, url_for, render_template, flash
-from flask_login import LoginManager
+# import the Flask class from the flask module
+from flask import Flask, session, g, redirect, flash
from flask_redis import FlaskRedis
-# I don't like the idea of handling all the urls in the same file
-# I'll put all the new urls in the 'url_handlers' directory (one file for each new url)
-# then can import it here and register (below) as a Blueprint: http://flask.pocoo.org/docs/1.0/blueprints/
-from url_handlers.data_management import data_management_page
+# Urls in the 'url_handlers' directory (one file for each new url)
+# import a Blueprint
+
from url_handlers.basic_stats import basic_stats_page
-from url_handlers.plots import plots_page
-from url_handlers.clustering import clustering_page
-from url_handlers.login import login_page
from url_handlers.histogram import histogram_page
from url_handlers.boxplot import boxplot_page
-from url_handlers.coplots import coplots_page
-
-from url_handlers.login import User
+from url_handlers.scatter_plot import scatter_plot_page
+from url_handlers.barchart import barchart_page
+from url_handlers.heatmap import heatmap_plot_page
+from url_handlers.clustering_pl import clustering_plot_page
+from url_handlers.coplots_pl import coplots_plot_page
+from url_handlers.logout import logout_page
import os
-
-###
-# Images
-###
-from flask import make_response, request
-from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
-
-import matplotlib.pyplot as plt
-
-import flask
-import io
-import matplotlib.backends.backend_agg
-
-import seaborn as sns
-
-sns.set(style='whitegrid')
-
-# from: https://arusahni.net/blog/2014/03/flask-nocache.html
-from functools import wraps, update_wrapper
-from datetime import datetime
-
from modules.import_scheduler import Scheduler
-import atexit
-
+# create the application object
app = Flask(__name__)
-app.config.from_object(__name__)
-
-app.config.update(dict(
- SECRET_KEY='development key',
- USERNAME='admin',
- PASSWORD='default'
- ))
-app.config.from_envvar('FLASKR_SETTINGS', silent=True)
# register blueprints here:
-app.register_blueprint(data_management_page)
+
+app.register_blueprint(logout_page)
app.register_blueprint(basic_stats_page)
-app.register_blueprint(plots_page)
-app.register_blueprint(clustering_page)
-app.register_blueprint(login_page)
app.register_blueprint(histogram_page)
app.register_blueprint(boxplot_page)
-app.register_blueprint(coplots_page)
-
-# login manager
-login_manager = LoginManager()
-login_manager.init_app(app)
-
+app.register_blueprint(scatter_plot_page)
+app.register_blueprint(barchart_page)
+app.register_blueprint(heatmap_plot_page)
+app.register_blueprint(clustering_plot_page)
+app.register_blueprint(coplots_plot_page)
-# don't understand why we need this
-@login_manager.user_loader
-def load_user(user_id):
- return User.get_by_id(user_id)
-
-
-# @app.context_processor
-# def inject_enumerate():
-# return dict(enumerate=enumerate)
-
-###
-# Database stuff
-###
+# Connection with database
def connect_db():
""" connects to our redis database """
- app.config["REDIS_URL"] = os.environ["REDIS_URL"] # || "redis://docker.for.mac.localhost:6379/0"
- redis_store = FlaskRedis()
- redis_store.init_app(app)
+ # Set this connection URL in an environment variable, and then load it into your application configuration using
+ # os.environ, like this
+ app.config["REDIS_URL"] = os.environ["REDIS_URL"]
+ # To add a Redis client to your application
+ redis_store = FlaskRedis(app)
return redis_store
@@ -103,486 +55,20 @@ def get_db():
@app.teardown_appcontext
def close_db(error):
- """ close the database, or whatever, on exit """
- pass
-
-
-def init_db():
- db = get_db()
- return
-
- # we can use app.open_resource to grab something from the main
- # folder (flaskr, here)
- with app.open_resource('schema.sql', mode='r') as f:
- pass
-
+ """Closes the database again at the end of the request."""
+ if hasattr(g, 'redis_db'):
+ g.redis_db.close()
-@app.cli.command('initdb')
-def initdb_command():
- """ this function is associated with the "initdb" command of the
- "flask" script
- """
- init_db()
+""" Redirect the site root to the Basic Stats page. """
@app.route('/', methods=['GET'])
-def root_route():
- return redirect('/basic_stats')
-
-
-###
-# Security
-###
-@app.route('/login', methods=['GET', 'POST'])
def login():
- error = None
- if request.method == 'POST':
- if request.form['username'] != app.config['USERNAME']:
- error = "Invalid username"
- elif request.form['password'] != app.config['PASSWORD']:
- error = "Invalid password"
- else:
- session['logged_in'] = True
- flash("You were logged in")
- return redirect(url_for('show_entities'))
- return render_template('login_page.html', error=error)
-
-
-# @app.route('/login', methods=['POST'])
-# def login():
-# error = None
-# if request.method == 'POST':
-# if request.form['email'] != app.config['USERNAME']:
-# error = "Login error: Invalid username"
-# elif request.form['password'] != app.config['PASSWORD']:
-# error = "Login error: Invalid password"
-# else:
-# session['logged_in'] = True
-# flash("You were logged in")
-# # todo: error message
-# return redirect(request.referrer)
-
-
-@app.route('/logout')
-def logout():
- session.pop('logged_in', None)
- flash("You were logged out")
-
-
-# return redirect(request.referrer)
-
-# @app.route('/a', methods=['GET','POST'])
-# def show_cluster_numeric_entities():
-# r = get_db()
-# all_numeric_entities = rwh.get_numeric_entities(r)
-# all_categorical_entities = rwh.get_categorical_entities(r)
-#
-# all_categorical_only_entities = set(all_categorical_entities) - set(all_numeric_entities)
-# all_categorical_only_entities = sorted(all_categorical_only_entities)
-#
-# # numeric clustering
-# numeric_m = None
-# numeric_label_uses = None
-# numeric_entities = None
-# numeric_cluster_image = None
-# numeric_standardize=None
-# numeric_missing=None
-#
-# # categorical clustering
-# cluster_category_values = None
-# categorical_label_uses = None
-# categorical_entities = None
-# category_values = None
-# categorical_cluster_image = None
-#
-# # counting
-# counts = None
-# all_present = None
-# any_present = None
-#
-# if 'cluster_numeric' in request.form:
-# numeric_entities = request.form.getlist('numeric_entities')
-# numeric_standardize = request.form['standardize'] == "yes"
-# numeric_missing = request.form['missing']
-# if any([entitiy for entitiy in numeric_entities]):
-# np.random.seed(8675309)
-# cluster_info = dwu.cluster_numeric_fields(
-# numeric_entities,
-# r,
-# standardize=numeric_standardize,
-# missing=numeric_missing
-# )
-#
-# numeric_m, numeric_label_uses, df = cluster_info
-# any_present = df.shape[0]
-# all_present = df.dropna().shape[0]
-# numeric_cluster_image = True
-#
-# elif 'count_numeric' in request.form:
-# numeric_entities = request.form.getlist('numeric_entities')
-# if any([numeric_entitiy for numeric_entitiy in numeric_entities]):
-# numeric_df = rwh.get_joined_numeric_values(numeric_entities, r)
-# numeric_df = numeric_df[numeric_entities]
-# counts = numeric_df.count()
-# any_present = numeric_df.shape[0]
-# all_present = numeric_df.dropna().shape[0]
-#
-# elif 'count_categorical' in request.form:
-# categorical_entities = request.form.getlist('categorical_entities')
-#
-# if any([entitiy for entitiy in categorical_entities]):
-# categorical_df = rwh.get_joined_categorical_values(categorical_entities, r)
-# #
-# categorical_df = categorical_df[categorical_entities]
-#
-# counts = categorical_df.count()
-# any_present = categorical_df.shape[0]
-# all_present = categorical_df.dropna().shape[0]
-#
-# elif 'cluster_categorical' in request.form:
-# categorical_entities = request.form.getlist('categorical_entities')
-# if any([entitiy for entitiy in categorical_entities]):
-# eps = 0.15
-# min_samples = 10
-#
-# np.random.seed(8675309)
-#
-# cluster_info = dwu.cluster_categorical_entities(
-# categorical_entities,
-# r,
-# eps=eps,
-# min_samples=min_samples
-# )
-#
-# ccv, cat_rep_np, category_values, categorical_label_uses, cat_df = cluster_info
-# cluster_category_values = ccv
-# any_present = cat_df.shape[0]
-# all_present = cat_df.dropna().shape[0]
-#
-# categorical_cluster_image = True
-#
-# return render_template('statistics.html',
-# all_numeric_entities=all_numeric_entities,
-# all_categorical_entities=all_categorical_only_entities,
-# numeric_m=numeric_m,
-# numeric_label_uses=numeric_label_uses,
-# numeric_entities=numeric_entities,
-# numeric_cluster_image=numeric_cluster_image,
-# numeric_standardize=numeric_standardize,
-# numeric_missing=numeric_missing,
-# cluster_category_values=cluster_category_values,
-# categorical_label_uses=categorical_label_uses,
-# categorical_entities=categorical_entities,
-# category_values=category_values,
-# categorical_cluster_image=categorical_cluster_image,
-# counts=counts,
-# any_present=any_present,
-# all_present=all_present
-# )
-
-
-def send_image(fig):
- fig.tight_layout()
- canvas = matplotlib.backends.backend_agg.FigureCanvas(fig)
- img = io.BytesIO()
- fig.savefig(img)
- img.seek(0)
- return flask.send_file(img, mimetype='image/png')
-
-
-def nocache(view):
- @wraps(view)
- def no_cache(*args, **kwargs):
- response = make_response(view(*args, **kwargs))
- response.headers['Last-Modified'] = datetime.now()
- response.headers['Cache-Control'] = 'no-store, no-cache, must-revalidate, post-check=0, pre-check=0, max-age=0'
- response.headers['Pragma'] = 'no-cache'
- response.headers['Expires'] = '-1'
- return response
-
- return update_wrapper(no_cache, view)
-
-
-@app.route('/categorical_cluster_image/
')
-@nocache
-def categorical_cluster_image(entities):
- import numpy as np
- import pandas as pd
- import sklearn.preprocessing
-
- r = get_db()
-
- np.random.seed(8675309)
-
- categorical_entities = entities.split(",")
-
- cluster_info = dwu.cluster_categorical_entities(
- categorical_entities,
- r
- )
-
- ccv, cat_rep_np, category_values, categorical_label_uses, cat_df, error = cluster_info
- if error:
- # TODO: check if this affects anything
- return None
- ccv_df = pd.DataFrame(ccv)
-
- # this should all be pulled into a function
- patient_count_field = "Scaled patient count"
-
- ccv_np = ccv_df.values
- scaled_ccv_np = sklearn.preprocessing.normalize(ccv_np, norm="l1")
- scaled_ccv_pivot_np = scaled_ccv_np.transpose()
-
- ccv_pivot_df = pd.DataFrame(scaled_ccv_pivot_np)
- ccv_pivot_df.columns = ccv_df.index
- ccv_pivot_df.index = ccv_df.columns
-
- cols_to_visualize = list(ccv_pivot_df.columns) + [patient_count_field]
-
- label_uses_df = utils.dict_to_dataframe(categorical_label_uses, key_name="cluster", value_name="count")
-
- label_counts = label_uses_df['count']
- label_counts = label_counts.values.reshape(1, -1)
- label_uses_df[patient_count_field] = sklearn.preprocessing.normalize(label_counts, norm="l1")[0]
-
- ccv_pivot_merge_df = ccv_pivot_df.merge(label_uses_df, left_index=True, right_on='cluster')
- ccv_pivot_merge_df.index = ccv_pivot_merge_df['cluster']
-
- df = ccv_pivot_merge_df[cols_to_visualize]
-
- new_df = pd.DataFrame(df.values.transpose())
- new_df.columns = df.index
- new_df.index = df.columns
- m_zero = new_df == 0
-
- # create the image
-
- fig, ax = plt.subplots(figsize=(25, 10))
- fontsize = 30
-
- vmax = 1
- sns.heatmap(
- new_df,
- cmap="Blues",
- ax=ax,
- vmin=0,
- vmax=vmax,
- mask=m_zero
- )
-
- mpl_utils.set_ticklabel_rotation(ax, 0, axis='y')
- mpl_utils.set_ticklabels_fontsize(ax, fontsize)
- mpl_utils.set_title_fontsize(ax, fontsize)
- mpl_utils.set_label_fontsize(ax, fontsize)
-
- ax.xaxis.tick_top()
- ax.xaxis.set_label_position('top')
-
- # get the colorbar, as well
- cax = plt.gcf().axes[-1]
- mpl_utils.set_ticklabels_fontsize(cax, fontsize)
-
- return send_image(fig)
-
-
-@app.route('/patient_categorical_cluster_image/')
-@nocache
-def patient_categorical_cluster_image(entities):
- import numpy as np
- import sklearn.manifold
-
- r = get_db()
-
- eps = 0.15
- min_samples = 10
-
- np.random.seed(8675309)
-
- categorical_entities = entities.split(",")
-
- cluster_info = dwu.cluster_categorical_entities(
- categorical_entities,
- r,
- eps=eps,
- min_samples=min_samples
- )
-
- ccv, cat_rep_np, category_values, categorical_label_uses, cat_df, error = cluster_info
- if error:
- # TODO: check if this affects anything
- return None
-
- np.random.seed(8675309)
- tsne = sklearn.manifold.TSNE(n_components=2)
- np.set_printoptions(suppress=True)
- projection = tsne.fit_transform(cat_rep_np)
-
- fig, ax = plt.subplots() # figsize=(15,15))
-
- # Black removed and is used for noise instead.
- unique_labels = sorted(cat_df['cluster'].unique())
- colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
- for k, col in zip(unique_labels, colors):
- if k == -1:
- # Black used for noise.
- col = 'k'
-
- m_cluster = np.array(cat_df['cluster'] == k)
-
- xy = projection[m_cluster]
- ax.plot(
- xy[:, 0],
- xy[:, 1],
- 'o',
- markerfacecolor=col,
- markeredgecolor='k',
- markersize=8,
- markeredgewidth=1,
- label=str(k)
- )
-
- ax.legend(loc='best')
-
- return send_image(fig)
-
-
-@app.route('/numeric_cluster_image/', methods=['GET'])
-@nocache
-def numeric_cluster_image(entities):
- import numpy as np
- import pandas as pd
- import sklearn.preprocessing
-
- r = get_db()
-
- numeric_entities = entities.split(",")
- standardize = request.args.get('standardize')
- missing = request.args.get('missing')
-
- np.random.seed(8675309)
- cluster_info = dwu.cluster_numeric_fields(
- numeric_entities,
- r,
- standardize=standardize,
- missing=missing
- )
-
- numeric_m, numeric_label_uses, patient_df, error = cluster_info
- if error:
- # TODO: check if this affects anything
- return None
-
-
- # this should be a function
- X = patient_df[numeric_entities].values
- X = sklearn.preprocessing.scale(X)
-
- scaled_df = pd.DataFrame(X)
- scaled_df.columns = numeric_entities
- scaled_df.index = patient_df.index
- scaled_df['cluster'] = patient_df['cluster']
-
- tidy_scaled_df = pd.melt(scaled_df, id_vars=['cluster'], value_name="Scaled value")
- tidy_scaled_df['variable'] = dwu.clean_entity_names(tidy_scaled_df['variable'])
-
- # m_outlier = tidy_scaled_df['cluster'] == 2
- # tidy_scaled_no_outlier = tidy_scaled_df[~m_outlier]
-
- # create the plot
- viz_df = tidy_scaled_df
- fontsize = 20
-
- num_clusters = len(viz_df['cluster'].unique())
-
- g = sns.factorplot(
- x="variable",
- y="Scaled value",
- col="cluster",
- col_wrap=3,
- # row="variable",
- data=viz_df,
- kind='violin',
- sharey=False,
- # size=5,
- aspect=num_clusters / 5
- )
-
- g.set(ylim=(-10, 10))
-
- for ax in g.axes.flat:
- mpl_utils.set_title_fontsize(ax, fontsize)
- mpl_utils.set_ticklabels_fontsize(ax, fontsize)
-
- # g.set_xticklabels(fontsize=fontsize)
- # g.set_yticklabels(fontsize=fontsize)
- g.set_xlabels(fontsize=fontsize)
- g.set_ylabels(fontsize=fontsize)
-
- g.fig.tight_layout()
-
- return send_image(g.fig)
-
-
-@app.route('/patient_numeric_cluster_image/', methods=['GET'])
-@nocache
-def patient_numeric_cluster_image(entities):
- import numpy as np
- import sklearn.manifold
-
- r = get_db()
-
- numeric_entities = entities.split(",")
- standardize = request.args.get('standardize')
- missing = request.args.get('missing')
-
- np.random.seed(8675309)
- cluster_info = dwu.cluster_numeric_fields(
- numeric_entities,
- r,
- standardize=standardize,
- missing=missing
- )
-
- numeric_m, numeric_label_uses, patient_df, error = cluster_info
- if error:
- # TODO: check if this affects anything
- return None
-
-
- np.random.seed(8675309)
- tsne = sklearn.manifold.TSNE(n_components=2)
- np.set_printoptions(suppress=True)
- projection = tsne.fit_transform(patient_df[numeric_entities])
-
- fig, ax = plt.subplots() # figsize=(15,15))
-
- # Black removed and is used for noise instead.
- unique_labels = sorted(patient_df['cluster'].unique())
- colors = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))
- for k, col in zip(unique_labels, colors):
- if k == -1:
- # Black used for noise.
- col = 'k'
-
- m_cluster = np.array(patient_df['cluster'] == k)
- xy = projection[m_cluster]
- ax.plot(
- xy[:, 0],
- xy[:, 1],
- 'o',
- markerfacecolor=col,
- markeredgecolor='k',
- markersize=8,
- markeredgewidth=1,
- label=str(k)
- )
-
- ax.legend(loc='best')
+ return redirect('/basic_stats')
- return send_image(fig)
+# Scheduled data import: the settings below are read from environment variables
def check_for_env(key: str, default=None, cast=None):
if key in os.environ:
if cast:
@@ -590,20 +76,17 @@ def check_for_env(key: str, default=None, cast=None):
return os.environ.get(key)
return default
-
+# Day of week, hour and minute at which the data import runs
day_of_week = check_for_env('IMPORT_DAY_OF_WEEK', default='mon-sun')
hour = check_for_env('IMPORT_HOUR', default=5)
minute = check_for_env('IMPORT_MINUTE', default=5)
+
+# Start the Scheduler from modules.import_scheduler unless IMPORT_DISABLED is set
if os.environ.get('IMPORT_DISABLED') is None:
scheduler = Scheduler(day_of_week=day_of_week, hour=hour, minute=minute)
scheduler.start()
-
-
-@atexit.register
-def exit():
-    scheduler.stop()
+    import atexit; atexit.register(scheduler.stop)  # stop on exit, not right after start()
-
def main():
- return app
+ return app
\ No newline at end of file