-
Notifications
You must be signed in to change notification settings - Fork 75
/
Copy pathapp.py
87 lines (72 loc) · 3.31 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import streamlit as st
import pandas as pd
from PIL import Image
import subprocess
import os
import base64
import pickle
# Molecular descriptor calculator
def desc_calc():
# Performs the descriptor calculation
bashCommand = "java -Xms2G -Xmx2G -Djava.awt.headless=true -jar ./PaDEL-Descriptor/PaDEL-Descriptor.jar -removesalt -standardizenitro -fingerprints -descriptortypes ./PaDEL-Descriptor/PubchemFingerprinter.xml -dir ./ -file descriptors_output.csv"
process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE)
output, error = process.communicate()
os.remove('molecule.smi')
# File download
def filedownload(df):
csv = df.to_csv(index=False)
b64 = base64.b64encode(csv.encode()).decode() # strings <-> bytes conversions
href = f'<a href="data:file/csv;base64,{b64}" download="prediction.csv">Download Predictions</a>'
return href
# Model building
def build_model(input_data):
# Reads in saved regression model
load_model = pickle.load(open('acetylcholinesterase_model.pkl', 'rb'))
# Apply model to make predictions
prediction = load_model.predict(input_data)
st.header('**Prediction output**')
prediction_output = pd.Series(prediction, name='pIC50')
molecule_name = pd.Series(load_data[1], name='molecule_name')
df = pd.concat([molecule_name, prediction_output], axis=1)
st.write(df)
st.markdown(filedownload(df), unsafe_allow_html=True)
# Logo image
image = Image.open('logo.png')
st.image(image, use_column_width=True)
# Page title
st.markdown("""
# Bioactivity Prediction App (Acetylcholinesterase)
This app allows you to predict the bioactivity towards inhibting the `Acetylcholinesterase` enzyme. `Acetylcholinesterase` is a drug target for Alzheimer's disease.
**Credits**
- App built in `Python` + `Streamlit` by [Chanin Nantasenamat](https://medium.com/@chanin.nantasenamat) (aka [Data Professor](http://youtube.com/dataprofessor))
- Descriptor calculated using [PaDEL-Descriptor](http://www.yapcwsoft.com/dd/padeldescriptor/) [[Read the Paper]](https://doi.org/10.1002/jcc.21707).
---
""")
# Sidebar
with st.sidebar.header('1. Upload your CSV data'):
uploaded_file = st.sidebar.file_uploader("Upload your input file", type=['txt'])
st.sidebar.markdown("""
[Example input file](https://raw.githubusercontent.com/dataprofessor/bioactivity-prediction-app/main/example_acetylcholinesterase.txt)
""")
if st.sidebar.button('Predict'):
load_data = pd.read_table(uploaded_file, sep=' ', header=None)
load_data.to_csv('molecule.smi', sep = '\t', header = False, index = False)
st.header('**Original input data**')
st.write(load_data)
with st.spinner("Calculating descriptors..."):
desc_calc()
# Read in calculated descriptors and display the dataframe
st.header('**Calculated molecular descriptors**')
desc = pd.read_csv('descriptors_output.csv')
st.write(desc)
st.write(desc.shape)
# Read descriptor list used in previously built model
st.header('**Subset of descriptors from previously built models**')
Xlist = list(pd.read_csv('descriptor_list.csv').columns)
desc_subset = desc[Xlist]
st.write(desc_subset)
st.write(desc_subset.shape)
# Apply trained model to make prediction on query compounds
build_model(desc_subset)
else:
st.info('Upload input data in the sidebar to start!')