-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathExtractUsingMetaMap.py
182 lines (170 loc) · 8.6 KB
/
ExtractUsingMetaMap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
'''
Created on 22 Aug 2016
@author: mbaxkhm4
Created at the University of Manchester, School of Computer Science
Licence GNU/GPL 3.0
'''
from QueryDBClass import QueryDBCalss
from Data.Table import Table
from AnalyzePattern import GetMean,GetRange
import re
# Header patterns used to recognise p-value columns.  A bare "p" token marks a
# p-value column unless the header also contains a "/p" token (e.g. "n/p").
_P_TOKEN = re.compile(r'\b(p)\b')
_SLASH_P_TOKEN = re.compile(r'\b[\/](p)\b')

# (key produced by GetMean/GetRange, label stored through SaveExtracted)
_SAVED_FIELDS = (
    ("mean", "mean"),
    ("sd", "sd"),
    ("min", "Range:minimum"),
    ("max", "Range:maximum"),
)


def is_excluded_cell(header, stub, content):
    """Return True when a cell must not be used for value extraction.

    header  -- header text of the cell (may be None, treated as empty)
    stub    -- stub text of the cell (may be None)
    content -- sanitised cell content (string)

    A cell is excluded when its header looks like a standard-deviation-only
    or p-value column, or when its stub/content carries markers ("onset",
    "%", "day", "min", "max", "<", ">", "=") that the script filters out.
    The content/stub marker checks only apply when a stub is present.
    """
    header_l = (header or '').lower()
    if (("sd" in header_l and 'mean' not in header_l)
            or "p-value" in header_l or header_l == "p"):
        return True
    if (_P_TOKEN.search(header_l) is not None
            and _SLASH_P_TOKEN.search(header_l) is None):
        return True
    if stub is None:
        return False
    stub_l = stub.lower()
    if "onset" in stub_l:
        return True
    # The content check is case-sensitive; '?' needs no check here because
    # the caller has already replaced every '?' with '[spec]'.
    if any(marker in content for marker in ('%', 'day', 'min', '<', '>', '=')):
        return True
    if any(symbol in stub for symbol in ('<', '>', '=')):
        return True
    return 'min' in stub_l or 'max' in stub_l


def detect_unit(stub, content):
    """Return the unit label ('years', 'months' or 'weeks') for a cell.

    'years' is the default.  The stub and the raw content are searched for
    "months" and then "weeks" (weeks wins when both occur).  When the stub is
    None the default is returned without looking at the content.
    """
    unit = 'years'
    if stub is None:
        return unit
    stub_l = stub.lower()
    if "months" in stub_l or "months" in content.lower():
        unit = 'months'
    if "weeks" in stub_l or "weeks" in content.lower():
        unit = 'weeks'
    return unit


def parse_cell_values(content):
    """Run GetMean, falling back to GetRange, on the raw cell content.

    Returns whatever GetMean/GetRange return (a dict of extracted values, or
    None).  GetRange is only consulted when GetMean did not already yield
    non-None "mean", "min" and "max" entries.
    """
    values = GetMean(content, {})
    complete = values is not None and all(
        values.get(key) is not None for key in ("mean", "min", "max"))
    if complete:
        print("has all")
    else:
        values = GetRange(content, values)
        # NOTE(review): the scraped source lost its indentation; this clean-up
        # is assumed to belong to the GetRange fallback branch -- confirm.
        # A "mean" equal to the range minimum is dropped.
        if (values is not None and "mean" in values and "min" in values
                and values["mean"] == values["min"]):
            del values["mean"]
    return values


def extract_and_save_cell(queryclass, cell, idArt, TableID, TableOrder, PMC,
                          AnnotationContent):
    """Extract numeric values from one row cell and store them.

    cell is a result row in which index 9 is the content, 10 the header and
    11 the stub.  Cells that yield no values, or that is_excluded_cell()
    rejects, are silently skipped.  Every extracted field that is present and
    not None is written through queryclass.SaveExtracted().
    """
    raw_content = cell[9]
    header = cell[10]
    stub = cell[11]

    values = parse_cell_values(raw_content)
    if values is None:
        return
    if values.get("mean") in ('.', '..'):
        # A lone dot / double dot is punctuation, not a number.
        return

    content = re.sub(r'[^\x00-\x7F]', '[spec]', raw_content)
    content = content.replace('?', '[spec]')

    # A missing header is treated as empty text instead of crashing the run.
    if "range" in (header or '').lower():
        values = GetRange(raw_content, values)
    if is_excluded_cell(header, stub, content):
        return
    unit = detect_unit(stub, raw_content)
    if values is None:
        # The second GetRange call above may have returned None.
        return

    mean = values.get("mean")
    totalNum = float(mean) if mean is not None else 0.0
    print(AnnotationContent + ':' + str(totalNum) + " " + unit)

    for key, label in _SAVED_FIELDS:
        # Skip fields that are absent or were left as None by the parsers.
        if values.get(key) is None:
            continue
        queryclass.SaveExtracted(idArt, TableID, TableOrder, PMC,
                                 AnnotationContent, label, float(values[key]),
                                 unit, header, "MetaMap", "MetaMap")


if __name__ == "__main__":
    queryclass = QueryDBCalss("localhost", "root", "", "table_db", )
    queryclass.DeleteAttribute("Age")
    queryclass.CreateAdditionalTables()

    # Pass 1: for every annotated cell, process all cells of the same row.
    results = queryclass.getCellsWithMetaMapAnnotation("BaselineCharacteristic", "orga")
    lastCellID = -1
    for res in results:
        CellID = res[0]
        if CellID == lastCellID:
            # The same cell can appear in consecutive result rows.
            continue
        lastCellID = CellID
        TableID = res[3]
        RowN = res[4]
        AnnotationContent = res[14]
        TableOrder = res[28]
        idArt = res[34]
        PMC = res[38]
        if PMC == '2361378':  # '1488867':
            print('right PMC')
        for cell in queryclass.getCellsFromTableRowRow(TableID, RowN):
            extract_and_save_cell(queryclass, cell, idArt, TableID,
                                  TableOrder, PMC, AnnotationContent)

    print("Done normal, going super-row")

    # Pass 2: the annotated cell is a super-row (role 4); take the rows below
    # it for as long as they still belong to that super-row.
    results = queryclass.getCellsWithMetaMapAnnotationWithRole("BaselineCharacteristic", "orga", 4)
    lastCellID = -1
    for res in results:
        CellID = res[0]
        if CellID == lastCellID:
            continue
        lastCellID = CellID
        TableID = res[3]
        RowN = res[4]
        Content = res[9]
        AnnotationContent = res[14]
        TableOrder = res[28]
        idArt = res[34]
        PMC = res[38]
        if PMC == '2361378':  # '1488867':
            print('right PMC')

        row = RowN + 1
        CellRole = 3
        SuperRowOfInterest = Content
        SuperRow = SuperRowOfInterest
        # Stop at the next role-4 cell, when the super-row text no longer
        # matches, or after row 49 as a safety bound.
        while (CellRole != 4 and SuperRow is not None
               and SuperRowOfInterest is not None
               and SuperRowOfInterest in SuperRow and row < 50):
            for cell in queryclass.getCellsFromTableRowRowWithRole(TableID, row):
                # In this query index 12 is the super-row text and index 14
                # the cell role (as read by the original script).
                SuperRow = cell[12]
                CellRole = cell[14]
                extract_and_save_cell(queryclass, cell, idArt, TableID,
                                      TableOrder, PMC, AnnotationContent)
            # NOTE(review): indentation was lost in the source; the increment
            # is placed at while-level, otherwise an empty row never ends.
            row = row + 1

    print("Done")