-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfindRatePerClass.py
executable file
·38 lines (32 loc) · 1.28 KB
/
findRatePerClass.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/env python
"""
Find mutation rate per repeat class.
"""
from argparse import ArgumentParser
from collections import defaultdict
def _tally_psl(lines):
    """Sum aligned bases and mismatches per class from PSL-with-name lines.

    Each non-blank line must hold 22 whitespace-separated fields: a class
    name followed by the PSL columns. Only fields 1-3 are read (in the PSL
    format these are matches, misMatches and repMatches); their sum is the
    aligned size of the record and field 2 is its mismatch count.

    Args:
        lines: iterable of text lines (e.g. an open file).

    Returns:
        A pair ``(sizes, mismatches)`` of dicts keyed by class name.
        Records whose aligned size is zero contribute nothing, so a class
        seen only in such records is absent from both dicts.

    Raises:
        ValueError: if a non-blank line does not have exactly 22 fields,
            or a count field is not an integer.
    """
    sizes = defaultdict(int)
    mismatches = defaultdict(int)
    for lineno, line in enumerate(lines, 1):
        fields = line.split()
        if not fields:
            # Tolerate blank lines instead of rejecting the whole file.
            continue
        if len(fields) != 22:
            # Explicit raise: an assert would vanish under `python -O`.
            raise ValueError(
                "Invalid number of fields in PSL-with-name input "
                "(line %d: expected 22, got %d)" % (lineno, len(fields)))
        name = fields[0]
        record_mismatches = int(fields[2])
        aligned = int(fields[1]) + record_mismatches + int(fields[3])
        if aligned == 0:
            # Nothing aligned; skipping keeps every stored size positive,
            # so the per-class division later can never hit zero.
            continue
        sizes[name] += aligned
        mismatches[name] += record_mismatches
    return sizes, mismatches


def main(argv=None):
    """Print the mismatch rate for each class, then the overall rate.

    Reads the PSL-with-name file given as the single positional argument
    and writes one line per class, ``<name> <rate> <aligned size>``,
    followed by a final line ``<overall rate> <total aligned size>``.

    Args:
        argv: optional list of command-line arguments; when None the
            arguments are taken from ``sys.argv`` by argparse.

    Raises:
        ValueError: on malformed input lines (see ``_tally_psl``).
        SystemExit: if the input contains no aligned bases at all, since
            the overall rate would otherwise be a division by zero.
    """
    parser = ArgumentParser()
    parser.add_argument('liftoverPsl')
    opts = parser.parse_args(argv)

    with open(opts.liftoverPsl) as f:
        sizes, mismatches = _tally_psl(f)

    grand_total = sum(sizes.values())
    if grand_total == 0:
        raise SystemExit(
            "No aligned bases found in %s; cannot compute a mutation rate"
            % opts.liftoverPsl)

    # Single-argument print(...) yields identical output on Python 2 and 3.
    for name, class_mismatches in mismatches.items():
        class_size = sizes[name]
        print("%s %s %s" % (name, float(class_mismatches) / class_size,
                            class_size))
    print("%s %s" % (float(sum(mismatches.values())) / grand_total,
                     grand_total))
# Run the analysis only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()