-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathBalance.py
38 lines (34 loc) · 943 Bytes
/
Balance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
'''
Created on Dec 18, 2015
@author: Nikola
Created at the University of Manchester, School of Computer Science
Licence GNU/GPL 3.0
'''
import random
# Default locations and sampling settings used when run as a script.
INPUT_PATH = 'learnngCellDatasetAge.csv'
OUTPUT_PATH = 'learnngCellDatasetAgeBalanced.csv'
HEADER = "ArticleId,PMCid,TableName,SpecPragmatics,CellContent,Header,Stub,SuperRow,rowN,columnN,function,hasValue\n"
NEGATIVE_SAMPLE_SIZE = 200
RANDOM_SEED = 419


def split_by_label(rows):
    """Partition raw CSV lines by whether their last field contains 'yes'.

    Args:
        rows: iterable of raw text lines (as returned by ``readlines()``).

    Returns:
        A ``(positives, negatives)`` tuple of lists, each preserving the
        input order. Every returned line ends with a newline.
    """
    positives = []
    negatives = []
    for row in rows:
        # A final line without a trailing newline would otherwise be fused
        # with whatever row is written after it in the output file.
        if not row.endswith("\n"):
            row += "\n"
        # NOTE(review): fields are split on every comma, so quoted commas are
        # not understood; only the text after the last comma is inspected.
        # NOTE(review): if the input starts with a header line whose last
        # field lacks 'yes', it is treated as a negative row and may be
        # sampled into the output -- confirm whether the input has a header.
        last_field = row.split(",")[-1]
        if 'yes' in last_field:
            positives.append(row)
        else:
            negatives.append(row)
    return positives, negatives


def sample_with_replacement(rows, count, seed):
    """Draw ``count`` rows from ``rows`` uniformly, with replacement.

    A dedicated ``random.Random(seed)`` generator is used, which yields the
    same index sequence as calling ``random.seed(seed)`` followed by
    ``random.randrange`` on the module, without touching global state.

    Args:
        rows: list of candidate rows.
        count: number of draws to make.
        seed: seed for the pseudo-random generator (makes runs repeatable).

    Returns:
        A list of ``count`` rows (duplicates possible), or an empty list when
        ``rows`` is empty -- ``randrange(0, 0)`` would raise ``ValueError``.
    """
    if not rows:
        return []
    rng = random.Random(seed)
    return [rows[rng.randrange(0, len(rows))] for _ in range(count)]


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH,
         sample_size=NEGATIVE_SAMPLE_SIZE, seed=RANDOM_SEED):
    """Write a class-balanced copy of the input dataset.

    The output consists of the fixed header, every row whose last field
    contains 'yes' (in input order), and then ``sample_size`` rows drawn with
    replacement from the remaining rows. The number of 'yes' rows and a final
    "done" marker are printed to stdout.

    Args:
        input_path: CSV file to read.
        output_path: CSV file to create or overwrite.
        sample_size: how many non-'yes' rows to draw.
        seed: seed for the random draw.
    """
    # Read everything first so the output file is not created/truncated when
    # the input cannot be opened.
    with open(input_path) as source:
        content = source.readlines()

    positives, negatives = split_by_label(content)

    # The context manager guarantees the output is closed even if a write
    # fails part-way through.
    with open(output_path, 'w+') as balanced:
        balanced.write(HEADER)
        balanced.writelines(positives)
        print(len(positives))
        balanced.writelines(
            sample_with_replacement(negatives, sample_size, seed))
    print("done")


if __name__ == "__main__":
    main()