-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathevent_work.py
145 lines (105 loc) · 3.64 KB
/
event_work.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import db_events
import event_merge
import event_mapping
def map_events(events):
    """
    Map sections in text to events.

    event_mapping.map_event_section is called once for every entry of
    event_sections.sections. The first call receives 'events'; every later
    call receives the result of the previous one. The result of the last
    call is returned ('events' itself when there are no sections).
    """
    import event_sections

    mapped = events
    for section in event_sections.sections:
        mapped = event_mapping.map_event_section(mapped, section)
    return mapped
def make_event_authors(events):
    """
    Make new author names like 'A', 'B', ... , 'Z', '1A', '1B', and so on.

    events -- iterable of (pad, revs, event) triples; only
              event[u'meta'][u'author'] is read from each triple.

    Returns a dict mapping every distinct author to its new name. Names are
    handed out in order of first appearance.
    """
    alphabet_size = ord('Z') - ord('A') + 1
    authors = {}
    for _pad, _revs, event in events:
        author = event[u'meta'][u'author']
        if author in authors:
            continue
        # divmod floors the quotient on purpose: with true division the
        # quotient is a non-zero float for every index but 0, which would
        # turn 'B' into '0B'.
        cycle, offset = divmod(len(authors), alphabet_size)
        letter = chr(ord('A') + offset)
        authors[author] = "%d%s" % (cycle, letter) if cycle else letter
    return authors
def map_event_authors(events, authors):
    """
    Map authors to events.

    Lazily walks 'events', overwrites the 'author' attribute of each event
    with authors[event.author] and yields the same (modified) event object.
    Order is preserved. With a dict as 'authors', an author that has no
    entry raises KeyError.
    """
    for item in events:
        replacement = authors[item.author]
        item.author = replacement
        yield item
def export_events(merge_events):
    """
    Convert events for further export into file / db.

    For every merge event one tuple is yielded:
    (nr, author, case, section, end_revision, timestamp, changeset)

    nr           -- 1-based position of the merge event in 'merge_events'
    author       -- author of the first event of the merge event
    case         -- case of the first event, "" when it is falsy
    section      -- section of the first event, "" when it is falsy
    end_revision -- revision of the last event of the merge event
    timestamp    -- timestamp of the first event, formatted as
                    "YYYY-MM-DD HH:MM:SS" (UTC)
    changeset    -- JSON object (as a string) holding text, start and
                    length of the positive and of the negative changes
    """
    import json
    import time

    def ts2s(timestamp):
        """
        Convert timestamp (in seconds) into printable datetime format.
        """
        return time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(timestamp))

    for number, merge_event in enumerate(merge_events, 1):
        first_event = merge_event.events[0]
        last_event = merge_event.events[-1]
        p_start, p_end, p_text = merge_event.positive_changes
        n_start, n_end, n_text = merge_event.negative_changes

        # The texts go to json.dumps unencoded: it escapes non-ASCII
        # characters itself, and UTF-8 encoded bytes are rejected by
        # json.dumps on Python 3.
        changeset = json.dumps({
            "positive": p_text,
            "positive_start": p_start,
            "positive_length": p_end - p_start,
            "negative": n_text,
            "negative_start": n_start,
            "negative_length": n_end - n_start,
        })

        yield (
            number,
            first_event.author,
            first_event.case or "",
            first_event.section or "",
            last_event.revision,
            ts2s(first_event.timestamp),
            changeset,
        )
def csv_export(header, rows, filename):
    """
    Write 'header' as the first line of the csv file 'filename', followed
    by one line per entry of 'rows'. Fields are separated by ';'. The file
    is opened for writing, so existing content is replaced.
    """
    import csv

    with open(filename, "w") as out:
        out_writer = csv.writer(out, delimiter=";")
        out_writer.writerow(header)
        for row in rows:
            out_writer.writerow(row)
def process_events(events, authors, threshold):
    """
    Turn loaded events into merge events.

    events    -- events as accepted by map_events
    authors   -- author mapping handed to map_event_authors
    threshold -- timestamp threshold forwarded to event_merge.merge_events

    Returns the result of event_merge.merge_events.
    """
    events = map_events(events)
    events = event_merge.make_events(events)
    events = map_event_authors(events, authors)
    # Every event starts out wrapped in a MergeEvent of its own.
    singles = (event_merge.MergeEvent([event]) for event in events)
    # Use the 'threshold' argument; the global 'timestamp_threshold' only
    # exists when this file is run as a script.
    return event_merge.merge_events(singles, threshold)
def print_events(merge_events):
    """
    Yield one (formatted text, revision) pair per merge event.

    'merge_events' is first copied into a list. For every merge event the
    whole list is run through event_html.order_events, sentinel_events
    (together with the last event of the current merge event) and
    build_event_text; the text is then formatted with format_event_text.
    The revision in the pair is the revision of that last event.
    """
    import event_html

    all_merge_events = list(merge_events)
    for current in all_merge_events:
        final_event = current.events[-1]
        # Ordering is redone for every merge event, exactly like the calls
        # that follow it.
        ordered = event_html.order_events(all_merge_events)
        bounded = event_html.sentinel_events(ordered, final_event)
        body = event_html.build_event_text(bounded)
        formatted = event_html.format_event_text(body, current)
        yield formatted, final_event.revision
def html_export(texts, filename):
    """
    Write every text into a file of its own.

    texts    -- iterable of (text, n) pairs
    filename -- format string; 'filename % n' is the path written for a pair

    The text is stored as ASCII; any other character is written as an XML
    character reference (for example '&#233;').
    """
    for text, n in texts:
        # encode() produces bytes, so the file has to be opened in binary
        # mode: a text-mode handle rejects bytes on Python 3.
        with open(filename % n, "wb") as html_file:
            html_file.write(text.encode("ascii", "xmlcharrefreplace"))
if __name__ == "__main__":
    import sys

    # The pad to process is the first command line argument.
    pad = sys.argv[1]
    print("Processing %s pad" % pad)

    # Passed to process_events as the merge threshold
    # (presumably seconds -- confirm against event_merge.merge_events).
    timestamp_threshold = 60

    events = db_events.load_events(pad)
    # The events are loaded a second time to build the author names.
    authors = make_event_authors(db_events.load_events(pad))
    merge_events = list(process_events(events, authors, timestamp_threshold))

    # csv_header = ["nr", "user", "case", "eventtext", "startrevision", "endrevision", "starttime", "changeset"]
    csv_header = ["nr", "user", "case", "eventtext", "endrevision", "starttime", "changeset"]
    csv_export(csv_header, export_events(merge_events), "%s_merge_events.csv" % pad)

    # The html export is switched off:
    # html_export(print_events(merge_events), "%s_%%d.html" % pad)

    print("Processing done.")