-
Notifications
You must be signed in to change notification settings - Fork 111
/
Copy pathimport_protocol_doc.py
executable file
·369 lines (309 loc) · 14.8 KB
/
import_protocol_doc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
#!/usr/bin/env python
# coding: utf8
"""Script to extract the list of commands from the Onkyo protocol
documentation, which is an Excel file.
Since this Excel file is not designed to be read by machines, I don't
expect this to work for new versions of the file without adjustments.
Here's how the process is supposed to work:
- This script takes the Excel document as an input file, and
converts it into a YAML file.
- This is checked into version control.
- Adjustments to this file are required and made, via version control.
- Subsequently, a new version of the Excel file can be parsed into YAML
and merged with the manual changes.
- The YAML file is used by the Python library for the final command list;
potentially by further generating a Python file from it for speed.
"""
import sys
import re
import os
from datetime import datetime
import yaml
from collections import OrderedDict
import tablib
def make_command(name):
    """Convert a string into a space-less, lower-case command name.

    Every character that is not a word character (``\\w``) is treated as a
    separator; the remaining words are lower-cased and joined with ``-``.

    :param name: free-form text, e.g. ``"System Power"``.
    :return: the dash-separated name, e.g. ``"system-power"``; an empty
        string when ``name`` contains no word characters.
    """
    # Replace special characters with spaces so they act as word separators.
    name = re.sub(r'[^\w]', ' ', name)
    name = name.strip().lower()
    # split() collapses runs of whitespace in one pass; the previous
    # "replace until no duplicate spaces remain" loop could spin forever.
    return '-'.join(name.split())
# Tainted tuple that can have a non-standard YAML representation
class FlowStyleTuple(tuple):
    """Marker subclass of ``tuple`` that adds no behavior of its own.

    It exists only so that a dedicated YAML representer can be registered
    for this type, separately from plain tuples.
    """
def is_known_footer(string):
    """Tell whether ``string`` contains one of the known footer texts.

    Sometimes the Excel file has a footer for a command section, and it
    cannot be told apart from a command value very reliably (it might be
    possible, but matching against a fixed list is the easier approach).

    :param string: cell text to inspect; may be empty or ``None``.
    :return: ``True`` if any known footer fragment occurs in ``string``,
        ``False`` otherwise (including for empty input).
    """
    if not string:
        return False

    known_fragments = (
        u'If Jacket Art is disable from one',
        u'Please refer to sheets of popup xml,',
        u'Line Separator : " ・ "(0x20, 0xC2, 0xB7, 0x20',
    )
    return any(fragment in string for fragment in known_fragments)
# Text type of the running interpreter (``unicode`` on Python 2, ``str`` on 3).
_TEXT_TYPE = type(u'')

# Values in the sheet are wrapped in straight or curly double quotes.
_HAS_QUOTE_RE = re.compile(u'["\u201c\u201d]')
# A dash directly between a closing and an opening quote separates the two
# bounds of a range, e.g. ``"00"-"1F"``.
_RANGE_SPLIT_RE = re.compile(u'(?<=["\u201c\u201d])-(?=["\u201c\u201d])')
# NOTE(review): the closing character class does not accept \u201c; this is
# kept exactly as in the original pattern.
_QUOTED_RE = re.compile(u'^["\u201c\u201d](.*?)["\u201d]$')


def _iter_table_rows(rows):
    """Yield the rows of a sheet, skipping the long XML example section.

    When a row whose first cell contains ``ex.)XML data`` is met (the NRI
    query command), that row and everything up to and including the next row
    whose first cell contains ``NET Custom Popup`` are dropped.
    """
    it = iter(rows)
    # Plain ``for`` loops are used instead of bare ``next()`` calls, because a
    # StopIteration escaping a generator body is a RuntimeError (PEP 479).
    for row in it:
        if row[0] and 'ex.)XML data' in row[0]:
            for skipped in it:
                if skipped[0] and 'NET Custom Popup' in skipped[0]:
                    break
            continue
        yield row


def _parse_value_key(value):
    """Turn the first cell of a value row into the key used in the output.

    - Text without any quote character is returned unchanged.
    - A single quoted value (``"UP"``) is returned without its quotes, with
      ``xx`` rewritten to ``{xx}`` to make clear it is a placeholder.
    - A quoted range (``"00"-"1F"``) is returned as a 2-tuple of integers,
      the bounds being parsed as hexadecimal.

    NOTE(review): the single-value/range handling is assumed to apply only to
    quoted values; applying it to unquoted text would iterate over its
    individual characters.

    :raises ValueError: if a part of a quoted value is not properly quoted,
        or a range bound is not a hexadecimal number.
    """
    if not _HAS_QUOTE_RE.search(value):
        return value

    parts = []
    for part in _RANGE_SPLIT_RE.split(value):
        match = _QUOTED_RE.match(part)
        if not match:
            raise ValueError('Cannot parse quoted value %r' % (value,))
        parts.append(match.group(1))

    if len(parts) == 1:
        # A number here is always hex; it is kept as a string on purpose,
        # since it can just as well be treated as a string command.
        return parts[0].replace('xx', '{xx}')

    # It's a range: output the bounds as base-10 integers for simplicity, in
    # a tuple so that the result is hashable.
    bounds = tuple(int(part, 16) for part in parts)
    # TODO: This will fail in a number of cases where values are given as in
    # -10...0...+10. Those should be rewritten to "-10 to 10, default 0"; the
    # logic to parse them into simple pairs is still missing, and this check
    # is the reminder. The YAML already has been manually adjusted.
    assert len(bounds) == 2, 'Unsupported range notation: %r' % (value,)
    return bounds


def _parse_support(cell):
    """Return True if a model-support cell marks the command as supported.

    Empty cells and cells starting with ``No`` count as unsupported. ``Yes``
    (optionally followed by ``(*)``) counts as supported, and so does any
    other text such as ``RS232C``: such notes seem to always imply "Yes",
    and the extra information is ignored.
    """
    if not cell:
        return False
    match = re.match(r'(Yes|No)(?:\(\*\))?', cell)
    if not match:
        return True
    return match.group(1) == 'Yes'


def _remove_dups(name, command_name, group):
    """Drop the parts of ``name`` already implied by the command or group.

    The description often repeats parts of the command name, i.e. the
    system-power command would have a value power-on, when really only "on"
    is needed.
    """
    command_parts = command_name.split('-')
    return '-'.join(
        part for part in name.split('-')
        if part not in command_parts and part != group)


def import_sheet(groupname, sheet, modelsets):
    """Parse one command sheet of the protocol document.

    :param groupname: name of the command group the sheet describes, e.g.
        ``'main'`` or ``'zone2'``. It is stripped from the generated command
        and value names.
    :param sheet: indexable sequence of rows. ``sheet[0]`` is the header row
        listing the models, ``sheet[1:]`` are the command rows.
    :param modelsets: mutable mapping from a sorted tuple of model names to a
        set name (``'set1'``, ``'set2'``, ...). It is updated in place, so
        the same mapping can be shared between several sheets.
    :return: an ``OrderedDict`` mapping each command prefix (e.g. ``PWR``) to
        an ``OrderedDict`` with the keys ``name``, ``description`` and
        ``values``. ``values`` maps every value key to an ``OrderedDict``
        holding ``description``, ``models`` (a set name) and, when one could
        be determined, ``name``.
    :raises ValueError: if a command header row or a quoted value cannot be
        parsed.
    """
    data = OrderedDict()
    group = groupname.lower()

    # First line has a list of models; ignore empty cols, and the first two.
    modelcols = [cell for cell in sheet[0] if cell][2:]
    # One header cell can contain multiple models, one per line. Split.
    # NOTE(review): the last replacement yields 'TX-NR5000' and 'ETX-NA1000';
    # confirm that is the intended split of that header cell.
    modelcols = [
        cell.replace('\n(Ether)', '(Ether)')
            .replace('\n(Ver2.0)', '(Ver2.0)')
            .replace('TX-NR5000ETX-NA1000', 'TX-NR5000\nETX-NA1000')
            .split('\n')
        for cell in modelcols]

    # There is at least one floating standalone table next to the main
    # table, that we don't care about, and which in rows further down would
    # bother us. Remember where the main table ends.
    max_model_column = len(modelcols) + 2

    prefix = None
    for row in _iter_table_rows(sheet[1:]):
        # Remove right-most columns that no longer belong to the main table.
        row = row[:max_model_column]
        # Remove whitespace from all fields; empty cells are kept as they are.
        row = [_TEXT_TYPE(cell).strip() if cell else cell for cell in row]

        # Ignore empty lines.
        if not any(row):
            continue

        # Excel format is as such:
        #   First, there is a row that defines the command prefix,
        #   e.g. "PWR" for power. We can recognize that by the fact that
        #   the model columns are all empty.
        #
        #   What follows in subsequent rows are the different values that
        #   can be appended to the prefix, to make up the full command,
        #   e.g. "PWR01".
        #
        #   One exception are footer lines below all the values which
        #   contain extra information about the command. These are
        #   difficult to recognize.

        # Try to recognize command footers. Footnotes often start with *.
        if (row[0].startswith('*') and not row[1]) or is_known_footer(row[0]):
            continue

        # Let's recognize command prefix headers. The data in those rows
        # looks something like ``"PWR" - System Power Command``.
        if not any(row[1:]):
            # Ignore a variety of text rows that are similar to a prefix
            # header. We need to grasp at straws here, since we can't look
            # at the row color, which would also tell us if it's a header.
            if 'when' in row[0] or 'Ex:' in row[0] or 'is shared' in row[0]:
                continue

            match = re.match(r'"?(.*?)"? -\s?(.*)', row[0])
            if not match:
                raise ValueError(
                    'Cannot parse command header %r' % (row[0],))
            prefix, prefix_desc = match.groups()

            # Auto-determine a possible command name.
            name = re.sub(r'\(.*\)$', '', prefix_desc)  # trailing brackets
            name = re.sub(r'(Operation\s*)?Command\s*$', '', name)
            # e.g. for zone2, remove any zone2 prefix.
            name = re.sub(r'(?i)^%s' % re.escape(groupname), '', name)
            name = make_command(name)

            command = data.setdefault(prefix, OrderedDict())
            command['name'] = name
            command['description'] = prefix_desc
            command['values'] = OrderedDict()
            continue

        # Otherwise this row describes a possible argument suffix for the
        # current command, and its receiver support.
        key = _parse_value_key(row[0])
        # A missing description cell is treated as an empty description.
        desc = row[1] or ''

        # Model support, one flag per model column.
        support = [_parse_support(cell) for cell in row[2:]]
        # Validate we don't miss anything.
        assert len(support) == len(modelcols), (
            'Row %r has %d model columns, expected %d'
            % (row[0], len(support), len(modelcols)))

        # Final, flattened and sorted list of model names; a tuple so that
        # it is hashable.
        supported_models = tuple(sorted(
            model
            for models, supported in zip(modelcols, support) if supported
            for model in models))

        # Because the list of models is often so huge, including it
        # directly within the YAML file severely impacts editability and
        # readability. Since in post-processing the keys (command names)
        # are liable to change as well, we can't use those to associate
        # the model lists either. Hence every distinct list gets a name.
        if supported_models not in modelsets:
            modelsets[supported_models] = 'set%d' % (len(modelsets) + 1)
        setname = modelsets[supported_models]

        # Fix up the description.
        desc = re.sub(r'\*\d*$', '', desc)  # remove footnote refs
        if desc.startswith('sets'):
            # Multiple whitespace here is often used to indicate
            # multiple possible values, make it look nicer.
            desc = re.sub(r'\s\s\s+', ', ', desc)

        # Try to determine a readable name.
        command_name = data[prefix]['name']
        name = None
        if key == 'QSTN':
            name = 'query'
        elif 'nnn' in key or 'bbb' in key:
            # With these sorts of values, we already know we can't get
            # anything useful out of the long descriptions.
            name = None
        elif desc.startswith('sets') and 'Wrap-Around' not in desc:
            # When the description tells us it sets something, use the
            # "what" as the value name. Except: for wrap-around commands
            # it's better to base off the internal name (e.g. up or down).
            name = desc.replace('sets', '')
            name = re.sub(r'\(.*\)$', '', name)  # remove trailing brackets
            if ',' in name or '/' in name:
                # Commas here (inserted above) indicate multiple values,
                # so does /.
                candidates = [
                    _remove_dups(make_command(part), command_name, group)
                    for part in re.split(r'[,/]', name)]
                name = FlowStyleTuple([c for c in candidates if c])
            else:
                name = _remove_dups(make_command(name), command_name, group)
        elif isinstance(key, _TEXT_TYPE):
            if key == 'TG':
                name = 'toggle'
            else:
                # Use the internal command itself, if it's not a range.
                # It sometimes ends in "Key"; remove that.
                name = make_command(re.sub(r'\s*Key$', '', key))

        this = data[prefix]['values'][key] = OrderedDict()
        if name:
            this['name'] = name
        this['description'] = desc
        this['models'] = setname

    return data
# The .xlsx workbook is a binary file, so it has to be read as bytes; text
# mode would try to decode it (and corrupts it on some platforms).
# NOTE(review): the (format, stream) argument order matches older tablib
# releases; newer ones expect ``load(stream, format)`` - confirm against the
# installed version.
with open(sys.argv[1], 'rb') as f:
    book = tablib.Databook().load('xlsx', f.read())

# Model sets collect unique combinations of supported models.
model_sets = OrderedDict()

# Each command group lives on its own sheet of the workbook.
sheets = book.sheets()
data = OrderedDict((
    ('main', import_sheet('main', sheets[4], model_sets)),
    ('zone2', import_sheet('zone2', sheets[5], model_sets)),
    ('zone3', import_sheet('zone3', sheets[6], model_sets)),
    ('zone4', import_sheet('zone4', sheets[7], model_sets)),
    ('dock', import_sheet('dock', sheets[8], model_sets)),
))
# In the output the mapping is inverted: set name -> tuple of model names.
data['modelsets'] = OrderedDict(zip(model_sets.values(), model_sets.keys()))
# The following is what it takes to output proper OrderedDicts with PyYAML.
def represent_odict(dump, tag, mapping, flow_style=None):
    """Build a YAML mapping node from ``mapping`` without sorting its items.

    Like BaseRepresenter.represent_mapping, but does not issue the sort().

    :param dump: the dumper doing the representation; its ``alias_key``,
        ``represented_objects``, ``represent_data`` and ``default_flow_style``
        are used.
    :param tag: YAML tag to give the mapping node.
    :param mapping: object with an ``items()`` method, or an iterable of
        ``(key, value)`` pairs.
    :param flow_style: explicit flow style for the node; when ``None`` it is
        derived from the dumper default or from the node contents.
    :return: the ``yaml.MappingNode`` that was built.
    """
    pairs = []
    node = yaml.MappingNode(tag, pairs, flow_style=flow_style)
    # Register the node before descending into the items.
    if dump.alias_key is not None:
        dump.represented_objects[dump.alias_key] = node

    def is_plain_scalar(candidate):
        return isinstance(candidate, yaml.ScalarNode) and not candidate.style

    items = list(mapping.items()) if hasattr(mapping, 'items') else mapping
    only_plain_scalars = True
    for key, val in items:
        key_node = dump.represent_data(key)
        val_node = dump.represent_data(val)
        if not (is_plain_scalar(key_node) and is_plain_scalar(val_node)):
            only_plain_scalars = False
        pairs.append((key_node, val_node))

    if flow_style is None:
        # The dumper-wide default wins; otherwise flow style is used only if
        # all keys and values are plain scalars.
        if dump.default_flow_style is None:
            node.flow_style = only_plain_scalars
        else:
            node.flow_style = dump.default_flow_style
    return node
def _represent_ordered_dict(dumper, value):
    """Represent an OrderedDict as a plain YAML map, keys left unsorted."""
    return represent_odict(dumper, 'tag:yaml.org,2002:map', value)


def _represent_flow_style_tuple(dumper, value):
    """Represent a FlowStyleTuple as a YAML sequence in flow style."""
    return yaml.SafeDumper.represent_sequence(
        dumper, 'tag:yaml.org,2002:seq', value, flow_style=True)


yaml.SafeDumper.add_representer(OrderedDict, _represent_ordered_dict)
# Be sure to not use flow style, since this makes merging in changes harder,
# except for special tuples, so we have a way to display small multi-value
# sequences in one line.
yaml.SafeDumper.add_representer(FlowStyleTuple, _represent_flow_style_tuple)
# Comment header placed in front of the generated YAML document: it records
# the generating script, the input file and the time of generation.
banner = """# Last generated
# by %s
# from %s
# at %s
#
# This file can and should be manually changed to fix things the
# automatic import didn't and often can't do right. These changes
# should be tracked in source control, so they can be merged with
# new generated versions of the file.
""" % (
    os.path.basename(sys.argv[0]),
    os.path.basename(sys.argv[1]),
    datetime.now(),
)
print(banner)

# The document itself, in block style.
document = yaml.safe_dump(data, default_flow_style=False)
print(document)