-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathcfp_common_v1.py
101 lines (84 loc) · 4.34 KB
/
cfp_common_v1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# This file is for GRAMMAR_VERSION == 1 (CfpCommonV1.version() returns 1; this
# comment previously said 0 -- confirm). Future versions may have to wrap it
# in a class to access it when decoding older grammars.
import cfp_common
import nltk
import sys
class CfpCommonV1(cfp_common.CfpCommon):
    """Grammar tables and helper hooks reporting grammar version 1.

    Subclasses ``cfp_common.CfpCommon``.  The class-level tables are keyed by
    ``nltk.Nonterminal`` symbols and are handed out unchanged (not copied) by
    the accessor methods below.
    """

    # top-level section -> (list weight, list nonterminal for that section)
    list_weights = {
        nltk.Nonterminal("CFP_TOPIC_SECTION"):
            (1, nltk.Nonterminal("CFP_TOPIC_LIST")),
        nltk.Nonterminal("LOC_SECTION"):
            (.5, nltk.Nonterminal("LOC_LIST")),
        nltk.Nonterminal("ORGS_SECTION"):
            (1, nltk.Nonterminal("ORGS_LIST")),
        nltk.Nonterminal("STEER_SECTION"):
            (1, nltk.Nonterminal("STEER_LIST")),
        nltk.Nonterminal("KEYNOTE_SECTION"):
            (7, nltk.Nonterminal("KEYNOTE_LIST")),
        nltk.Nonterminal("PC_SECTION"):
            (5, nltk.Nonterminal("PC_LIST")),
    }

    # nonterminals returned by list_recursive_terms()
    recursive_terms = [
        nltk.Nonterminal("CFP_TOPIC_LIST"),
        nltk.Nonterminal("PROF_LIST_PAREN"),
        nltk.Nonterminal("PROF_LIST_COMMA"),
        nltk.Nonterminal("PROF_LIST_DASH"),
        nltk.Nonterminal("LOC_LIST"),
        nltk.Nonterminal("KEYNOTE_LIST_DASH"),
    ]

    # nonterminal -> integer returned by append_newlines(); every entry is 1
    # (presumably the number of newlines to append -- confirm in cfp_common)
    newline_terms = {
        nltk.Nonterminal("CFP_GREETING"): 1,
        nltk.Nonterminal("CFP_TOPIC_HEADER"): 1,
        nltk.Nonterminal("CFP_TOPIC_LIST_ITEM"): 1,
        nltk.Nonterminal("PROF_LIST_PAREN_ITEM"): 1,
        nltk.Nonterminal("PROF_LIST_COMMA_ITEM"): 1,
        nltk.Nonterminal("PROF_LIST_DASH_ITEM"): 1,
        nltk.Nonterminal("KEYNOTE_ITEM_DASH"): 1,
        nltk.Nonterminal("ORGS_HEADER"): 1,
        nltk.Nonterminal("PC_HEADER"): 1,
        nltk.Nonterminal("STEER_HEADER"): 1,
        nltk.Nonterminal("KEYNOTE_HEADER"): 1,
        nltk.Nonterminal("LOC_HEADER"): 1,
        nltk.Nonterminal("LOC_PLACE_ITEM"): 1,
        nltk.Nonterminal("LOC_UNIV_ITEM"): 1,
        nltk.Nonterminal("DATE_HEADER"): 1,
        nltk.Nonterminal("SUBSTITUTE_DATE_NL"): 1,
        nltk.Nonterminal("DATE_TYPE_1_NL"): 1,
        nltk.Nonterminal("DATE_TYPE_2_NL"): 1,
        nltk.Nonterminal("DATE_TYPE_3_NL"): 1,
        nltk.Nonterminal("DATE_TYPE_4_NL"): 1,
        nltk.Nonterminal("CFP_INTRO_SECTION"): 1,
        nltk.Nonterminal("CFP_SCOPE_SECTION"): 1,
        nltk.Nonterminal("CFP_SUBMIT_SECTION"): 1,
        nltk.Nonterminal("SPACE_NEWLINE"): 1,
    }

    # nonterminal -> bool returned by choose_last_or_nots()
    last_or_not_terms = {nltk.Nonterminal("SUBMIT_CLOSING"): False}

    @staticmethod
    def version():
        """Return the version number of this class (1)."""
        return 1

    def chars_to_remove_a_space_before(self):
        """Return the backslash-escaped punctuation string for this hook.

        Raw string: the value is byte-identical to the former non-raw literal,
        but no longer relies on invalid escape sequences being passed through.
        NOTE(review): the escaping suggests the caller embeds this in a regex
        character class -- confirm in cfp_common.
        """
        return r'.,:;\?\)\!'

    def chars_to_remove_a_space_after(self):
        """Return the backslash-escaped opening parenthesis for this hook.

        Raw string for the same reason as chars_to_remove_a_space_before().
        """
        return r'\('

    def list_recursive_terms(self):
        """Return the class-level ``recursive_terms`` list (not a copy)."""
        return CfpCommonV1.recursive_terms

    def append_newlines(self):
        """Return the class-level ``newline_terms`` dict (not a copy)."""
        return CfpCommonV1.newline_terms

    def choose_last_or_nots(self):
        """Return the class-level ``last_or_not_terms`` dict (not a copy)."""
        return CfpCommonV1.last_or_not_terms

    def calc_list_bits(self, msg_len, body_prod):
        """Split a share of ``msg_len`` among the lists used by ``body_prod``.

        Args:
            msg_len: numeric size of the message to spread over the lists
                (the budget is described in bits by the comment below).
            body_prod: object whose ``rhs()`` returns the symbols of the body
                production; only sections found there are considered.

        Returns:
            dict mapping each used list nonterminal to an ``int`` allotment,
            ``int(msg_len * 0.85 * weight / total_weight)``.  Empty when no
            section of ``list_weights`` appears in ``body_prod.rhs()``.
        """
        # we only care about lists that are actually used in the body
        body_symbols = body_prod.rhs()
        # .items() rather than .iteritems() so this runs on Python 2 and 3
        used_lists = {entry[1]: entry[0]
                      for section, entry in self.list_weights.items()
                      if section in body_symbols}
        total_weight = sum(used_lists.values())
        # we'll get most of our entropy from lists, but we should make
        # sure that the bits are spread out among the lists as much as
        # possible.  So given a set of lists, each with weight w (total
        # weight of W), and a number of bits remaining = B, make sure
        # B*w/W bits are used up in this list.  Multiply by some fraction
        # since other parts of the message will use some bits too.
        fraction_in_lists = 0.85
        # total_weight is only divided by when used_lists is non-empty
        return {list_term: int(msg_len * fraction_in_lists * weight
                               / total_weight)
                for list_term, weight in used_lists.items()}

    def header_cfg_filename(self):
        """Return the file name of the header grammar."""
        return "cfp_header.cfg"

    def body_cfg_filename(self):
        """Return the file name of the body grammar."""
        return "cfp_body.cfg"
# Register this class with the CfpCommon base class when the module is
# imported (presumably so it can be looked up by version() -- confirm in
# cfp_common).
cfp_common.CfpCommon.register_common(CfpCommonV1)