Skip to content

Commit

Permalink
Add formulae in docstring
Browse files Browse the repository at this point in the history
  • Loading branch information
pecorarista committed Nov 8, 2017
1 parent 2a9e7b0 commit 442f425
Showing 1 changed file with 75 additions and 29 deletions.
104 changes: 75 additions & 29 deletions trf/acceptability.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,6 @@ def _load_word_freq(self, threshold: int) -> Tuple[Dict[str, int], int]:

return (word_freq, n_total_words)

def average(xs: List[Union[None, float]]) -> float:
    """Return the arithmetic mean of the entries of ``xs`` that are not None.

    The placeholder body ignored ``xs`` and always returned ``0.0``.  That
    value is still returned when ``xs`` holds nothing to average (empty or
    all ``None``), so callers that relied on a plain ``float`` result keep
    getting one.

    Args:
        xs: Scores to average; ``None`` entries are skipped.

    Returns:
        The mean of the non-None entries, or ``0.0`` if there are none.

    >>> '{:.2f}'.format(average([None, 1.0, 2.0]))
    '1.50'
    >>> average([None])
    0.0
    """
    present = [x for x in xs if x is not None]
    if not present:
        # Nothing to average: keep the placeholder's result instead of
        # dividing by zero.
        return 0.0
    return float(sum(present)) / len(present)

def calc_unigram_scores(self) -> List[float]:

unigram_scores = []
Expand All @@ -99,36 +96,80 @@ def calc_unigram_scores(self) -> List[float]:

return unigram_scores

def calc_mean_lp_scores(self) -> List[Union[None, float]]:
    """Return every log-probability score divided by its paired length.

    ``self.log_prob_scores`` and ``self.lenghts`` are paired positionally
    with ``zip``.  A pair contributes ``None`` when its score is ``None``
    or its length equals ``0``; otherwise it contributes
    ``float(score) / float(length)``.
    """
    # NOTE(review): ``lenghts`` looks like a misspelling of ``lengths``.  It
    # is kept because the attribute's definition is not visible from here --
    # confirm against the class before renaming.
    return [
        None if lp is None or size == 0 else float(lp) / float(size)
        for lp, size in zip(self.log_prob_scores, self.lenghts)
    ]

def calc_normalized_scores(self, method: str) -> List[Union[None, float]]:
    """Normalize every RNNLM score against its unigram score.

    ``self.rnnlm_scores``, ``self.unigram_scores`` and ``self.sentences``
    are walked in lockstep.  An entry becomes ``None`` when its score is
    ``None`` or its unigram score is numerically zero; otherwise it is
    ``_f(score, unigram_score, len(sentence), method)``.

    Args:
        method: Passed through unchanged to ``_f``.
    """
    triples = zip(self.rnnlm_scores, self.unigram_scores, self.sentences)
    return [
        None
        if model_score is None
        or numpy.isclose(unigram_score, 0.0, rtol=1e-05)
        else _f(model_score, unigram_score, len(sentence), method)
        for model_score, unigram_score, sentence in triples
    ]

def average(xs: List[Union[None, float]]) -> float:
    """Return ``numpy.mean`` of the entries of ``xs`` that are not None.

    ``None`` entries are dropped first; whatever remains (possibly an
    empty list) is handed to ``numpy.mean`` as is.

    >>> '{:.2f}'.format(average([None, 1.0, 2.0]))
    '1.50'
    """
    present = list(filter(lambda value: value is not None, xs))
    return numpy.mean(present)


def calc_mean_lp_scores(log_prob_scores: List[float],
                        lengths: List[int]) -> List[Union[None, float]]:
    r"""Divide each log-probability score by the length paired with it.

    .. math::

        \frac{%
            \log P_\text{model}\left(\xi\right)
        }{%
            \text{length}\left(\xi\right)
        }

    The two lists are paired positionally with ``zip``.  A pair yields
    ``None`` when its score is ``None`` or its length is ``0``.

    >>> '{:.3f}'.format(calc_mean_lp_scores([-14.7579], [4])[0])
    '-3.689'
    """
    return [
        None if lp is None or size == 0 else float(lp) / float(size)
        for lp, size in zip(log_prob_scores, lengths)
    ]


def calc_norm_lp_div(log_prob_scores: List[float],
                     unigram_scores: List[float]) -> List[Union[None, float]]:
    r"""Normalize each log-probability by division with its unigram score.

    .. math::

        \frac{%
            \log P_\text{model}\left(\xi\right)
        }{%
            \log P_\text{unigram}\left(\xi\right)
        }

    The code multiplies the quotient by ``-1.0``.  A pair yields ``None``
    when the log-probability is ``None`` or the unigram score is
    numerically zero according to ``numpy.isclose``.

    >>> '{:.3f}'.format(calc_norm_lp_div([-14.7579], [-35.6325])[0])
    '-0.414'
    """
    normalized = []
    for model_lp, unigram_lp in zip(log_prob_scores, unigram_scores):
        # The None test comes first so that isclose is never evaluated for
        # a missing log-probability.
        unusable = model_lp is None \
            or numpy.isclose(unigram_lp, 0.0, rtol=1e-05)
        normalized.append(
            None if unusable
            else (-1.0) * float(model_lp) / float(unigram_lp))
    return normalized


def calc_norm_lp_sub(log_prob_scores: List[float],
                     unigram_scores: List[float]) -> List[Union[None, float]]:
    r"""Normalize each log-probability by subtracting its unigram score.

    .. math::

        \log P_\text{model}\left(\xi\right)
            - \log P_\text{unigram}\left(\xi\right)

    The two lists are paired positionally with ``zip``.  A pair yields
    ``None`` when the log-probability is ``None`` or the unigram score is
    numerically zero according to ``numpy.isclose``.

    Args:
        log_prob_scores: Model log-probabilities; entries may be ``None``.
        unigram_scores: Unigram log-probabilities paired with them.

    Returns:
        One entry per pair: the difference, or ``None`` as described above.

    >>> calc_norm_lp_sub([-1.5, None], [-4.0, -1.0])
    [2.5, None]
    """
    results = []
    # zip() over two lists yields 2-tuples; the loop used to unpack a third
    # name (``length``) and raised ValueError on the first pair.
    for log_prob, unigram_score in zip(log_prob_scores, unigram_scores):
        if log_prob is None or numpy.isclose(unigram_score, 0.0, rtol=1e-05):
            x = None
        else:
            x = float(log_prob) - float(unigram_score)
        results.append(x)
    return results


def _f(score: float, unigram_score: float, length: int, method: str) -> float:
    """Normalize ``score`` against ``unigram_score`` according to ``method``.

    Supported methods:

    * ``'div'``: ``-score / unigram_score``
    * ``'sub'``: ``score - unigram_score``
    * ``'len'``: ``(score - unigram_score) / length``

    Args:
        score: Model score; converted with ``float``.
        unigram_score: Unigram score; converted with ``float``.
        length: Divisor used only by the ``'len'`` method.
        method: One of ``'div'``, ``'sub'`` or ``'len'``.

    Returns:
        The normalized score.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    # A single flat dispatch: the ``'len'`` test used to appear twice, which
    # left it unclear which ``if`` the final ``else`` belonged to.
    if method == 'div':
        return (-1) * float(score) / float(unigram_score)
    if method == 'sub':
        return float(score) - float(unigram_score)
    if method == 'len':
        return (float(score) - float(unigram_score)) / length
    raise ValueError('unknown normalization method: {!r}'.format(method))
Expand All @@ -148,3 +189,8 @@ def tokenize(sentences: List[str]) -> Tuple[List[int], List[List[str]]]:
text = ' '.join(surfaces)
texts.append(text)
return lengths, texts


if __name__ == '__main__':
    # Runs only when the module is executed as a script: exercise the
    # doctests embedded in this module's docstrings and report each one.
    from doctest import testmod

    testmod(verbose=True)

0 comments on commit 442f425

Please sign in to comment.