diff --git a/.gitignore b/.gitignore
index f3643de..bf6dfb4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,11 +1,7 @@
 __pycache__
-build
+*.pyc
 trf.egg-info
-tests/faster-rnnlm
-tests/uniq.dat
-tests/test.input
-tests/rnnlm.output
-tests/__init__.pyc
-tests/test_acceptability.pyc
+
+build
 tools
 data
diff --git a/tests/test_acceptability.py b/tests/test_acceptability.py
index 4f12787..7b51d8e 100644
--- a/tests/test_acceptability.py
+++ b/tests/test_acceptability.py
@@ -1,6 +1,4 @@
 import unittest
-import warnings
-import tempfile
 
 from trf.acceptability import Acceptability
 from trf.util import check_executable
@@ -20,34 +18,34 @@ def setUp(self):
                                            self.delimiter,
                                            self.rnnlm_model_path)
 
-    def test_rnnlm_scores(self):
-        scores = self.acceptability.rnnlm_scores
+    def test_log_prob(self):
+        scores = self.acceptability._calc_log_prob_scores()
         self.assertAlmostEqual(scores[0], -11.571, places=2)
 
     def test_unigram_scores(self):
-        scores = self.acceptability.unigram_scores
+        scores = self.acceptability._calc_unigram_scores()
         self.assertAlmostEqual(scores[0], -31.457, places=2)
 
-    # def test_mean_unigram_scores(self):
+    def test_mean_lp_scores(self):
-    #     scores = self.acceptability.mean_unigram_scores
-    #     self.assertAlmostEqual(scores[0], -2.12, places=2)
+        score = self.acceptability.mean_lp
+        self.assertAlmostEqual(score, -2.892, places=2)
 
-    # def test_normalized_scores_div(self):
+    def test_norm_lp_div(self):
-    #     scores = self.acceptability.normalized_scores_div
-    #     self.assertAlmostEqual(scores[0], -5.446, places=2)
+        score = self.acceptability.norm_lp_div
+        self.assertAlmostEqual(score, -0.3678, places=2)
 
-    # def test_normalized_scores_sub(self):
+    def test_norm_lp_sub(self):
-    #     scores = self.acceptability.normalized_scores_sub
-    #     self.assertAlmostEqual(scores[0], -9.447, places=2)
+        score = self.acceptability.norm_lp_sub
+        self.assertAlmostEqual(score, 19.885, places=2)
 
-    # def test_normalized_scores_len(self):
+    def test_slor(self):
-    #     scores = self.acceptability.normalized_scores_len
-    #     self.assertAlmostEqual(scores[0], -0.9447, places=2)
+        score = self.acceptability.slor
+        self.assertAlmostEqual(score, 4.9713, places=2)
 
     def tearDown(self):
         pass
diff --git a/trf/__init__.pyc b/trf/__init__.pyc
new file mode 100644
index 0000000..ed8b29e
Binary files /dev/null and b/trf/__init__.pyc differ
diff --git a/trf/acceptability.py b/trf/acceptability.py
index 2328616..cb9f944 100644
--- a/trf/acceptability.py
+++ b/trf/acceptability.py
@@ -8,7 +8,6 @@
 from janome.tokenizer import Tokenizer
 
 import trf.constant as const
-from trf.analyser import Tree
 from trf.util import split_text
@@ -17,8 +16,8 @@ class Acceptability:
 
     def __init__(self, text: str, delimiter: str, rnnlm_model_path: str):
         self.text = text
-        self.sentences = split_text(text, delimiter)
-        self.tss = tokenize_by_janome(self.sentences)
+        self.sentences = split_text(text, delimiter)  # type: List[str]
+        lengths, self.tss = tokenize(self.sentences)
 
         if not os.path.isfile(rnnlm_model_path):
             raise FileNotFoundError(errno.ENOENT,
@@ -28,26 +27,27 @@ def __init__(self, text: str, delimiter: str, rnnlm_model_path: str):
         self.word_freq, self.n_total_words = self._load_word_freq(threshold=1)
 
-        self.rnnlm_scores = self.get_rnnlm_scores()
-        self.unigram_scores = self.calc_unigram_scores()
-
-        self.mean_unigram_scores = self.calc_mean_unigram_scores()
-
-        # self.normalized_scores_div = \
-        #     self.calc_normalized_scores('div')
-
-        # self.normalized_scores_sub = \
-        #     self.calc_normalized_scores('sub')
-
-        # self.normalized_scores_len = \
-        #     self.calc_normalized_scores('len')
-
-        self.mean_loglikelihood = \
-            None \
-            if None in self.rnnlm_scores \
-            else numpy.mean(self.rnnlm_scores)
-
-    def get_rnnlm_scores(self) -> List[Union[None, float]]:
+        log_prob_scores = \
+            self._calc_log_prob_scores()
+        unigram_scores = \
+            self._calc_unigram_scores()
+
+        mean_lp_scores = \
+            calc_mean_lp_scores(log_prob_scores, lengths)
+        norm_lp_div_scores = \
+            calc_norm_lp_div_scores(log_prob_scores, unigram_scores)
+        norm_lp_sub_scores = \
+            calc_norm_lp_sub_scores(log_prob_scores, unigram_scores)
+        slor_scores = \
+            calc_slor_scores(norm_lp_sub_scores, lengths)
+
+        self.log_prob = average(log_prob_scores)
+        self.mean_lp = average(mean_lp_scores)
+        self.norm_lp_div = average(norm_lp_div_scores)
+        self.norm_lp_sub = average(norm_lp_sub_scores)
+        self.slor = average(slor_scores)
+
+    def _calc_log_prob_scores(self) -> List[Union[None, float]]:
         """Get log likelihood scores by calling RNNLM
         """
@@ -62,7 +62,7 @@ def get_rnnlm_scores(self) -> List[Union[None, float]]:
                        '-test', textfile.name]
             process = Popen(command, stdout=PIPE, stderr=PIPE)
-            output , err = process.communicate()
+            output, err = process.communicate()
             lines = [line.strip() for line in output.decode('UTF-8').split('\n')
                      if line.strip() != '']
             scores = []
@@ -95,7 +95,7 @@ def _load_word_freq(self, threshold: int) -> Tuple[Dict[str, int], int]:
 
         return (word_freq, n_total_words)
 
-    def calc_unigram_scores(self) -> List[float]:
+    def _calc_unigram_scores(self) -> List[float]:
 
         unigram_scores = []
         for ts in self.tss:
@@ -110,47 +110,120 @@ def calc_unigram_scores(self) -> List[float]:
 
         return unigram_scores
 
-    def calc_mean_unigram_scores(self) -> List[Union[None, float]]:
-        mean_unigram_scores = []
-        for score, sentence in zip(self.unigram_scores, self.sentences):
-            n = len(self.sentences)
-            x = None \
-                if score is None or n == 0 \
-                else float(score) / float(len(self.sentences))
-            mean_unigram_scores.append(x)
-        return mean_unigram_scores
-
-    def calc_normalized_scores(self, method: str) -> List[Union[None, float]]:
-
-        normalized_scores = []
-        for score, unigram_score, s in zip(self.rnnlm_scores,
-                                           self.unigram_scores,
-                                           self.sentences):
-            x = None \
-                if score is None or numpy.isclose(unigram_score,
-                                                  0.0, rtol=1e-05) \
-                else _f(score, unigram_score, len(s), method)
-            normalized_scores.append(x)
-        return normalized_scores
-
-
-def _f(score: float, unigram_score: float, length: int, method: str) -> float:
-
-    if method == 'div':
-        return (-1) * float(score) / float(unigram_score)
-    elif method == 'sub':
-        return float(score) - float(unigram_score)
-    elif method == 'len':
-        return (float(score) - float(unigram_score)) / length
-    else:
-        raise ValueError
-
-
-def tokenize_by_janome(sentences: List[str]) -> List[List[str]]:
+
+def average(xs: List[Union[None, float]]) -> float:
+    """Calculate the arithmetic mean of the given values (possibly None)
+    >>> '{:.2f}'.format(average([None, 1.0, 2.0]))
+    '1.50'
+    """
+    return numpy.mean([x for x in xs if x is not None])
+
+
+def calc_mean_lp_scores(log_prob_scores: List[float],
+                        lengths: List[int]) -> List[Union[None, float]]:
+    r"""
+    .. math:
+        \frac{%
+            \log P_\text{model}\left(\xi\right)
+        }{%
+            \text{length}\left(\xi\right)
+        }
+    >>> '{:.3f}'.format(calc_mean_lp_scores([-14.7579], [4])[0])
+    '-3.689'
+    """
+    mean_lp_scores = []
+    for score, length in zip(log_prob_scores, lengths):
+        x = None \
+            if score is None or length == 0 \
+            else float(score) / float(length)
+        mean_lp_scores.append(x)
+    return mean_lp_scores
+
+
+def calc_norm_lp_div_scores(
+        log_prob_scores: List[float],
+        unigram_scores: List[float]) -> List[Union[None, float]]:
+    r"""
+    .. math:
+        \frac{%
+            \log P_\text{model}\left(\xi\right)
+        }{%
+            \log P_\text{unigram}\left(\xi\right)
+        }
+    >>> '{:.3f}'.format(calc_norm_lp_div_scores([-14.7579], [-35.6325])[0])
+    '-0.414'
+    """
+    results = []
+    for log_prob, unigram_score in zip(log_prob_scores, unigram_scores):
+        if log_prob is None or numpy.isclose(unigram_score, 0.0, rtol=1e-05):
+            x = None
+        else:
+            x = (-1.0) * float(log_prob) / float(unigram_score)
+        results.append(x)
+    return results
+
+
+def calc_norm_lp_sub_scores(
+        log_prob_scores: List[float],
+        unigram_scores: List[float]) -> List[Union[None, float]]:
+    r"""
+    .. math:
+        \log P_\text{model}\left(\xi\right)
+        - \log P_\text{unigram}\left(\xi\right)
+    >>> '{:.3f}'.format(calc_norm_lp_sub_scores([-14.7579], [-35.6325])[0])
+    '20.875'
+    """
+
+    results = []
+    for log_prob, unigram_score in zip(log_prob_scores, unigram_scores):
+        if log_prob is None or numpy.isclose(unigram_score, 0.0, rtol=1e-05):
+            x = None
+        else:
+            x = float(log_prob) - float(unigram_score)
+        results.append(x)
+    return results
+
+
+def calc_slor_scores(norm_lp_sub_scores: List[float],
+                     lengths: List[int]) -> List[Union[None, float]]:
+    r"""Calculate SLOR (Syntactic Log-Odds Ratio)
+    .. math:
+        \frac{%
+            \log P_\text{model}\left(\xi\right)
+            - \log P_\text{unigram}\left(\xi\right)
+        }{%
+            \text{length}\left(\xi\right)
+        }
+    >>> '{:.3f}'.format(calc_slor_scores([20.8746], [4])[0])
+    '5.219'
+    """
+
+    results = []
+    for norm_lp_sub_score, length in zip(norm_lp_sub_scores, lengths):
+        if (norm_lp_sub_score is None) or length == 0:
+            x = None
+        else:
+            x = norm_lp_sub_score / length
+        results.append(x)
+    return results
+
+
+def tokenize(sentences: List[str]) -> Tuple[List[int], List[List[str]]]:
+
     tokenizer = Tokenizer()
-    tss = []
+    lengths = []
+    texts = []
     for s in sentences:
         result = tokenizer.tokenize(s)
-        ts = ' '.join([t.surface for t in result])
-        tss.append(ts)
-    return tss
+
+        surfaces = [t.surface for t in result]
+        lengths.append(len(surfaces))
+
+        text = ' '.join(surfaces)
+        texts.append(text)
+    return lengths, texts
+
+
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod(verbose=True)
diff --git a/trf/cmdline.py b/trf/cmdline.py
index 7280e14..8717917 100644
--- a/trf/cmdline.py
+++ b/trf/cmdline.py
@@ -21,8 +21,16 @@ def translate(en: str):
         return '係り受け木の深さ'
     elif en == 'r_conditional':
         return '仮定節'
-    elif en == 'mean_loglikelihood':
-        return '言語モデルの尤度'
+    elif en == 'log_prob':
+        return '容認度 (LogProb)'
+    elif en == 'mean_lp':
+        return '容認度 (Mean LP)'
+    elif en == 'norm_lp_div':
+        return '容認度 (Norm LP (Div))'
+    elif en == 'norm_lp_sub':
+        return '容認度 (Norm LP (Sub))'
+    elif en == 'slor':
+        return '容認度 (SLOR)'
     else:
         return en
@@ -64,6 +72,11 @@ def show(self, lang: str='ja'):
             print('Unsupported language')
             sys.exit(1)
 
+
+def _f(score: float) -> str:
+    return 'None' if score is None else '{:.2f}'.format(score)
+
+
 def main():
 
     executables = ['juman', 'knp', 'rnnlm']
@@ -128,13 +141,14 @@
     Section('syntax', metrics).show()
 
     metrics = []
-    acceptability = \
-        Acceptability(text,
+    a = Acceptability(text,
                       args.delimiter,
                       args.rnnlm_model_path)
-    score = acceptability.mean_loglikelihood
-    score = 'None' if score is None else '{:.2f}'.format(score)
-    metrics.append(Metric('mean_loglikelihood', score))
+    metrics.append(Metric('log_prob', _f(a.log_prob)))
+    metrics.append(Metric('mean_lp', _f(a.mean_lp)))
+    metrics.append(Metric('norm_lp_div', _f(a.norm_lp_div)))
+    metrics.append(Metric('norm_lp_sub', _f(a.norm_lp_sub)))
+    metrics.append(Metric('slor', _f(a.slor)))
 
     Section('language_model', metrics).show()
diff --git a/trf/constant.pyc b/trf/constant.pyc
new file mode 100644
index 0000000..c71346c
Binary files /dev/null and b/trf/constant.pyc differ
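
For reference, a minimal sketch (not part of the patch) of how the new module-level helpers introduced in trf/acceptability.py could be exercised outside the unittest suite, assuming the patched trf.acceptability is importable; the input figures below simply reuse the doctest values shown in the diff, and expected outputs are noted in comments.

    # Hypothetical sanity check against the doctest figures from the diff above.
    from trf.acceptability import (average,
                                   calc_mean_lp_scores,
                                   calc_norm_lp_div_scores,
                                   calc_norm_lp_sub_scores,
                                   calc_slor_scores)

    log_prob_scores = [-14.7579]   # RNNLM log probability per sentence
    unigram_scores = [-35.6325]    # unigram log probability per sentence
    lengths = [4]                  # token count per sentence

    mean_lp = calc_mean_lp_scores(log_prob_scores, lengths)                  # ~[-3.689]
    norm_lp_div = calc_norm_lp_div_scores(log_prob_scores, unigram_scores)   # ~[-0.414]
    norm_lp_sub = calc_norm_lp_sub_scores(log_prob_scores, unigram_scores)   # ~[20.875]
    slor = calc_slor_scores(norm_lp_sub, lengths)                            # ~[5.219]

    # average() skips None entries, mirroring how the Acceptability
    # attributes (mean_lp, norm_lp_div, norm_lp_sub, slor) are aggregated.
    print(average(mean_lp), average(norm_lp_div), average(norm_lp_sub), average(slor))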