-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlogreg_visualize.py
136 lines (106 loc) · 5.15 KB
/
logreg_visualize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Play with TensorBoard
usage:
$ tensorboard --logdir=path/to/log-directory
"""
import sys
import tensorflow as tf
import ipdb
import random
import time
import logreg_online as util
def f_measure(precision, recall):
    """Return the F1 score (harmonic mean) of *precision* and *recall*.

    Returns 0.0 when both inputs are zero instead of dividing by zero,
    which happens when the classifier produced no true positives.
    """
    denominator = precision + recall
    if denominator == 0:
        return 0.0
    return 2 * (precision * recall) / denominator


if __name__ == "__main__":
    # Train a logistic-regression classifier over averaged word embeddings,
    # log summaries for TensorBoard, then report accuracy / precision /
    # recall / F-measure on the evaluation file.
    #
    # Explicit check instead of `assert`: asserts are stripped under `-O`.
    if len(sys.argv) <= 2:
        sys.exit("usage: {} TRAIN_FILE EVAL_FILE".format(sys.argv[0]))

    SHUFFLE = True
    NUM_EPOCHS = 5

    train_file_path = sys.argv[1]
    eval_file_path = sys.argv[2]
    dim = 50

    # Open the files
    with open(train_file_path) as f:
        train_txt = f.read().strip()
    with open(eval_file_path) as f:
        test_txt = f.read().strip()

    # Get the data to feed to TensorFlow
    train_data, vocab_size = util.read_data(train_txt, -1)
    test_data, _ = util.read_data(test_txt, vocab_size)

    ### Build the graph ###
    mixed_graph = tf.Graph()
    with mixed_graph.as_default():
        # Variable definitions
        # Binary classification, hence [dim x 2]
        weight = tf.Variable(tf.random_uniform([dim, 2]), name="weight")
        # Binary classification, hence [1 x 2]
        bias = tf.Variable(tf.random_uniform([1, 2]), name="bias")
        embeddings = tf.Variable(tf.random_uniform([vocab_size, dim]), name="embeddings")

        # Placeholder definitions
        # Declared this way, indices is a list; note shape=None. The number
        # of indices is unrelated to max_index.
        indices = tf.placeholder(tf.int32, shape=None, name="indices")  # tf.shape(indices): e.g. [847]
        signed_label = tf.placeholder(tf.int32, shape=None, name="signed_label")  # tf.shape(signed_label): [] (scalar)
        label = tf.div((signed_label + 1), 2)  # {-1,1} --> {0,1}

        # Intermediate tensors
        # indices is a list. tf.shape(vectors): e.g. [847, 50]
        vectors = tf.nn.embedding_lookup(embeddings, indices)
        # Without keep_dims=True the rank would drop to 1
        # (a "vector" rather than a "matrix").
        ave_vector = tf.reduce_mean(vectors, axis=0, keep_dims=True)  # tf.shape(ave_vector): [1, 50]

        # Logistic regression computation
        logit = tf.add(tf.matmul(ave_vector, weight), bias)  # tf.shape(logit): [1,2]
        y = tf.nn.softmax(logit)  # tf.shape(y): [1,2]
        # tf.one_hot(indices, depth, on_value=None, off_value=None, axis=None, dtype=None, name=None)
        one_hot = tf.one_hot(label, 2)  # tf.shape(one_hot): [2]
        # NOTE(review): tf.log(y) is -inf if a probability underflows to 0;
        # consider a fused softmax-cross-entropy op if NaNs show up.
        cross_entropy = -tf.reduce_sum(tf.multiply(one_hot, tf.log(y)))  # tf.shape(cross_entropy): [] (scalar)

        # Training setup
        optimizer = tf.train.AdamOptimizer()  # use Adam as the optimizer
        train_op = optimizer.minimize(cross_entropy)  # define the train operation

        # Evaluation graph
        predicted_label = tf.argmax(y, axis=1)
        accuracy, accuracy_update_op = tf.metrics.accuracy(label, predicted_label)
        precision, precision_update_op = tf.metrics.precision(label, predicted_label)
        recall, recall_update_op = tf.metrics.recall(label, predicted_label)

        # Summaries for TensorBoard
        summary01 = tf.summary.scalar("cross_entropy", cross_entropy)
        summary02 = tf.summary.histogram("weight", weight)
        summary03 = tf.summary.scalar("abs_weight", tf.reduce_sum(tf.square(weight)))
        merged = tf.summary.merge_all()

    with tf.Session(graph=mixed_graph) as sess:
        # example: Fri_Jun__2_16:07:20_2017
        board_name = time.ctime(time.time()).replace(" ", "_")
        tb_logdir = "/tmp/tensorflow_train/" + board_name
        # for tensorboard
        train_writer = tf.summary.FileWriter(tb_logdir, graph=sess.graph)

        # Close the writer even if training fails so that buffered summary
        # events are flushed to disk.
        try:
            ### Training ###
            # Initialization
            train_init_op = tf.group(
                tf.global_variables_initializer(),
                tf.local_variables_initializer()
            )
            sess.run(train_init_op)

            print("--- training ---")
            # Shuffle the training data
            if SHUFFLE:
                # A leading * unpacks the tuple into positional arguments
                labels_, fvs_ = util.shuffle(*train_data)
            else:
                labels_, fvs_ = train_data

            num_labels = len(labels_)
            for epoch in range(NUM_EPOCHS):
                for i, (label_, fv_) in enumerate(zip(labels_, fvs_)):
                    feed = {signed_label: label_, indices: fv_}
                    _, cur_entropy, summary = sess.run(
                        [train_op, cross_entropy, merged], feed_dict=feed)
                    # Log every 200th example to keep the event file small
                    if i % 200 == 0:
                        train_writer.add_summary(summary, global_step=(epoch*num_labels + i))
                        print("epoch:{}\ttrain_data:{}\tcross_entropy:{}".format(epoch, i, cur_entropy))
            print("--- training finished ---")
        finally:
            train_writer.close()

        ### Evaluation ###
        # Initialization (local variables only): resets the metric counters
        eval_init_op = tf.local_variables_initializer()
        sess.run(eval_init_op)

        print("--- evaluation ---")
        labels_, fvs_ = test_data
        # Defaults so the report below still works on an empty evaluation set
        acc = pre = rec = 0.0
        for i, (label_, fv_) in enumerate(zip(labels_, fvs_)):
            feed = {signed_label: label_, indices: fv_}
            # Each update op returns the running value of its metric
            acc, pre, rec = sess.run([
                accuracy_update_op,
                precision_update_op,
                recall_update_op
            ], feed_dict=feed)

        print("acc:{}\tpre:{}\trec:{}".format(acc, pre, rec))
        print("f-measure:", f_measure(pre, rec))