transform_modules.py
"""
Simplified demo code implemented based on Tensorflow 1 for NeurIPS'22 paper:
"Pre-trained model reusability evaluation for small-data transfer learning".
The code is NOT runnable: it is only used for illustrating the task and model transform modules.
"""
import tensorflow as tf

xavier_initializer = tf.contrib.layers.xavier_initializer(dtype=tf.float32)


class TaskTransform:
    def __init__(self, input_dim=640, output_dim=640, scope='task_transform', reuse=True):
        self.input_dim, self.output_dim, self.scope = input_dim, output_dim, scope
        self.weights = {}
        # `reuse` must be passed by keyword: the second positional argument of
        # tf.variable_scope is `default_name`, not `reuse`.
        with tf.variable_scope(self.scope, reuse=reuse):
            self.weights['w_q'] = tf.get_variable('w_q', [input_dim, output_dim],
                                                  initializer=xavier_initializer, dtype=tf.float32)
            self.weights['w_k'] = tf.get_variable('w_k', [input_dim, output_dim],
                                                  initializer=xavier_initializer, dtype=tf.float32)
            self.weights['w_v'] = tf.get_variable('w_v', [input_dim, output_dim],
                                                  initializer=xavier_initializer, dtype=tf.float32)
            self.weights['w_fc'] = tf.get_variable('w_fc', [output_dim, output_dim],
                                                   initializer=xavier_initializer, dtype=tf.float32)
            self.weights['b_fc'] = tf.get_variable('b_fc', [output_dim],
                                                   initializer=xavier_initializer, dtype=tf.float32)

    def task_transform_block(self, inp):
        """
        INPUT:
            inp: input feature for each task class
        OUTPUT:
            each_class_x: output feature for each task class
            x: output feature for the task
        """
        w_k, w_v, w_q = self.weights['w_k'], self.weights['w_v'], self.weights['w_q']
        w_fc, b_fc = self.weights['w_fc'], self.weights['b_fc']
        outp1 = inp
        k, v, q = inp @ w_k, inp @ w_v, inp @ w_q
        # Scaled dot-product attention scores between task classes.
        a = tf.reduce_sum(tf.multiply(tf.expand_dims(q, axis=2), tf.expand_dims(k, axis=1)), axis=-1) \
            / tf.sqrt(tf.cast(self.output_dim, tf.float32))
        c = a @ v
        outp1 = outp1 + c  # residual connection
        # Pool over the class axis, then project, to obtain one task-level feature.
        outp2 = tf.reduce_mean(outp1, axis=-2)
        outp2 = tf.matmul(outp2, w_fc) + b_fc
        return outp1, outp2

    def forward(self, inp, reuse=True):
        with tf.variable_scope(self.scope, reuse=reuse):
            each_class_x, x = self.task_transform_block(inp)
        return each_class_x, x
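
# A minimal usage sketch (an assumption, not part of the original file): the
# shapes below are inferred from the ops in task_transform_block. With inp of
# shape [batch, n_class, input_dim], the attention matrix `a` has shape
# [batch, n_class, n_class], each_class_x is [batch, n_class, output_dim],
# and x is [batch, output_dim].
#
#   task_transform = TaskTransform(input_dim=640, output_dim=640, reuse=tf.AUTO_REUSE)
#   task_inp = tf.placeholder(tf.float32, [None, 5, 640])  # e.g. a 5-way task
#   each_class_x, x = task_transform.forward(task_inp, reuse=tf.AUTO_REUSE)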


class ModelTransform:
    def __init__(self, input_dim=640, output_dim=640, scope='model_transform', reuse=True):
        self.weights = {}
        self.input_dim, self.output_dim, self.scope = input_dim, output_dim, scope
        # As above, `reuse` must be a keyword argument to tf.variable_scope.
        with tf.variable_scope(scope, reuse=reuse):
            self.weights['w_q'] = tf.get_variable('w_q', [input_dim, output_dim],
                                                  initializer=xavier_initializer, dtype=tf.float32)
            self.weights['w_k'] = tf.get_variable('w_k', [input_dim, output_dim],
                                                  initializer=xavier_initializer, dtype=tf.float32)
            self.weights['w_v'] = tf.get_variable('w_v', [input_dim, output_dim],
                                                  initializer=xavier_initializer, dtype=tf.float32)
            self.weights['w_fc'] = tf.get_variable('w_fc', [output_dim, output_dim],
                                                   initializer=xavier_initializer, dtype=tf.float32)
            self.weights['b_fc'] = tf.get_variable('b_fc', [output_dim],
                                                   initializer=xavier_initializer, dtype=tf.float32)

    def weighted_ensemble_layer(self, z_c, p):
        """
        INPUT:
            z_c: features to combine.
            p: combination weights.
        OUTPUT:
            z: weighted combined features.
        """
        # Normalize the weights along the last axis, but only when their sum
        # exceeds 1 (the denominator is clipped below at 1.0).
        gamma = p / tf.repeat(tf.maximum(tf.reduce_sum(p, axis=-1, keepdims=True), 1.0),
                              tf.shape(p)[-1], axis=-1)
        gam = tf.repeat(tf.expand_dims(gamma, axis=-1), tf.shape(z_c)[-1], axis=-1)
        z = tf.multiply(z_c, gam)
        return z
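
    # Hypothetical worked example (not in the original): for a weight row
    # p = [2.0, 1.0, 1.0], sum(p) = 4.0 > 1.0, so gamma = [0.5, 0.25, 0.25];
    # for p = [0.2, 0.3, 0.5], sum(p) = 1.0, so gamma = p unchanged. Clipping
    # the denominator at 1.0 normalizes large weights without inflating small ones.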

    def model_transform_block(self, inp, each_class_x):
        """
        INPUT:
            inp: input feature for each model class
            each_class_x: input feature for each task class
        OUTPUT:
            c: output feature for the model
            a: task-model attention weights, used for attention supervision.
        """
        w_k, w_v, w_q = self.weights['w_k'], self.weights['w_v'], self.weights['w_q']
        w_fc, b_fc = self.weights['w_fc'], self.weights['b_fc']
        v, k, q = inp @ w_v, inp @ w_k, each_class_x @ w_q
        # Cosine-style normalization of the attention logits; the 1e-5 term
        # guards against division by zero.
        norm_q = tf.sqrt(tf.reduce_sum(q ** 2, axis=-1, keepdims=True))
        norm_k = tf.sqrt(tf.reduce_sum(k ** 2, axis=-1, keepdims=True))
        norm_factor = 1e-5 + (tf.tensordot(norm_q, norm_k, axes=[[-1], [-1]])
                              / tf.sqrt(tf.cast(self.output_dim, tf.float32)))
        a = tf.tensordot(q, k, axes=[[-1], [-1]]) / norm_factor
        # Combine the model-class features, weighted by the attention scores.
        outp = self.weighted_ensemble_layer(v, a)
        outp = tf.reduce_sum(outp, axis=-2)
        outp = tf.matmul(outp, w_fc) + b_fc
        return outp, a

    def forward(self, inp, inp_q, reuse=True):
        with tf.variable_scope(self.scope, reuse=reuse):
            c, a = self.model_transform_block(inp, inp_q)
        return c, a
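
# Hypothetical end-to-end sketch (names, shapes, and the final comparison step
# are assumptions; like the rest of this file, it targets TF1 and is
# illustrative rather than runnable):
#
#   task_transform = TaskTransform(reuse=tf.AUTO_REUSE)
#   model_transform = ModelTransform(reuse=tf.AUTO_REUSE)
#
#   task_feats = tf.placeholder(tf.float32, [None, 5, 640])   # per-task-class features
#   model_feats = tf.placeholder(tf.float32, [10, 640])       # per-model-class features
#
#   each_class_x, x = task_transform.forward(task_feats, reuse=tf.AUTO_REUSE)
#   c, a = model_transform.forward(model_feats, each_class_x, reuse=tf.AUTO_REUSE)
#   # `x` (task feature) and `c` (model feature) can then be compared to score
#   # how reusable the pre-trained model is for the task; `a` can additionally
#   # be supervised, as noted in model_transform_block's docstring.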