-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpostech_vis.yaml
133 lines (101 loc) · 3.93 KB
/
postech_vis.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
### Train Predictor Estimator ###
# Model selected for this training run.
model: estimatorVis
# Model files will be saved in this directory.
output-dir: runs/estimator_vis
#### MODEL SPECIFIC OPTS ####

## ESTIMATOR ##
# If load-model points to a pretrained Estimator,
# these settings are ignored.

# LSTM settings
hidden-est: 125
rnn-layers-est: 1
dropout-est: 0.0
# Use a linear layer to reduce dimension prior to the LSTM.
# Booleans are written in canonical lowercase, matching the rest of the file.
mlp-est: true
# Multitask Learning Settings #

# Continue training the predictor on the post-edited text.
# If set, will do an additional forward pass through the predictor
# using the (SRC, PE) pair and add the `Predictor` loss for the tokens in the
# post-edited text PE. Recommended if you have access to PE.
# Requires setting train-pe, valid-pe.
token-level: false

# Predict sentence-level scores.
# Requires setting train-sentence-scores, valid-sentence-scores.
sentence-level: true

# Use a probabilistic loss for sentence scores instead of squared error.
# If set, the model will output the mean and variance of a truncated Gaussian
# distribution over the interval [0, 1], and use log-likelihood loss instead
# of mean squared error.
# Seems to improve performance.
sentence-ll: false

# Predict a binary label for each sentence, indicating hter == 0.0.
# Requires setting train-sentence-scores, valid-sentence-scores.
binary-level: false
# WMT 18 Format Settings #

# Predict target tags. Requires train-target-tags, valid-target-tags to be set.
predict-target: false
target-bad-weight: 2.5

# Predict source tags. Requires train-source-tags, valid-source-tags to be set.
predict-source: false
source-bad-weight: 2.5

# Predict gap tags. Requires train-target-tags, valid-target-tags to be set,
# and wmt18-format set to true.
predict-gaps: false
# NOTE(review): this entry used to be a second `target-bad-weight`, i.e. a
# duplicate mapping key (invalid YAML; most loaders silently keep the last
# one). It is named after the gap option here, mirroring the target/source
# pairs above -- confirm the consumer accepts `gaps-bad-weight`.
gaps-bad-weight: 2.5
### TRAIN OPTS ###
epochs: 10
# Additionally evaluate and checkpoint every n training steps.
# Explicitly disable by setting to zero (default).
checkpoint-validation-steps: 0
# If false, never save the models.
checkpoint-save: true
# Keep only the n best models according to the main metric (F1Mult by default).
# Useful to avoid filling the hard drive during a long run.
checkpoint-keep-only-best: 3
# If greater than zero, early-stop after n evaluation cycles without improvement.
checkpoint-early-stop-patience: 0
# Print train stats every n batches.
log-interval: 100
# Learning rate. Currently Adam is the only optimizer supported.
# 1e-3 * (batch_size / 32) seems to work well
# (with train-batch-size 64 that rule gives 2e-3).
learning-rate: 2e-3
train-batch-size: 64 #50
valid-batch-size: 64
### LOADING ###
# Load a pretrained (sub-)model.
# If set, the model architecture params are ignored.
# As the vocabulary of the pretrained model will be used,
# all vocab-params will also be ignored.
#   (i) load-pred-source or load-pred-target: Predictor instance
#       -> a new Estimator is initialized with the given predictor(s).
#  (ii) load-model: Estimator instance.
#       As the Predictor is a submodule of the Estimator,
#       load-pred-{source,target} will be ignored if this is set.
# Only load-pred-target is active below; the other two are left commented out.
# load-model: path_to_estimator
# load-pred-source: path_to_predictor_source_target
load-pred-target: runs/predictor/best_model.torch
### DATA ###
# Set to true to use target_tags in WMT18 format.
wmt18-format: false
# Training set.
# NOTE(review): the trailing `#WMT17/...` comments look like earlier dataset
# paths kept for reference -- confirm before relying on them.
train-source: data/train.src #WMT17/word_level/train.src
train-target: data/train.mt #WMT17/word_level/train.mt
#train-pe: data/WMT17/word_level/train.pe
train-sentence-scores: data/train.mt.wsent-mqm #WMT17/word_level/train.tags
#train-sentence-visual: data/train.vis
train-visual: data/train.vis
# Validation set (same layout as the training entries above).
valid-source: data/dev.src #WMT17/word_level/dev.src
valid-target: data/dev.mt #WMT17/word_level/dev.mt
#valid-pe: data/WMT17/word_level/dev.pe
valid-sentence-scores: data/dev.mt.wsent-mqm #WMT17/word_level/dev.tags
#valid-sentence-visual: data/dev.vis
valid-visual: data/dev.vis
# NOTE(review): this is the only key written with underscores instead of
# hyphens; left as-is because the consumer's option name is not visible here.
visual_feature_size: 4096 # Multimodality
### GENERAL OPTS ###
# Experiment name for MLflow.
experiment-name: sent-level
# Do not set, or set to a negative number, to run on CPU.
gpu-id: 1
# NOTE(review): visual-strategy / visual-method are not documented in this
# file; presumably they select where and how the visual features are combined
# with the text model -- confirm against the estimatorVis implementation.
visual-strategy: last
visual-method: mult
# Random seed.
seed: 42