August 2020 update
liuqiuhui2015 committed Aug 31, 2020
1 parent b453baa commit 49b95af
Showing 72 changed files with 2,183 additions and 544 deletions.
8 changes: 4 additions & 4 deletions adv/predict/doc/para/predict_doc_para.py
@@ -3,6 +3,7 @@
 import sys
 
 import torch
+from torch.cuda.amp import autocast
 
 from tqdm import tqdm

@@ -50,10 +51,8 @@ def load_fixing(module):

 mymodel.eval()
 
-use_cuda = cnfg.use_cuda
-gpuid = cnfg.gpuid
-
 use_cuda, cuda_device, cuda_devices, multi_gpu = parse_cuda_decode(cnfg.use_cuda, cnfg.gpuid, cnfg.multi_gpu_decoding)
+use_amp = cnfg.use_amp and use_cuda
 
 # Important to make cudnn methods deterministic
 set_random_seed(cnfg.seed, use_cuda)
@@ -81,7 +80,8 @@ def load_fixing(module):
 				seq_batch = seq_batch.to(cuda_device)
 			bsize, _nsent, seql = seq_batch.size()
 			_nsent_use = _nsent - 1
-			output = mymodel.decode(seq_batch.narrow(1, 1, _nsent_use).contiguous(), seq_batch.narrow(1, 0, _nsent_use).contiguous(), beam_size, None, length_penalty).view(bsize, _nsent_use, -1)
+			with autocast(enabled=use_amp):
+				output = mymodel.decode(seq_batch.narrow(1, 1, _nsent_use).contiguous(), seq_batch.narrow(1, 0, _nsent_use).contiguous(), beam_size, None, length_penalty).view(bsize, _nsent_use, -1)
 			if multi_gpu:
 				tmp = []
 				for ou in output:
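The recurring change in this update is visible above: the decode call is wrapped in torch.cuda.amp.autocast (new in PyTorch 1.6), with use_amp forced off when CUDA is not in use. A minimal standalone sketch of the pattern, with illustrative names rather than the repo's exact decode signature:

import torch
from torch.cuda.amp import autocast

def amp_decode(model, seq_batch, beam_size, length_penalty, use_amp):
	# Inference only: no_grad skips autograd bookkeeping, and autocast runs
	# eligible CUDA ops in float16 when use_amp is True; with enabled=False
	# it is a no-op, so one code path serves both FP32 and mixed precision.
	model.eval()
	with torch.no_grad(), autocast(enabled=use_amp):
		return model.decode(seq_batch, beam_size, None, length_penalty)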
100 changes: 100 additions & 0 deletions adv/predict/predict_ape.py
@@ -0,0 +1,100 @@
+#encoding: utf-8
+
+import sys
+
+import torch
+from torch.cuda.amp import autocast
+
+from tqdm import tqdm
+
+import h5py
+
+import cnfg.base as cnfg
+from cnfg.ihyp import *
+
+from transformer.APE.NMT import NMT
+from transformer.EnsembleNMT import NMT as Ensemble
+from parallel.parallelMT import DataParallelMT
+
+from utils.base import *
+from utils.fmt.base import ldvocab, reverse_dict, eos_id
+from utils.fmt.base4torch import parse_cuda_decode
+
+def load_fixing(module):
+
+	if "fix_load" in dir(module):
+		module.fix_load()
+
+td = h5py.File(cnfg.test_data, "r")
+
+ntest = td["ndata"][:].item()
+nwordi = td["nword"][:].tolist()[0]
+vcbt, nwordt = ldvocab(sys.argv[2])
+vcbt = reverse_dict(vcbt)
+
+if len(sys.argv) == 4:
+	mymodel = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)
+
+	mymodel = load_model_cpu(sys.argv[3], mymodel)
+	mymodel.apply(load_fixing)
+
+else:
+	models = []
+	for modelf in sys.argv[3:]:
+		tmp = NMT(cnfg.isize, nwordi, nwordt, cnfg.nlayer, cnfg.ff_hsize, cnfg.drop, cnfg.attn_drop, cnfg.share_emb, cnfg.nhead, cache_len_default, cnfg.attn_hsize, cnfg.norm_output, cnfg.bindDecoderEmb, cnfg.forbidden_indexes)
+
+		tmp = load_model_cpu(modelf, tmp)
+		tmp.apply(load_fixing)
+
+		models.append(tmp)
+	mymodel = Ensemble(models)
+
+mymodel.eval()
+
+use_cuda, cuda_device, cuda_devices, multi_gpu = parse_cuda_decode(cnfg.use_cuda, cnfg.gpuid, cnfg.multi_gpu_decoding)
+
+use_amp = cnfg.use_amp and use_cuda
+
+set_random_seed(cnfg.seed, use_cuda)
+
+if use_cuda:
+	mymodel.to(cuda_device)
+	if multi_gpu:
+		mymodel = DataParallelMT(mymodel, device_ids=cuda_devices, output_device=cuda_device.index, host_replicate=True, gather_output=False)
+
+beam_size = cnfg.beam_size
+
+length_penalty = cnfg.length_penalty
+
+ens = "\n".encode("utf-8")
+
+# using tgt instead of mt since data are processed by tools/mkiodata.py for the mt task
+src_grp, mt_grp = td["src"], td["tgt"]
+with open(sys.argv[1], "wb") as f:
+	with torch.no_grad():
+		for i in tqdm(range(ntest)):
+			seq_batch = torch.from_numpy(src_grp[str(i)][:]).long()
+			seq_mt = torch.from_numpy(mt_grp[str(i)][:]).long()
+			if use_cuda:
+				seq_batch = seq_batch.to(cuda_device)
+				seq_mt = seq_mt.to(cuda_device)
+			with autocast(enabled=use_amp):
+				output = mymodel.decode(seq_batch, seq_mt, beam_size, None, length_penalty)
+			if multi_gpu:
+				tmp = []
+				for ou in output:
+					tmp.extend(ou.tolist())
+				output = tmp
+			else:
+				output = output.tolist()
+			for tran in output:
+				tmp = []
+				for tmpu in tran:
+					if tmpu == eos_id:
+						break
+					else:
+						tmp.append(vcbt[tmpu])
+				f.write(" ".join(tmp).encode("utf-8"))
+				f.write(ens)
+
+td.close()
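Judging from the sys.argv handling above, the script takes the output file, the target vocabulary, and one or more checkpoints, and several checkpoints switch decoding to the Ensemble wrapper. Hypothetical invocations (file names are placeholders; cnfg.test_data must already point at an h5 test set prepared for the APE task):

# single checkpoint (the len(sys.argv) == 4 branch):
#	python adv/predict/predict_ape.py trans.txt tgt.vcb ape_model.h5
# several checkpoints, decoded as an ensemble:
#	python adv/predict/predict_ape.py trans.txt tgt.vcb avg1.h5 avg2.h5 avg3.h5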
@@ -7,6 +7,7 @@
 import sys
 
 import torch
+from torch.cuda.amp import autocast
 
 from tqdm import tqdm

@@ -23,6 +24,7 @@
 from loss.base import LabelSmoothingLoss
 
 from utils.base import *
+from utils.fmt.base import pad_id
 from utils.fmt.base4torch import parse_cuda
 
 def load_fixing(module):
@@ -57,9 +59,10 @@ def load_fixing(module):

 mymodel.eval()
 
-lossf = LabelSmoothingLoss(nwordt, cnfg.label_smoothing, ignore_index=0, reduction='none', forbidden_index=cnfg.forbidden_indexes)
+lossf = LabelSmoothingLoss(nwordt, cnfg.label_smoothing, ignore_index=pad_id, reduction='none', forbidden_index=cnfg.forbidden_indexes)
 
 use_cuda, cuda_device, cuda_devices, multi_gpu = parse_cuda(cnfg.use_cuda, cnfg.gpuid)
+use_amp = cnfg.use_amp and use_cuda
 
 # Important to make cudnn methods deterministic
 set_random_seed(cnfg.seed, use_cuda)
@@ -91,10 +94,11 @@ def load_fixing(module):
 			seq_o = seq_o.narrow(1, 1, _nsent_use)
 			oi = seq_o.narrow(-1, 0, lo).contiguous()
 			ot = seq_o.narrow(-1, 1, lo).contiguous()
-			output = mymodel(seq_batch.narrow(1, 1, _nsent_use).contiguous(), oi, seq_batch.narrow(1, 0, _nsent_use).contiguous()).view(bsize, _nsent_use, lo, -1)
-			loss = lossf(output, ot).sum(-1).view(bsize, -1).sum(-1)
+			with autocast(enabled=use_amp):
+				output = mymodel(seq_batch.narrow(1, 1, _nsent_use).contiguous(), oi, seq_batch.narrow(1, 0, _nsent_use).contiguous()).view(bsize, _nsent_use, lo, -1)
+				loss = lossf(output, ot).sum(-1).view(bsize, -1).sum(-1)
 			if norm_token:
-				lenv = ot.ne(0).int().view(bsize, -1).sum(-1).to(loss)
+				lenv = ot.ne(pad_id).int().view(bsize, -1).sum(-1).to(loss)
 				loss = loss / lenv
 			f.write("\n".join([str(rsu) for rsu in loss.tolist()]).encode("utf-8"))
 			loss = output = ot = seq_batch = seq_o = None
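Both ranking scripts sum the unreduced token loss per sentence and, when norm_token is set, divide by the number of non-pad target tokens; this update replaces the hard-coded 0 with pad_id in both the loss and the length count. A self-contained sketch of that normalization, with plain cross-entropy standing in for the repo's LabelSmoothingLoss (shapes and names are illustrative):

import torch
import torch.nn.functional as F

pad_id = 0  # stands in for utils.fmt.base.pad_id

def per_sentence_loss(logits, ot):
	# logits: (bsize, seql, nword); ot: (bsize, seql) gold indices with padding.
	# ignore_index zeroes the loss at pad positions under reduction="none",
	# mirroring LabelSmoothingLoss(..., ignore_index=pad_id, reduction='none').
	tok_loss = F.cross_entropy(logits.transpose(1, 2), ot, ignore_index=pad_id, reduction="none")
	loss = tok_loss.sum(-1)
	# the norm_token branch: normalize by the count of real (non-pad) tokens
	lenv = ot.ne(pad_id).sum(-1).to(loss)
	return loss / lenv

logits = torch.randn(2, 5, 8)
ot = torch.tensor([[3, 4, 1, pad_id, pad_id], [2, 6, 5, 7, 1]])
print(per_sentence_loss(logits, ot))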
12 changes: 8 additions & 4 deletions adv/rank/doc/rank_loss_sent.py
@@ -7,6 +7,7 @@
 import sys
 
 import torch
+from torch.cuda.amp import autocast
 
 from tqdm import tqdm

@@ -23,6 +24,7 @@
 from loss.base import LabelSmoothingLoss
 
 from utils.base import *
+from utils.fmt.base import pad_id
 from utils.fmt.base4torch import parse_cuda
 
 def load_fixing(module):
@@ -57,9 +59,10 @@ def load_fixing(module):

 mymodel.eval()
 
-lossf = LabelSmoothingLoss(nwordt, cnfg.label_smoothing, ignore_index=0, reduction='none', forbidden_index=cnfg.forbidden_indexes)
+lossf = LabelSmoothingLoss(nwordt, cnfg.label_smoothing, ignore_index=pad_id, reduction='none', forbidden_index=cnfg.forbidden_indexes)
 
 use_cuda, cuda_device, cuda_devices, multi_gpu = parse_cuda(cnfg.use_cuda, cnfg.gpuid)
+use_amp = cnfg.use_amp and use_cuda
 
 # Important to make cudnn methods deterministic
 set_random_seed(cnfg.seed, use_cuda)
@@ -87,10 +90,11 @@ def load_fixing(module):
 				seq_o = seq_o.to(cuda_device)
 			lo = seq_o.size(-1) - 1
 			ot = seq_o.narrow(-1, 1, lo).contiguous()
-			output = mymodel(seq_batch.view(ebsize, -1), seq_o.narrow(-1, 0, lo).contiguous().view(ebsize, -1)).view(bsize, nsent, lo, -1)
-			loss = lossf(output, ot).sum(-1).view(bsize, -1).sum(-1)
+			with autocast(enabled=use_amp):
+				output = mymodel(seq_batch.view(ebsize, -1), seq_o.narrow(-1, 0, lo).contiguous().view(ebsize, -1)).view(bsize, nsent, lo, -1)
+				loss = lossf(output, ot).sum(-1).view(bsize, -1).sum(-1)
 			if norm_token:
-				lenv = ot.ne(0).int().view(bsize, -1).sum(-1).to(loss)
+				lenv = ot.ne(pad_id).int().view(bsize, -1).sum(-1).to(loss)
 				loss = loss / lenv
 			f.write("\n".join([str(rsu) for rsu in loss.tolist()]).encode("utf-8"))
 			loss = output = ot = seq_batch = seq_o = None
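A small but deliberate detail in all of these scripts: use_amp is the conjunction cnfg.use_amp and use_cuda, so autocast(enabled=use_amp) is inert on CPU-only machines, where torch.cuda.amp.autocast would have no effect anyway. A quick check of that behavior, with illustrative values standing in for cnfg:

import torch
from torch.cuda.amp import autocast

use_cuda = torch.cuda.is_available()
use_amp = True and use_cuda  # mirrors use_amp = cnfg.use_amp and use_cuda

x = torch.randn(2, 4, device="cuda" if use_cuda else "cpu")
with autocast(enabled=use_amp):
	y = x @ x.t()  # matmul autocasts to float16 under CUDA + AMP
print(y.dtype)  # torch.float16 with CUDA and AMP enabled, torch.float32 otherwise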