Skip to content

Commit

Permalink
update preprocessors and utils
Browse files Browse the repository at this point in the history
  • Loading branch information
keonlee9420 committed Feb 20, 2022
1 parent 4746fcc commit 1eff7f0
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 13 deletions.
2 changes: 1 addition & 1 deletion config/VCTK/preprocess.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
dataset: "VCTK"

path:
corpus_path: "/mnt/nfs2/speech-datasets/en/VCTK-Corpus-92"
corpus_path: "/mnt/nfs2/speech-datasets/en/VCTK-Corpus-0.92"
wav_tag: "mic1"
wav_dir: "wav48_silence_trimmed"
txt_dir: "txt"
Expand Down
40 changes: 35 additions & 5 deletions preprocessor/ljspeech.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,27 @@ def __init__(self, config):
config["preprocessing"]["mel"]["mel_fmin"],
config["preprocessing"]["mel"]["mel_fmax"],
)
self.val_prior = self.val_prior_names(os.path.join(self.out_dir, "val.txt"))

def val_prior_names(self, val_prior_path):
    """Load the basenames of a pre-defined validation split, if one exists.

    Reads a previously written metadata file (lines of the form
    ``basename|...``) and collects the first ``|``-separated field of
    each line.

    Args:
        val_prior_path: Path to an existing ``val.txt`` metadata file.

    Returns:
        A list of unique basenames, or ``None`` when the file does not
        exist (i.e. no prior validation split is available).
    """
    if not os.path.isfile(val_prior_path):
        return None
    print("Load pre-defined validation set...")
    # Iterate the file lazily instead of readlines(); a set
    # comprehension de-duplicates basenames in one pass.
    with open(val_prior_path, "r", encoding="utf-8") as f:
        names = {line.split("|")[0] for line in f}
    return list(names)

def build_from_path(self):
os.makedirs((os.path.join(self.out_dir, "text")), exist_ok=True)
os.makedirs((os.path.join(self.out_dir, "mel")), exist_ok=True)

print("Processing Data ...")
out = list()
train = list()
val = list()
n_frames = 0
mel_min = float('inf')
mel_max = -float('inf')
Expand All @@ -65,7 +79,14 @@ def build_from_path(self):
continue
else:
info, n, m_min, m_max = ret
out.append(info)

if self.val_prior is not None:
if basename not in self.val_prior:
train.append(info)
else:
val.append(info)
else:
out.append(info)

if mel_min > m_min:
mel_min = m_min
Expand Down Expand Up @@ -93,15 +114,24 @@ def build_from_path(self):
)
)

random.shuffle(out)
out = [r for r in out if r is not None]
if self.val_prior is not None:
assert len(out) == 0
random.shuffle(train)
train = [r for r in train if r is not None]
val = [r for r in val if r is not None]
else:
assert len(train) == 0 and len(val) == 0
random.shuffle(out)
out = [r for r in out if r is not None]
train = out[self.val_size :]
val = out[: self.val_size]

# Write metadata
with open(os.path.join(self.out_dir, "train.txt"), "w", encoding="utf-8") as f:
for m in out[self.val_size :]:
for m in train:
f.write(m + "\n")
with open(os.path.join(self.out_dir, "val.txt"), "w", encoding="utf-8") as f:
for m in out[: self.val_size]:
for m in val:
f.write(m + "\n")

return out
Expand Down
28 changes: 23 additions & 5 deletions preprocessor/vctk.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,22 @@ def __init__(self, config):
config["preprocessing"]["mel"]["mel_fmin"],
config["preprocessing"]["mel"]["mel_fmax"],
)
self.val_prior = self.val_prior_names(os.path.join(self.out_dir, "val.txt"))
self.speaker_emb = None
if config["preprocessing"]["speaker_embedder"] != "none":
self.speaker_emb = PreDefinedEmbedder(config)

def val_prior_names(self, val_prior_path):
    """Return the unique basenames listed in *val_prior_path*.

    Each line of the file is expected to look like ``basename|...``;
    only the leading field is kept. Returns ``None`` when the file is
    absent, signalling that no prior validation split exists.
    """
    # Guard clause: nothing to load when no prior val.txt was written.
    if not os.path.isfile(val_prior_path):
        return None
    print("Load pre-defined validation set...")
    collected = set()
    with open(val_prior_path, "r", encoding="utf-8") as metadata_file:
        for entry in metadata_file.readlines():
            collected.add(entry.split("|")[0])
    return list(collected)

def _init_spker_embeds(self, spkers):
spker_embeds = dict()
for spker in spkers:
Expand Down Expand Up @@ -101,10 +113,16 @@ def build_from_path(self):
info, n, m_min, m_max, spker_embed = ret
# out.append(info)

if i == 0 or i == 1:
val.append(info)
if self.val_prior is not None:
if basename not in self.val_prior:
train.append(info)
else:
val.append(info)
else:
train.append(info)
if i == 0 or i == 1:
val.append(info)
else:
train.append(info)

if self.speaker_emb is not None:
spker_embeds[speaker].append(spker_embed)
Expand Down Expand Up @@ -152,16 +170,16 @@ def build_from_path(self):

# random.shuffle(out)
# out = [r for r in out if r is not None]
if self.val_prior is None:
random.shuffle(train)
train = [r for r in train if r is not None]
val = [r for r in val if r is not None]

# Write metadata
with open(os.path.join(self.out_dir, "train.txt"), "w", encoding="utf-8") as f:
# for m in out[self.val_size :]:
for m in train:
f.write(m + "\n")
with open(os.path.join(self.out_dir, "val.txt"), "w", encoding="utf-8") as f:
# for m in out[: self.val_size]:
for m in val:
f.write(m + "\n")

Expand Down
6 changes: 4 additions & 2 deletions utils/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,8 @@ def infer_one_sample(targets, predictions, vocoder, mel_stats, model_config, pre
attention=True,
)
plt.savefig(os.path.join(
path, str(args.restore_step), "{}_{}.png".format(basename, args.speaker_id) if multi_speaker else "{}.png".format(basename)))
path, str(args.restore_step), "{}_{}.png".format(basename, args.speaker_id)\
if multi_speaker and args.mode == "single" else "{}.png".format(basename)))
plt.close()

from .model import vocoder_infer
Expand All @@ -219,7 +220,8 @@ def infer_one_sample(targets, predictions, vocoder, mel_stats, model_config, pre

sampling_rate = preprocess_config["preprocessing"]["audio"]["sampling_rate"]
wavfile.write(os.path.join(
path, str(args.restore_step), "{}_{}.wav".format(basename, args.speaker_id) if multi_speaker else "{}.wav".format(basename)),
path, str(args.restore_step), "{}_{}.wav".format(basename, args.speaker_id)\
if multi_speaker and args.mode == "single" else "{}.wav".format(basename)),
sampling_rate, wav_predictions[0])


Expand Down

0 comments on commit 1eff7f0

Please sign in to comment.