From 93273ff01c8f15f20b66be8612da4e8523bfcf60 Mon Sep 17 00:00:00 2001 From: JorisCos Date: Thu, 10 Dec 2020 11:23:11 +0100 Subject: [PATCH 1/4] fix rerun on augmentation data --- generate_librimix.sh | 3 ++- scripts/augment_train_noise.py | 25 +++++++++++++++++++++---- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/generate_librimix.sh b/generate_librimix.sh index a06c588..4ae49a0 100644 --- a/generate_librimix.sh +++ b/generate_librimix.sh @@ -64,8 +64,9 @@ wham & wait - +# If you wish to rerun this script in the future please comment this line out. python scripts/augment_train_noise.py --wham_dir $wham_dir + for n_src in 2 3; do metadata_dir=metadata/Libri$n_src"Mix" python scripts/create_librimix_from_metadata.py --librispeech_dir $librispeech_dir \ diff --git a/scripts/augment_train_noise.py b/scripts/augment_train_noise.py index 2534258..77b468b 100644 --- a/scripts/augment_train_noise.py +++ b/scripts/augment_train_noise.py @@ -19,10 +19,27 @@ def main(args): # List files in that dir sound_paths = glob.glob(os.path.join(subdir, '**/*.wav'), recursive=True) - print(f'Augmenting {subdir} files') - # Transform audio speed - augment_noise(sound_paths, 0.8) - augment_noise(sound_paths, 1.2) + # Avoid running this script if it already have been run + if len(sound_paths) == 60000: + print("It appears that augmented files have already been generated." + "If the augmentation process went well the first time you ran " + "generate_librimix.sh please comment out the line relative to" + " augment_train_noise.py. Skipping data augmentation ") + return + elif len(sound_paths) != 20000: + print("It appears that augmented files have not been generated properly" + "Resuming augmentation") + to_be_removed_08 = [x for path in sound_paths if 'sp08' in path for x in (path,path.replace(('sp08'),''))] + to_be_removed_12 = [x for path in sound_paths if 'sp12' in path for x in (path,path.replace(('sp12'),''))] + sound_paths_08 = list(set(sound_paths) - set(to_be_removed_08)) + sound_paths_12 = list(set(sound_paths) - set(to_be_removed_12)) + augment_noise(sound_paths_08, 0.8) + augment_noise(sound_paths_12, 1.2) + else: + print(f'Augmenting {subdir} files') + # Transform audio speed + augment_noise(sound_paths, 0.8) + augment_noise(sound_paths, 1.2) def augment_noise(sound_paths, speed): From 0ce3e58fc097408b583d3dc2fbf02b8b12245b53 Mon Sep 17 00:00:00 2001 From: JorisCos Date: Thu, 10 Dec 2020 13:57:47 +0100 Subject: [PATCH 2/4] fix print add python path --- scripts/augment_train_noise.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/augment_train_noise.py b/scripts/augment_train_noise.py index 77b468b..6d4d502 100644 --- a/scripts/augment_train_noise.py +++ b/scripts/augment_train_noise.py @@ -21,14 +21,14 @@ def main(args): recursive=True) # Avoid running this script if it already have been run if len(sound_paths) == 60000: - print("It appears that augmented files have already been generated." - "If the augmentation process went well the first time you ran " + print("It appears that augmented files have already been generated.\n" + " If the augmentation process went well the first time you ran " "generate_librimix.sh please comment out the line relative to" - " augment_train_noise.py. Skipping data augmentation ") + " augment_train_noise.py.\nSkipping data augmentation.") return elif len(sound_paths) != 20000: - print("It appears that augmented files have not been generated properly" - "Resuming augmentation") + print("It appears that augmented files have not been generated properly\n" + "Resuming augmentation.") to_be_removed_08 = [x for path in sound_paths if 'sp08' in path for x in (path,path.replace(('sp08'),''))] to_be_removed_12 = [x for path in sound_paths if 'sp12' in path for x in (path,path.replace(('sp12'),''))] sound_paths_08 = list(set(sound_paths) - set(to_be_removed_08)) From 24b2aebdc9b62e845674b731853eb39605471b32 Mon Sep 17 00:00:00 2001 From: JorisCos Date: Thu, 10 Dec 2020 14:41:13 +0100 Subject: [PATCH 3/4] test done --- generate_librimix.sh | 7 +++++-- scripts/augment_train_noise.py | 9 +++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/generate_librimix.sh b/generate_librimix.sh index 4ae49a0..d4f77b7 100644 --- a/generate_librimix.sh +++ b/generate_librimix.sh @@ -64,12 +64,15 @@ wham & wait +# Path to python +python_path=python + # If you wish to rerun this script in the future please comment this line out. -python scripts/augment_train_noise.py --wham_dir $wham_dir +$python_path scripts/augment_train_noise.py --wham_dir $wham_dir for n_src in 2 3; do metadata_dir=metadata/Libri$n_src"Mix" - python scripts/create_librimix_from_metadata.py --librispeech_dir $librispeech_dir \ + $python_path scripts/create_librimix_from_metadata.py --librispeech_dir $librispeech_dir \ --wham_dir $wham_dir \ --metadata_dir $metadata_dir \ --librimix_outdir $librimix_outdir \ diff --git a/scripts/augment_train_noise.py b/scripts/augment_train_noise.py index 6d4d502..d012249 100644 --- a/scripts/augment_train_noise.py +++ b/scripts/augment_train_noise.py @@ -29,10 +29,11 @@ def main(args): elif len(sound_paths) != 20000: print("It appears that augmented files have not been generated properly\n" "Resuming augmentation.") - to_be_removed_08 = [x for path in sound_paths if 'sp08' in path for x in (path,path.replace(('sp08'),''))] - to_be_removed_12 = [x for path in sound_paths if 'sp12' in path for x in (path,path.replace(('sp12'),''))] - sound_paths_08 = list(set(sound_paths) - set(to_be_removed_08)) - sound_paths_12 = list(set(sound_paths) - set(to_be_removed_12)) + originals = [x for x in sound_paths if 'sp' not in x] + to_be_removed_08 = [x.replace('sp08','') for x in sound_paths if 'sp08' in x] + to_be_removed_12 = [x.replace('sp12','') for x in sound_paths if 'sp12' in x ] + sound_paths_08 = list(set(originals) - set(to_be_removed_08)) + sound_paths_12 = list(set(originals) - set(to_be_removed_12)) augment_noise(sound_paths_08, 0.8) augment_noise(sound_paths_12, 1.2) else: From 7ae7adbff861f679d403cafde87d1c82591613dc Mon Sep 17 00:00:00 2001 From: JorisCos Date: Thu, 17 Dec 2020 10:00:58 +0100 Subject: [PATCH 4/4] fix message --- scripts/augment_train_noise.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/augment_train_noise.py b/scripts/augment_train_noise.py index d012249..352c07d 100644 --- a/scripts/augment_train_noise.py +++ b/scripts/augment_train_noise.py @@ -22,9 +22,7 @@ def main(args): # Avoid running this script if it already have been run if len(sound_paths) == 60000: print("It appears that augmented files have already been generated.\n" - " If the augmentation process went well the first time you ran " - "generate_librimix.sh please comment out the line relative to" - " augment_train_noise.py.\nSkipping data augmentation.") + "Skipping data augmentation.") return elif len(sound_paths) != 20000: print("It appears that augmented files have not been generated properly\n"