Merge pull request JorisCos#11 from JorisCos/augmentation_fix

Augmentation fix
s3prl · Jan 4, 2021 · 60199d7 · 60199d7
2 parents 3d44d6d + 7ae7adb
commit 60199d7
Show file tree

Hide file tree

Showing 2 changed files with 26 additions and 6 deletions.
diff --git a/generate_librimix.sh b/generate_librimix.sh
@@ -64,11 +64,15 @@ wham &
 
 wait
 
+# Path to python
+python_path=python
+
+# If you wish to rerun this script in the future, please comment out the line below.
+$python_path scripts/augment_train_noise.py --wham_dir $wham_dir
 
-python scripts/augment_train_noise.py --wham_dir $wham_dir
 for n_src in 2 3; do
   metadata_dir=metadata/Libri$n_src"Mix"
-  python scripts/create_librimix_from_metadata.py --librispeech_dir $librispeech_dir \
+  $python_path scripts/create_librimix_from_metadata.py --librispeech_dir $librispeech_dir \
     --wham_dir $wham_dir \
     --metadata_dir $metadata_dir \
     --librimix_outdir $librimix_outdir \

diff --git a/scripts/augment_train_noise.py b/scripts/augment_train_noise.py
@@ -19,10 +19,26 @@ def main(args):
     # List files in that dir
     sound_paths = glob.glob(os.path.join(subdir, '**/*.wav'),
                             recursive=True)
-    print(f'Augmenting {subdir} files')
-    # Transform audio speed
-    augment_noise(sound_paths, 0.8)
-    augment_noise(sound_paths, 1.2)
+    # Avoid running this script if it has already been run
+    if len(sound_paths) == 60000:
+        print("It appears that augmented files have already been generated.\n"
+              "Skipping data augmentation.")
+        return
+    elif len(sound_paths) != 20000:
+        print("It appears that augmented files have not been generated properly\n"
+              "Resuming augmentation.")
+        originals = [x for x in sound_paths if 'sp' not in x]
+        to_be_removed_08 = [x.replace('sp08','') for x in sound_paths if 'sp08' in x]
+        to_be_removed_12 = [x.replace('sp12','') for x in sound_paths if 'sp12' in x ]
+        sound_paths_08 = list(set(originals) - set(to_be_removed_08))
+        sound_paths_12 = list(set(originals) - set(to_be_removed_12))
+        augment_noise(sound_paths_08, 0.8)
+        augment_noise(sound_paths_12, 1.2)
+    else:
+        print(f'Augmenting {subdir} files')
+        # Transform audio speed
+        augment_noise(sound_paths, 0.8)
+        augment_noise(sound_paths, 1.2)
 
 
 def augment_noise(sound_paths, speed):