Skip to content

Commit

Permalink
fix(configs): update config field types
Browse files: browse the repository at this point in the history
  • Loading branch information
eginhard committed Jan 11, 2025
1 parent 32a0af9 commit d8bb2d3
Show file tree
Hide file tree
Showing 10 changed files with 27 additions and 27 deletions.
8 changes: 4 additions & 4 deletions TTS/tts/configs/align_tts_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class AlignTTSConfig(BaseTTSConfig):
model: str = "align_tts"
# model specific params
model_args: AlignTTSArgs = field(default_factory=AlignTTSArgs)
phase_start_steps: list[int] = None
phase_start_steps: list[int] | None = None

ssim_alpha: float = 1.0
spec_loss_alpha: float = 1.0
Expand All @@ -79,13 +79,13 @@ class AlignTTSConfig(BaseTTSConfig):
# multi-speaker settings
use_speaker_embedding: bool = False
use_d_vector_file: bool = False
d_vector_file: str = False
d_vector_file: str | None = None

# optimizer parameters
optimizer: str = "Adam"
optimizer_params: dict = field(default_factory=lambda: {"betas": [0.9, 0.998], "weight_decay": 1e-6})
lr_scheduler: str = None
lr_scheduler_params: dict = None
lr_scheduler: str | None = None
lr_scheduler_params: dict | None = None
lr: float = 1e-4
grad_clip: float = 5.0

Expand Down
6 changes: 3 additions & 3 deletions TTS/tts/configs/fast_pitch_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,10 +116,10 @@ class FastPitchConfig(BaseTTSConfig):

# multi-speaker settings
num_speakers: int = 0
speakers_file: str = None
speakers_file: str | None = None
use_speaker_embedding: bool = False
use_d_vector_file: bool = False
d_vector_file: str = False
d_vector_file: str | None = None
d_vector_dim: int = 0

# optimizer parameters
Expand Down Expand Up @@ -149,7 +149,7 @@ class FastPitchConfig(BaseTTSConfig):

# dataset configs
compute_f0: bool = True
f0_cache_path: str = None
f0_cache_path: str | None = None

# testing
test_sentences: list[str] = field(
Expand Down
6 changes: 3 additions & 3 deletions TTS/tts/configs/fast_speech_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,10 @@ class FastSpeechConfig(BaseTTSConfig):

# multi-speaker settings
num_speakers: int = 0
speakers_file: str = None
speakers_file: str | None = None
use_speaker_embedding: bool = False
use_d_vector_file: bool = False
d_vector_file: str = False
d_vector_file: str | None = None
d_vector_dim: int = 0

# optimizer parameters
Expand Down Expand Up @@ -143,7 +143,7 @@ class FastSpeechConfig(BaseTTSConfig):

# dataset configs
compute_f0: bool = False
f0_cache_path: str = None
f0_cache_path: str | None = None

# testing
test_sentences: list[str] = field(
Expand Down
8 changes: 4 additions & 4 deletions TTS/tts/configs/fastspeech2_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,10 @@ class Fastspeech2Config(BaseTTSConfig):

# multi-speaker settings
num_speakers: int = 0
speakers_file: str = None
speakers_file: str | None = None
use_speaker_embedding: bool = False
use_d_vector_file: bool = False
d_vector_file: str = False
d_vector_file: str | None = None
d_vector_dim: int = 0

# optimizer parameters
Expand Down Expand Up @@ -160,11 +160,11 @@ class Fastspeech2Config(BaseTTSConfig):

# dataset configs
compute_f0: bool = True
f0_cache_path: str = None
f0_cache_path: str | None = None

# dataset configs
compute_energy: bool = True
energy_cache_path: str = None
energy_cache_path: str | None = None

# testing
test_sentences: list[str] = field(
Expand Down
8 changes: 4 additions & 4 deletions TTS/tts/configs/glow_tts_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ class GlowTTSConfig(BaseTTSConfig):
model: str = "glow_tts"

# model params
num_chars: int = None
num_chars: int | None = None
encoder_type: str = "rel_pos_transformer"
encoder_params: dict = field(
default_factory=lambda: {
Expand Down Expand Up @@ -146,15 +146,15 @@ class GlowTTSConfig(BaseTTSConfig):
data_dep_init_steps: int = 10

# inference params
style_wav_for_test: str = None
style_wav_for_test: str | None = None
inference_noise_scale: float = 0.0
length_scale: float = 1.0

# multi-speaker settings
use_speaker_embedding: bool = False
speakers_file: str = None
speakers_file: str | None = None
use_d_vector_file: bool = False
d_vector_file: str = False
d_vector_file: str | None = None

# optimizer parameters
optimizer: str = "RAdam"
Expand Down
6 changes: 3 additions & 3 deletions TTS/tts/configs/speedy_speech_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,10 @@ class SpeedySpeechConfig(BaseTTSConfig):

# multi-speaker settings
num_speakers: int = 0
speakers_file: str = None
speakers_file: str | None = None
use_speaker_embedding: bool = False
use_d_vector_file: bool = False
d_vector_file: str = False
d_vector_file: str | None = None
d_vector_dim: int = 0

# optimizer parameters
Expand Down Expand Up @@ -160,7 +160,7 @@ class SpeedySpeechConfig(BaseTTSConfig):

# dataset configs
compute_f0: bool = False
f0_cache_path: str = None
f0_cache_path: str | None = None

# testing
test_sentences: list[str] = field(
Expand Down
6 changes: 3 additions & 3 deletions TTS/tts/configs/tacotron_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ class TacotronConfig(BaseTTSConfig):

# attention layers
attention_type: str = "original"
attention_heads: int = None
attention_heads: int | None = None
attention_norm: str = "sigmoid"
attention_win: bool = False
windowing: bool = False
Expand All @@ -188,8 +188,8 @@ class TacotronConfig(BaseTTSConfig):
use_speaker_embedding: bool = False
speaker_embedding_dim: int = 512
use_d_vector_file: bool = False
d_vector_file: str = False
d_vector_dim: int = None
d_vector_file: str | None = None
d_vector_dim: int | None = None

# optimizer parameters
optimizer: str = "RAdam"
Expand Down
2 changes: 1 addition & 1 deletion TTS/tts/configs/vits_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ class VitsConfig(BaseTTSConfig):
add_blank: bool = True

# testing
test_sentences: list[list] = field(
test_sentences: list[str] | list[list[str]] = field(
default_factory=lambda: [
["It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent."],
["Be a voice, not an echo."],
Expand Down
2 changes: 1 addition & 1 deletion TTS/vocoder/configs/multiband_melgan_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ class MultibandMelganConfig(BaseGANVocoderConfig):
pad_short: int = 2000
use_noise_augment: bool = False
use_cache: bool = True
steps_to_start_discriminator: bool = 200000
steps_to_start_discriminator: int = 200000

# LOSS PARAMETERS - overrides
use_stft_loss: bool = True
Expand Down
2 changes: 1 addition & 1 deletion TTS/vocoder/configs/shared_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ class BaseGANVocoderConfig(BaseVocoderConfig):
target_loss: str = "loss_0" # loss value to pick the best model to save after each epoch

# optimizer
grad_clip: float = field(default_factory=lambda: [5, 5])
grad_clip: float | list[float] = field(default_factory=lambda: [5, 5])
lr_gen: float = 0.0002 # Initial learning rate.
lr_disc: float = 0.0002 # Initial learning rate.
lr_scheduler_gen: str = "ExponentialLR" # one of the schedulers from https://pytorch.org/docs/stable/optim.html
Expand Down

0 comments on commit d8bb2d3

Please sign in to comment.