Allow for using local voices in the pytorch examples. (#100)

2025-07-31 12:48:05 +02:00 · 2025-07-31 12:48:05 +02:00 · 7dc926d50c
commit 7dc926d50c
parent ab8e8c59b7
2 changed files with 8 additions and 2 deletions
--- a/scripts/tts_pytorch.py
+++ b/scripts/tts_pytorch.py
@@ -68,6 +68,9 @@ def main():
    # If you want to make a dialog, you can pass more than one turn [text_speaker_1, text_speaker_2, text_2_speaker_1, ...]
    entries = tts_model.prepare_script([text], padding_between=1)
    if args.voice.endswith(".safetensors"):
        voice_path = args.voice
    else:
        voice_path = tts_model.get_voice_path(args.voice)
    # CFG coef goes here because the model was trained with CFG distillation,
    # so it's not _actually_ doing CFG at inference time.
--- a/scripts/tts_pytorch_streaming.py
+++ b/scripts/tts_pytorch_streaming.py
@@ -183,6 +183,9 @@ def main():
        checkpoint_info, n_q=32, temp=0.6, device=args.device
    )
    if args.voice.endswith(".safetensors"):
        voice_path = args.voice
    else:
        voice_path = tts_model.get_voice_path(args.voice)
    # CFG coef goes here because the model was trained with CFG distillation,
    # so it's not _actually_ doing CFG at inference time.