This commit is contained in:
Alexandre Défossez 2025-07-03 15:06:28 +02:00
parent 774ef275a4
commit 5afa2fe656
2 changed files with 2 additions and 5 deletions

View File

@@ -65,7 +65,7 @@ def main():
voice_path = tts_model.get_voice_path(args.voice) voice_path = tts_model.get_voice_path(args.voice)
# CFG coef goes here because the model was trained with CFG distillation, # CFG coef goes here because the model was trained with CFG distillation,
# so it's not _actually_ doing CFG at inference time. # so it's not _actually_ doing CFG at inference time.
# Also, if you are generating a dialog, you should have at least two voices in the list. # Also, if you are generating a dialog, you should have two voices in the list.
condition_attributes = tts_model.make_condition_attributes( condition_attributes = tts_model.make_condition_attributes(
[voice_path], cfg_coef=2.0 [voice_path], cfg_coef=2.0
) )
@@ -76,7 +76,6 @@ def main():
pcms = queue.Queue() pcms = queue.Queue()
@torch.no_grad()
def _on_frame(frame): def _on_frame(frame):
if (frame != -1).all(): if (frame != -1).all():
pcm = tts_model.mimi.decode(frame[:, 1:, :]).cpu().numpy() pcm = tts_model.mimi.decode(frame[:, 1:, :]).cpu().numpy()

View File

@@ -63,7 +63,7 @@
"voice_path = tts_model.get_voice_path(voice)\n", "voice_path = tts_model.get_voice_path(voice)\n",
"# CFG coef goes here because the model was trained with CFG distillation,\n", "# CFG coef goes here because the model was trained with CFG distillation,\n",
"# so it's not _actually_ doing CFG at inference time.\n", "# so it's not _actually_ doing CFG at inference time.\n",
"# Also, if you are generating a dialog, you should have at least two voices in the list.\n", "# Also, if you are generating a dialog, you should have two voices in the list.\n",
"condition_attributes = tts_model.make_condition_attributes(\n", "condition_attributes = tts_model.make_condition_attributes(\n",
" [voice_path], cfg_coef=2.0\n", " [voice_path], cfg_coef=2.0\n",
")" ")"
@@ -79,8 +79,6 @@
"print(\"Generating audio...\")\n", "print(\"Generating audio...\")\n",
"\n", "\n",
"pcms = []\n", "pcms = []\n",
"\n",
"@torch.no_grad()\n",
"def _on_frame(frame):\n", "def _on_frame(frame):\n",
" print(\"Step\", len(pcms), end=\"\\r\")\n", " print(\"Step\", len(pcms), end=\"\\r\")\n",
" if (frame != -1).all():\n", " if (frame != -1).all():\n",