plop

2025-07-03 15:06:28 +02:00 · 2025-07-03 15:06:28 +02:00 · 5afa2fe656
commit 5afa2fe656
parent 774ef275a4
2 changed files with 2 additions and 5 deletions
--- a/scripts/tts_pytorch.py
+++ b/scripts/tts_pytorch.py
@@ -65,7 +65,7 @@ def main():
    voice_path = tts_model.get_voice_path(args.voice)
    # CFG coef goes here because the model was trained with CFG distillation,
    # so it's not _actually_ doing CFG at inference time.
-    # Also, if you are generating a dialog, you should have at least two voices in the list.
+    # Also, if you are generating a dialog, you should have two voices in the list.
    condition_attributes = tts_model.make_condition_attributes(
        [voice_path], cfg_coef=2.0
    )
@@ -76,7 +76,6 @@ def main():
        pcms = queue.Queue()
        @torch.no_grad()
        def _on_frame(frame):
            if (frame != -1).all():
                pcm = tts_model.mimi.decode(frame[:, 1:, :]).cpu().numpy()
--- a/tts_pytorch.ipynb
+++ b/tts_pytorch.ipynb
@@ -63,7 +63,7 @@
    "voice_path = tts_model.get_voice_path(voice)\n",
    "# CFG coef goes here because the model was trained with CFG distillation,\n",
    "# so it's not _actually_ doing CFG at inference time.\n",
-    "# Also, if you are generating a dialog, you should have at least two voices in the list.\n",
+    "# Also, if you are generating a dialog, you should have two voices in the list.\n",
    "condition_attributes = tts_model.make_condition_attributes(\n",
    "    [voice_path], cfg_coef=2.0\n",
    ")"
@@ -79,8 +79,6 @@
    "print(\"Generating audio...\")\n",
    "\n",
    "pcms = []\n",
    "\n",
    "@torch.no_grad()\n",
    "def _on_frame(frame):\n",
    "    print(\"Step\", len(pcms), end=\"\\r\")\n",
    "    if (frame != -1).all():\n",