Chunk decoding in the PyTorch notebook.

2025-07-03 12:56:00 +02:00 · 2025-07-03 12:56:00 +02:00 · 0ee2354176
commit 0ee2354176
parent dc8bffabe0
1 changed file with 9 additions and 7 deletions
--- a/tts_pytorch.ipynb
+++ b/tts_pytorch.ipynb
@@ -74,15 +74,17 @@
   "source": [
    "print(\"Generating audio...\")\n",
    "\n",
-    "# This doesn't do streaming generation,\n",
-    "result = tts_model.generate([entries], [condition_attributes])\n",
+    "pcms = []\n",
+    "def _on_frame(frame):\n",
+    "    print(\"Step\", len(pcms), end=\"\\r\")\n",
+    "    if (frame != -1).all():\n",
+    "        pcm = tts_model.mimi.decode(frame[:, 1:, :]).cpu().numpy()\n",
+    "        pcms.append(np.clip(pcm[0, 0], -1, 1))\n",
    "\n",
-    "frames = torch.cat(result.frames, dim=-1)\n",
-    "audio_tokens = frames[:, tts_model.lm.audio_offset :, tts_model.delay_steps :]\n",
-    "with torch.no_grad():\n",
-    "    audios = tts_model.mimi.decode(audio_tokens)\n",
+    "result = tts_model.generate([entries], [condition_attributes], on_frame=_on_frame)\n",
    "\n",
-    "audio = audios[0].cpu().numpy()"
+    "print(\"Done generating.\")\n",
+    "audio = np.concatenate(pcms, axis=-1)"
   ]
  },
  {