Fixes for the notebook.

2025-07-03 13:05:00 +02:00 · 2025-07-03 13:05:00 +02:00 · 25574aa104
commit 25574aa104
parent 1cd9529f65
2 changed files with 3 additions and 2 deletions
--- a/README.md
+++ b/README.md
@@ -52,7 +52,7 @@ Here is how to choose which one to use:
 <a href="https://huggingface.co/kyutai/stt-2.6b-en" target="_blank" style="margin: 2px;">
    <img alt="Hugging Face" src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue" style="display: inline-block; vertical-align: middle;"/>
 </a>
-<a target="_blank" href="https://colab.research.google.com/github/kyutai-labs/delayed-streams-modeling/blob/main/stt_pytorch.ipynb">
+<a target="_blank" href="https://colab.research.google.com/github/kyutai-labs/delayed-streams-modeling/blob/main/tts_pytorch.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
 </a>

--- a/tts_pytorch.ipynb
+++ b/tts_pytorch.ipynb
@@ -7,7 +7,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "!pip install git+https://git@github.com/kyutai-labs/moshi#egg=moshi&subdirectory=moshi"
+    "!pip install \"git+https://git@github.com/kyutai-labs/moshi#egg=moshi&subdirectory=moshi\""
   ]
  },
  {
@@ -54,6 +54,7 @@
    "tts_model = TTSModel.from_checkpoint_info(\n",
    "    checkpoint_info, n_q=32, temp=0.6, device=torch.device(\"cuda\"), dtype=torch.half\n",
    ")\n",
+    "tts_model.mimi.streaming_forever(1)\n",
    "\n",
    "# You could also generate multiple audios at once by passing a list of texts.\n",
    "entries = tts_model.prepare_script([text], padding_between=1)\n",