From 6c1e9f12cf5a84972a2cab225f32e680d9833a80 Mon Sep 17 00:00:00 2001
From: laurent
Date: Thu, 3 Jul 2025 07:52:27 +0200
Subject: [PATCH] Mention the MLX quantization.

---
 README.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index a397045..7954612 100644
--- a/README.md
+++ b/README.md
@@ -250,10 +250,13 @@ hardware acceleration on Apple silicon.
 
 Use our example script to run Kyutai TTS on MLX. The script takes text from
 stdin or a file and can output to a file or stream the resulting audio.
+When streaming the output, if the model is not fast enough to keep up with
+real time, you can use the `--quantize 8` or `--quantize 4` flags to quantize
+the model, resulting in faster inference.
 
 ```bash
 # From stdin, plays audio immediately
-echo "Hey, how are you?" | python scripts/tts_mlx.py - -
+echo "Hey, how are you?" | python scripts/tts_mlx.py - - --quantize 8
 
 # From text file to audio file
 python scripts/tts_mlx.py text_to_say.txt audio_output.wav