diff --git a/README.md b/README.md
index b546f3b..e3c8a18 100644
--- a/README.md
+++ b/README.md
@@ -59,18 +59,17 @@ Here is how to choose which one to use:
 For an example of how to use the model in a way where you can directly stream in PyTorch tensors,
 [see our Colab notebook](https://colab.research.google.com/github/kyutai-labs/delayed-streams-modeling/blob/main/transcribe_via_pytorch.ipynb).
 
-If you just want to run the model on a file, you can use `moshi.run_inference`. This requires the [moshi package](https://pypi.org/project/moshi/) with version 0.2.6 or later, which can be installed via pip.
+If you just want to run the model on a file, you can use `moshi.run_inference`.
+
 ```bash
 python -m moshi.run_inference --hf-repo kyutai/stt-2.6b-en audio/bria.mp3
 ```
 
-If you have [uv](https://docs.astral.sh/uv/) installed, you can skip the installation step and run directly:
-```bash
-uvx --with moshi python -m moshi.run_inference --hf-repo kyutai/stt-2.6b-en audio/bria.mp3
-```
+If you have [uv](https://docs.astral.sh/uv/) installed, you can skip the installation step
+and just prefix the command above with `uvx --with moshi`.
 
 Additionally, we provide two scripts that highlight different usage scenarios. The first script
 illustrates how to extract word-level timestamps from the model's outputs:
@@ -157,15 +156,20 @@ hardware acceleration on Apple silicon.
 This requires the [moshi-mlx package](https://pypi.org/project/moshi-mlx/)
 with version 0.2.6 or later, which can be installed via pip.
 
+If you just want to run the model on a file, you can use `moshi_mlx.run_inference`:
+
 ```bash
 python -m moshi_mlx.run_inference --hf-repo kyutai/stt-2.6b-en-mlx audio/bria.mp3 --temp 0
 ```
 
-If you have [uv](https://docs.astral.sh/uv/) installed, you can skip the installation step and run directly:
+If you have [uv](https://docs.astral.sh/uv/) installed, you can skip the installation step
+and just prefix the command above with `uvx --with moshi-mlx`.
+
+If you want to transcribe audio from your microphone, use:
+
 ```bash
-uvx --with moshi-mlx python -m moshi_mlx.run_inference --hf-repo kyutai/stt-2.6b-en-mlx audio/bria.mp3 --temp 0
+python scripts/transcribe_from_mic_via_mlx.py
 ```
-It will install the moshi package in a temporary environment and run the speech-to-text.
 
 The MLX models can also be used in swift using the [moshi-swift
 codebase](https://github.com/kyutai-labs/moshi-swift), the 1b model has been
diff --git a/scripts/evaluate_on_dataset.py b/scripts/evaluate_on_dataset.py
index 3bef8aa..684fe5c 100644
--- a/scripts/evaluate_on_dataset.py
+++ b/scripts/evaluate_on_dataset.py
@@ -14,14 +14,6 @@ Example implementation of the streaming STT example. Here we group
 test utterances in batches (pre- and post-padded with silence) and
 and then feed these batches into the streaming STT model
 frame-by-frame.
-
-Example command:
-```
-uv run scripts/streaming_stt.py \
-    --dataset meanwhile \
-    --hf-repo kyutai/stt-2.6b-en
-```
-
 """
 
 # The outputs I get on my H100 using this code with the 2.6B model,
@@ -365,7 +357,7 @@ if __name__ == "__main__":
     )
 
     parser.add_argument(
-        "--hf-repo", type=str, help="HF repo to load the STT model from. "
+        "--hf-repo", type=str, help="HF repo to load the STT model from."
     )
     parser.add_argument("--tokenizer", type=str, help="Path to a local tokenizer file.")
     parser.add_argument(
diff --git a/scripts/transcribe_from_file_via_pytorch.py b/scripts/transcribe_from_file_via_pytorch.py
index e941da8..f113b3a 100644
--- a/scripts/transcribe_from_file_via_pytorch.py
+++ b/scripts/transcribe_from_file_via_pytorch.py
@@ -10,13 +10,6 @@
 """An example script that illustrates how one can get per-word
 timestamps from Kyutai STT models.
-
-Usage:
-```
-uv run scripts/streaming_stt_timestamps.py \
-    --hf-repo kyutai/stt-2.6b-en \
-    --file bria.mp3
-```
 """
 
 import argparse
@@ -185,6 +178,8 @@ def main(args):
         if text_tokens is not None:
             text_tokens_accum.append(text_tokens)
+            print(tokenizer.decode(text_tokens.numpy().tolist()))
+
 
     utterance_tokens = torch.concat(text_tokens_accum, dim=-1)
     timed_text = tokens_to_timestamped_text(
         utterance_tokens,
@@ -201,11 +196,7 @@ def main(args):
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Example streaming STT w/ timestamps.")
-    parser.add_argument(
-        "--file",
-        required=True,
-        help="File to transcribe.",
-    )
+    parser.add_argument("in_file", help="The file to transcribe.")
     parser.add_argument(
         "--hf-repo", type=str, help="HF repo to load the STT model from. "
diff --git a/scripts/transcribe_from_mic_via_mlx.py b/scripts/transcribe_from_mic_via_mlx.py
index e8792e2..8f82af6 100644
--- a/scripts/transcribe_from_mic_via_mlx.py
+++ b/scripts/transcribe_from_mic_via_mlx.py
@@ -70,7 +70,7 @@ if __name__ == "__main__":
     def audio_callback(indata, _frames, _time, _status):
         block_queue.put(indata.copy())
 
-    print("start recording the user input")
+    print("recording audio from microphone, speak to get your words transcribed")
     with sd.InputStream(
         channels=1,
         dtype="float32",
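For reference, here is what the prefixed invocations that the updated README now describes in prose look like when written out in full. Both commands are taken verbatim from the README lines removed in this diff; only the grouping into a single snippet is new.

```bash
# PyTorch backend: uvx resolves the moshi package into a throwaway environment
# and runs the module there, so no prior `pip install moshi` is needed.
uvx --with moshi python -m moshi.run_inference --hf-repo kyutai/stt-2.6b-en audio/bria.mp3

# MLX backend on Apple silicon: same pattern with the moshi-mlx package.
uvx --with moshi-mlx python -m moshi_mlx.run_inference --hf-repo kyutai/stt-2.6b-en-mlx audio/bria.mp3 --temp 0
```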