From 35f133508fb72e4dc61ed8a21ed7ee67cfcb401c Mon Sep 17 00:00:00 2001
From: laurent <laurent.mazare@gmail.com>
Date: Thu, 31 Jul 2025 17:47:46 +0200
Subject: [PATCH] Use the proper repos when VAD is on.

---
 scripts/stt_from_file_mlx.py     | 7 ++++++-
 scripts/stt_from_file_pytorch.py | 3 +++
 scripts/stt_from_mic_mlx.py      | 7 ++++++-
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/scripts/stt_from_file_mlx.py b/scripts/stt_from_file_mlx.py
index 4065952..26222f6 100644
--- a/scripts/stt_from_file_mlx.py
+++ b/scripts/stt_from_file_mlx.py
@@ -24,13 +24,18 @@ if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("in_file", help="The file to transcribe.")
     parser.add_argument("--max-steps", default=4096)
-    parser.add_argument("--hf-repo", default="kyutai/stt-1b-en_fr-mlx")
+    parser.add_argument("--hf-repo")
     parser.add_argument(
         "--vad", action="store_true", help="Enable VAD (Voice Activity Detection)."
     )
     args = parser.parse_args()
 
     audio, _ = sphn.read(args.in_file, sample_rate=24000)
+    if args.hf_repo is None:
+        if args.vad:
+            args.hf_repo = "kyutai/stt-1b-en_fr-candle"
+        else:
+            args.hf_repo = "kyutai/stt-1b-en_fr-mlx"
     lm_config = hf_hub_download(args.hf_repo, "config.json")
     with open(lm_config, "r") as fobj:
         lm_config = json.load(fobj)
diff --git a/scripts/stt_from_file_pytorch.py b/scripts/stt_from_file_pytorch.py
index 7069761..cf3fb05 100644
--- a/scripts/stt_from_file_pytorch.py
+++ b/scripts/stt_from_file_pytorch.py
@@ -128,6 +128,9 @@ def tokens_to_timestamped_text(
 
 
 def main(args):
+    if args.vad and args.hf_repo is None:
+        args.hf_repo = "kyutai/stt-1b-en_fr-candle"
+
     info = moshi.models.loaders.CheckpointInfo.from_hf_repo(
         args.hf_repo,
         moshi_weights=args.moshi_weight,
diff --git a/scripts/stt_from_mic_mlx.py b/scripts/stt_from_mic_mlx.py
index e068585..589a987 100644
--- a/scripts/stt_from_mic_mlx.py
+++ b/scripts/stt_from_mic_mlx.py
@@ -25,12 +25,17 @@ from moshi_mlx import models, utils
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("--max-steps", default=4096)
-    parser.add_argument("--hf-repo", default="kyutai/stt-1b-en_fr-mlx")
+    parser.add_argument("--hf-repo")
     parser.add_argument(
         "--vad", action="store_true", help="Enable VAD (Voice Activity Detection)."
     )
     args = parser.parse_args()
 
+    if args.hf_repo is None:
+        if args.vad:
+            args.hf_repo = "kyutai/stt-1b-en_fr-candle"
+        else:
+            args.hf_repo = "kyutai/stt-1b-en_fr-mlx"
     lm_config = hf_hub_download(args.hf_repo, "config.json")
     with open(lm_config, "r") as fobj:
         lm_config = json.load(fobj)