Use the proper repos when VAD is on.

This commit is contained in:
laurent 2025-07-31 17:47:46 +02:00
parent af2283de3f
commit 35f133508f
3 changed files with 15 additions and 2 deletions

View File

@@ -24,13 +24,18 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("in_file", help="The file to transcribe.")
parser.add_argument("--max-steps", default=4096)
-parser.add_argument("--hf-repo", default="kyutai/stt-1b-en_fr-mlx")
+parser.add_argument("--hf-repo")
parser.add_argument(
"--vad", action="store_true", help="Enable VAD (Voice Activity Detection)."
)
args = parser.parse_args()
audio, _ = sphn.read(args.in_file, sample_rate=24000)
+if args.hf_repo is None:
+if args.vad:
+args.hf_repo = "kyutai/stt-1b-en_fr-candle"
+else:
+args.hf_repo = "kyutai/stt-1b-en_fr-mlx"
lm_config = hf_hub_download(args.hf_repo, "config.json")
with open(lm_config, "r") as fobj:
lm_config = json.load(fobj)

View File

@@ -128,6 +128,9 @@ def tokens_to_timestamped_text(
def main(args):
+if args.vad and args.hf_repo is None:
+args.hf_repo = "kyutai/stt-1b-en_fr-candle"
info = moshi.models.loaders.CheckpointInfo.from_hf_repo(
args.hf_repo,
moshi_weights=args.moshi_weight,

View File

@@ -25,12 +25,17 @@ from moshi_mlx import models, utils
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--max-steps", default=4096)
-parser.add_argument("--hf-repo", default="kyutai/stt-1b-en_fr-mlx")
+parser.add_argument("--hf-repo")
parser.add_argument(
"--vad", action="store_true", help="Enable VAD (Voice Activity Detection)."
)
args = parser.parse_args()
+if args.hf_repo is None:
+if args.vad:
+args.hf_repo = "kyutai/stt-1b-en_fr-candle"
+else:
+args.hf_repo = "kyutai/stt-1b-en_fr-mlx"
lm_config = hf_hub_download(args.hf_repo, "config.json")
with open(lm_config, "r") as fobj:
lm_config = json.load(fobj)