diff --git a/README.md b/README.md
index 4fa67c2..c67a138 100644
--- a/README.md
+++ b/README.md
@@ -7,23 +7,23 @@ Delayed Streams Modeling (DSM) is a flexible formulation for streaming, multimod
 The leaderboard model handles english only, it has ~2.6B parameters.
 
 #### PyTorch implementation
-[[Hugging Face]](https://huggingface.co/kyutai/stt)
+[[Hugging Face]](https://huggingface.co/kyutai/stt-2.6B-en)
 
 ```bash
 # wget https://github.com/metavoiceio/metavoice-src/raw/main/assets/bria.mp3
-python -m moshi.run_inference --hf-repo kyutai/stt bria.mp3
+python -m moshi.run_inference --hf-repo kyutai/stt-2.6B-en bria.mp3
 ```
 
 #### MLX implementation
-[[Hugging Face]](https://huggingface.co/kyutai/stt-mlx)
+[[Hugging Face]](https://huggingface.co/kyutai/stt-2.6B-en-mlx)
 
 ```bash
 # wget https://github.com/metavoiceio/metavoice-src/raw/main/assets/bria.mp3
-python -m moshi_mlx.run_inference --hf-repo kyutai/stt-mlx bria.mp3 --temp 0
+python -m moshi_mlx.run_inference --hf-repo kyutai/stt-2.6B-en-mlx bria.mp3 --temp 0
 ```
 
 #### Rust implementation
-[[Hugging Face]](https://huggingface.co/kyutai/stt-candle)
+[[Hugging Face]](https://huggingface.co/kyutai/stt-2.6B-en-candle)
 
 The Rust implementation provides a server that can process multiple streaming
 queries in parallel. Dependening on the amount of memory on your GPU, you may
diff --git a/configs/config-stt-hf.toml b/configs/config-stt-hf.toml
index 99c539d..faf5dbf 100644
--- a/configs/config-stt-hf.toml
+++ b/configs/config-stt-hf.toml
@@ -6,9 +6,9 @@ authorized_ids = ["open_token"]
 [modules.asr]
 path = "/api/asr-streaming"
 type = "BatchedAsr"
-lm_model_file = "hf://kyutai/stt-candle/model.safetensors"
-text_tokenizer_file = "hf://kyutai/stt-candle/tokenizer_en_audio_4000.model"
-audio_tokenizer_file = "hf://kyutai/stt-candle/mimi-pytorch-e351c8d8@125.safetensors"
+lm_model_file = "hf://kyutai/stt-2.6B-en-candle/model.safetensors"
+text_tokenizer_file = "hf://kyutai/stt-2.6B-en-candle/tokenizer_en_audio_4000.model"
+audio_tokenizer_file = "hf://kyutai/stt-2.6B-en-candle/mimi-pytorch-e351c8d8@125.safetensors"
 asr_delay_in_tokens = 6
 batch_size = 16
 conditioning_learnt_padding = true
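
Since the config hunk only renames the `hf://` weight URIs, a quick smoke test after this lands is to serve the updated config and confirm the `kyutai/stt-2.6B-en-candle` files resolve and download. A minimal sketch, assuming the `moshi-server` crate that kyutai's docs use to serve these configs (the install command and `worker` subcommand come from that crate, not from this diff):

```bash
# Assumption: the Rust server is the moshi-server crate; install with CUDA support.
cargo install --features cuda moshi-server

# Launch the batched ASR module defined in [modules.asr]; on startup the
# server fetches the renamed hf://kyutai/stt-2.6B-en-candle artifacts.
moshi-server worker --config configs/config-stt-hf.toml
```

If the old `kyutai/stt-candle` repo is eventually removed, any stale local checkout of this config will fail at that fetch step, which makes this a useful check that the rename is complete.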