Rename the repos.

This commit is contained in:
laurent 2025-06-17 10:28:47 +02:00
parent 1f482e5b8d
commit ad618fd64d
2 changed files with 8 additions and 8 deletions

View File

@@ -7,23 +7,23 @@ Delayed Streams Modeling (DSM) is a flexible formulation for streaming, multimod
The leaderboard model handles English only, it has ~2.6B parameters. The leaderboard model handles English only, it has ~2.6B parameters.
#### PyTorch implementation #### PyTorch implementation
[[Hugging Face]](https://huggingface.co/kyutai/stt) [[Hugging Face]](https://huggingface.co/kyutai/stt-2.6B-en)
```bash ```bash
# wget https://github.com/metavoiceio/metavoice-src/raw/main/assets/bria.mp3 # wget https://github.com/metavoiceio/metavoice-src/raw/main/assets/bria.mp3
python -m moshi.run_inference --hf-repo kyutai/stt bria.mp3 python -m moshi.run_inference --hf-repo kyutai/stt-2.6B-en bria.mp3
``` ```
#### MLX implementation #### MLX implementation
[[Hugging Face]](https://huggingface.co/kyutai/stt-mlx) [[Hugging Face]](https://huggingface.co/kyutai/stt-2.6B-en-mlx)
```bash ```bash
# wget https://github.com/metavoiceio/metavoice-src/raw/main/assets/bria.mp3 # wget https://github.com/metavoiceio/metavoice-src/raw/main/assets/bria.mp3
python -m moshi_mlx.run_inference --hf-repo kyutai/stt-mlx bria.mp3 --temp 0 python -m moshi_mlx.run_inference --hf-repo kyutai/stt-2.6B-en-mlx bria.mp3 --temp 0
``` ```
#### Rust implementation #### Rust implementation
[[Hugging Face]](https://huggingface.co/kyutai/stt-candle) [[Hugging Face]](https://huggingface.co/kyutai/stt-2.6B-en-candle)
The Rust implementation provides a server that can process multiple streaming The Rust implementation provides a server that can process multiple streaming
queries in parallel. Depending on the amount of memory on your GPU, you may queries in parallel. Depending on the amount of memory on your GPU, you may

View File

@@ -6,9 +6,9 @@ authorized_ids = ["open_token"]
[modules.asr] [modules.asr]
path = "/api/asr-streaming" path = "/api/asr-streaming"
type = "BatchedAsr" type = "BatchedAsr"
lm_model_file = "hf://kyutai/stt-candle/model.safetensors" lm_model_file = "hf://kyutai/stt-2.6B-en-candle/model.safetensors"
text_tokenizer_file = "hf://kyutai/stt-candle/tokenizer_en_audio_4000.model" text_tokenizer_file = "hf://kyutai/stt-2.6B-en-candle/tokenizer_en_audio_4000.model"
audio_tokenizer_file = "hf://kyutai/stt-candle/mimi-pytorch-e351c8d8@125.safetensors" audio_tokenizer_file = "hf://kyutai/stt-2.6B-en-candle/mimi-pytorch-e351c8d8@125.safetensors"
asr_delay_in_tokens = 6 asr_delay_in_tokens = 6
batch_size = 16 batch_size = 16
conditioning_learnt_padding = true conditioning_learnt_padding = true