Config tweaks.
This commit is contained in:
parent
a786ad8a0b
commit
f87b8f1e6f
10
README.md
10
README.md
|
|
@ -7,7 +7,7 @@ Delayed Streams Modeling (DSM) is a flexible formulation for streaming, multimod
|
||||||
The main model handles English only; it has ~2.6B parameters.
|
The main model handles English only; it has ~2.6B parameters.
|
||||||
|
|
||||||
#### PyTorch implementation
|
#### PyTorch implementation
|
||||||
[[Hugging Face]](https://huggingface.co/kyutai/stt-2.6B-en)
|
[[Hugging Face]](https://huggingface.co/kyutai/stt-2.6b-en)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# wget https://github.com/metavoiceio/metavoice-src/raw/main/assets/bria.mp3
|
# wget https://github.com/metavoiceio/metavoice-src/raw/main/assets/bria.mp3
|
||||||
|
|
@ -15,15 +15,15 @@ python -m moshi.run_inference --hf-repo kyutai/stt-2.6B-en bria.mp3
|
||||||
```
|
```
|
||||||
|
|
||||||
#### MLX implementation
|
#### MLX implementation
|
||||||
[[Hugging Face]](https://huggingface.co/kyutai/stt-2.6B-en-mlx)
|
[[Hugging Face]](https://huggingface.co/kyutai/stt-2.6b-en-mlx)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# wget https://github.com/metavoiceio/metavoice-src/raw/main/assets/bria.mp3
|
# wget https://github.com/metavoiceio/metavoice-src/raw/main/assets/bria.mp3
|
||||||
python -m moshi_mlx.run_inference --hf-repo kyutai/stt-2.6B-en-mlx bria.mp3 --temp 0
|
python -m moshi_mlx.run_inference --hf-repo kyutai/stt-2.6b-en-mlx bria.mp3 --temp 0
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Rust implementation
|
#### Rust implementation
|
||||||
[[Hugging Face]](https://huggingface.co/kyutai/stt-2.6B-en-candle)
|
[[Hugging Face]](https://huggingface.co/kyutai/stt-2.6b-en-candle)
|
||||||
|
|
||||||
The Rust implementation provides a server that can process multiple streaming
|
The Rust implementation provides a server that can process multiple streaming
|
||||||
queries in parallel. Depending on the amount of memory on your GPU, you may
|
queries in parallel. Depending on the amount of memory on your GPU, you may
|
||||||
|
|
@ -59,7 +59,7 @@ the data as fast as possible.
|
||||||
This model has ~1B parameters and supports both English and French.
|
This model has ~1B parameters and supports both English and French.
|
||||||
|
|
||||||
#### Rust implementation
|
#### Rust implementation
|
||||||
[[Hugging Face]](https://huggingface.co/kyutai/stt-1B-en_fr-candle)
|
[[Hugging Face]](https://huggingface.co/kyutai/stt-1b-en_fr-candle)
|
||||||
|
|
||||||
The only difference with the en only model is the config file used when
|
The only difference with the en only model is the config file used when
|
||||||
launching the server.
|
launching the server.
|
||||||
|
|
|
||||||
|
|
@ -6,9 +6,9 @@ authorized_ids = ["open_token"]
|
||||||
[modules.asr]
|
[modules.asr]
|
||||||
path = "/api/asr-streaming"
|
path = "/api/asr-streaming"
|
||||||
type = "BatchedAsr"
|
type = "BatchedAsr"
|
||||||
lm_model_file = "hf://kyutai/stt-1B-en_fr-candle/model.safetensors"
|
lm_model_file = "hf://kyutai/stt-1b-en_fr-candle/model.safetensors"
|
||||||
text_tokenizer_file = "hf://kyutai/stt-1B-en_fr-candle/tokenizer_en_fr_audio_8000.model"
|
text_tokenizer_file = "hf://kyutai/stt-1b-en_fr-candle/tokenizer_en_fr_audio_8000.model"
|
||||||
audio_tokenizer_file = "hf://kyutai/stt-1B-en_fr-candle/mimi-pytorch-e351c8d8@125.safetensors"
|
audio_tokenizer_file = "hf://kyutai/stt-1b-en_fr-candle/mimi-pytorch-e351c8d8@125.safetensors"
|
||||||
asr_delay_in_tokens = 6
|
asr_delay_in_tokens = 6
|
||||||
batch_size = 64
|
batch_size = 64
|
||||||
conditioning_learnt_padding = true
|
conditioning_learnt_padding = true
|
||||||
|
|
|
||||||
|
|
@ -6,9 +6,9 @@ authorized_ids = ["open_token"]
|
||||||
[modules.asr]
|
[modules.asr]
|
||||||
path = "/api/asr-streaming"
|
path = "/api/asr-streaming"
|
||||||
type = "BatchedAsr"
|
type = "BatchedAsr"
|
||||||
lm_model_file = "hf://kyutai/stt-2.6B-en-candle/model.safetensors"
|
lm_model_file = "hf://kyutai/stt-2.6b-en-candle/model.safetensors"
|
||||||
text_tokenizer_file = "hf://kyutai/stt-2.6B-en-candle/tokenizer_en_audio_4000.model"
|
text_tokenizer_file = "hf://kyutai/stt-2.6b-en-candle/tokenizer_en_audio_4000.model"
|
||||||
audio_tokenizer_file = "hf://kyutai/stt-2.6B-en-candle/mimi-pytorch-e351c8d8@125.safetensors"
|
audio_tokenizer_file = "hf://kyutai/stt-2.6b-en-candle/mimi-pytorch-e351c8d8@125.safetensors"
|
||||||
asr_delay_in_tokens = 6
|
asr_delay_in_tokens = 6
|
||||||
batch_size = 16
|
batch_size = 16
|
||||||
conditioning_learnt_padding = true
|
conditioning_learnt_padding = true
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user