kyutai/configs/config-stt-en-hf.toml
Václav Volhejn 0112245ef7
Refactor Rust server examples (#19)
* Rename examples and add pre-commit

* Fix references to scripts, add implementations overview

* Link to colab notebook via github

* Simplify

* Add auth note

* Allow visualizing VAD

* Remove unused variable

* Add audio samples

* Address review comments
2025-06-26 16:51:43 +02:00

43 lines
1016 B
TOML

# Top-level server settings (root table): static assets, logging, instance
# identity, and the tokens accepted for authorization.

# Relative path; presumably resolved against the server's working directory
# rather than this file's location — TODO confirm.
static_dir = "./static/"
# TOML performs no variable substitution, so "$HOME" is stored literally here
# and must be expanded by the consuming server.
# NOTE(review): this path and `instance_name` both say "tts", yet the only
# module configured in this file is the ASR module below, loading `stt-*`
# weights. Looks like a leftover from a TTS config; confirm before renaming,
# since that would change where logs are written and how the instance is
# identified.
log_dir = "$HOME/tmp/tts-logs"
instance_name = "tts"
# NOTE(review): "public_token" looks like a well-known placeholder rather than
# a secret — replace it before exposing the server beyond local testing.
authorized_ids = ["public_token"]
# ASR module: the route it is served on, the module type, the model artifacts
# it loads, and decoding/batching parameters.
[modules.asr]
# Route for this module; the name suggests a streaming endpoint — TODO confirm
# the transport the server uses for it.
path = "/api/asr-streaming"
# Selects the module implementation; presumably one that batches concurrent
# streams (see `batch_size`) — verify against the server.
type = "BatchedAsr"
# All three artifacts use an `hf://` URI into the same
# `kyutai/stt-2.6b-en-candle` repository; presumably downloaded from the
# Hugging Face Hub by the server on startup — TODO confirm caching behaviour.
lm_model_file = "hf://kyutai/stt-2.6b-en-candle/model.safetensors"
text_tokenizer_file = "hf://kyutai/stt-2.6b-en-candle/tokenizer_en_audio_4000.model"
audio_tokenizer_file = "hf://kyutai/stt-2.6b-en-candle/mimi-pytorch-e351c8d8@125.safetensors"
# Expressed in tokens, not seconds; the wall-clock delay depends on the token
# rate, which this file does not state.
asr_delay_in_tokens = 32
# Presumably the maximum number of streams processed together — TODO confirm.
batch_size = 16
conditioning_learnt_padding = true
# Written as a TOML integer, not a float. Presumably 0 selects greedy
# decoding; confirm the consumer accepts an integer for this field.
temperature = 0
# Vocabulary sizes and codebook count for the ASR model. These presumably have
# to match the checkpoint referenced by `lm_model_file` — TODO confirm.
[modules.asr.model]
# 2049 = 2048 + 1; the extra entry is presumably a special/padding token —
# confirm against the model definition.
audio_vocab_size = 2049
# The input text vocabulary is one larger than the output one (4001 vs 4000);
# presumably an input-only special token — confirm. The 4000 figure agrees with
# the `tokenizer_en_audio_4000` tokenizer file name in [modules.asr].
text_in_vocab_size = 4001
text_out_vocab_size = 4000
audio_codebooks = 32
# Transformer hyperparameters for the ASR model. These presumably describe the
# architecture of the checkpoint being loaded and are not meant to be tuned
# independently of it — TODO confirm.
[modules.asr.model.transformer]
# 2048 / 32 heads = 64 dimensions per head, assuming heads split d_model evenly.
d_model = 2048
num_heads = 32
num_layers = 48
# 8192 = 4 x d_model.
dim_feedforward = 8192
causal = true
norm_first = true
bias_ff = false
bias_attn = false
# Presumably the attention context length in model steps — TODO confirm units.
context = 375
# Presumably the base period of the rotary embedding selected by
# `positional_embedding` below — confirm.
max_period = 100000
# NOTE(review): the conv block is disabled here, so `use_conv_bias`,
# `conv_layout` and `conv_kernel_size` presumably have no effect in this
# configuration — confirm before relying on them.
use_conv_block = false
use_conv_bias = true
gating = "silu"
norm = "RmsNorm"
positional_embedding = "Rope"
conv_layout = false
conv_kernel_size = 3
# 1 presumably means key/value heads are not shared across query heads —
# TODO confirm.
kv_repeat = 1
max_seq_len = 40960