# Root-level server settings.
static_dir = "./static/"
# TOML performs no environment-variable expansion: "$HOME" is stored as a
# literal string and must be expanded by the consuming application.
# NOTE(review): log_dir and instance_name say "tts", but the only module
# visible here is an ASR one — confirm the naming is intentional.
log_dir = "$HOME/tmp/tts-logs"
instance_name = "tts"
# NOTE(review): "open_token" looks like a placeholder credential — confirm
# before exposing this server.
authorized_ids = ["open_token"]

[modules.asr]
path = "/api/asr-streaming"
type = "BatchedAsr"
# Model, text-tokenizer and audio-tokenizer assets, given as hf:// URIs
# (presumably resolved by the consumer from the Hugging Face Hub — confirm).
lm_model_file = "hf://kyutai/stt-2.6b-en-candle/model.safetensors"
text_tokenizer_file = "hf://kyutai/stt-2.6b-en-candle/tokenizer_en_audio_4000.model"
audio_tokenizer_file = "hf://kyutai/stt-2.6b-en-candle/mimi-pytorch-e351c8d8@125.safetensors"
asr_delay_in_tokens = 32
batch_size = 16
conditioning_learnt_padding = true
temperature = 0

[modules.asr.model]
audio_vocab_size = 2049
text_in_vocab_size = 4001
text_out_vocab_size = 4000
audio_codebooks = 32

[modules.asr.model.transformer]
d_model = 2048
num_heads = 32
num_layers = 48
dim_feedforward = 8192
causal = true
norm_first = true
bias_ff = false
bias_attn = false
context = 375
max_period = 100000
use_conv_block = false
use_conv_bias = true
gating = "silu"
norm = "RmsNorm"
positional_embedding = "Rope"
conv_layout = false
conv_kernel_size = 3
kv_repeat = 1
max_seq_len = 40960