kyutai/install.sh

# Installs the Kyutai TTS stack and then starts the API server:
#   1. system packages via apt-get (including the Python 3.12 venv/dev packages)
#   2. a Python 3.12 virtual environment in ~/venv-tts-kyutai
#   3. PyTorch, the core/API Python dependencies and the moshi package
#   4. the /app working directories and, if available, the TTS script
#   5. python /app/api_server.py
# Run with bash (the script uses `source` and arrays).

# Stop at the first failing step instead of continuing with a half-installed
# environment and starting the server anyway.
set -euo pipefail

# Environment for the install steps and for the server started at the end.
export DEBIAN_FRONTEND=noninteractive
export PYTHONUNBUFFERED=1
export CUDA_VISIBLE_DEVICES=0

# Location of the virtual environment (tilde is expanded at assignment time).
venv_dir=~/venv-tts-kyutai
readonly venv_dir

# Install system dependencies.
# The python3.12 packages are installed in the same transaction as everything
# else: the package lists are deleted right afterwards, so any later
# `apt install` would have no index to resolve packages from.
system_packages=(
  wget
  curl
  git
  build-essential
  libsndfile1
  ffmpeg
  sox
  alsa-utils
  pulseaudio
  python3.12-venv
  python3.12-dev
)
# Separate statements on purpose: in an `a && b && c` list, errexit does not
# fire when `a` or `b` fails.
apt-get update
apt-get install -y "${system_packages[@]}"
rm -rf /var/lib/apt/lists/*

# Create and activate the virtual environment.
python3.12 -m venv "${venv_dir}"
# nounset is switched off while sourcing: the generated activate script is not
# guaranteed to be clean under `set -u`.
set +u
# shellcheck source=/dev/null
source "${venv_dir}/bin/activate"
set -u

# Upgrade pip inside the venv only. The system-wide pip is left alone: it is
# not installed by the package list above and may be externally managed.
pip install --no-cache-dir --upgrade pip

# Install PyTorch (torch, torchvision, torchaudio) from the cu124 wheel index.
pip install --no-cache-dir torch torchvision torchaudio \
  --index-url https://download.pytorch.org/whl/cu124

# Install core dependencies.
pip install --no-cache-dir \
  numpy \
  scipy \
  librosa \
  soundfile \
  huggingface_hub \
  einops \
  transformers \
  accelerate

# Install API dependencies.
# 'uvicorn[standard]' is quoted so the brackets are never treated as a glob.
pip install --no-cache-dir \
  fastapi \
  'uvicorn[standard]' \
  python-multipart \
  pydantic

# Install moshi package with all dependencies (following Colab notebook).
pip install --no-cache-dir 'sphn<0.2'
pip install --no-cache-dir 'moshi==0.2.8'

# Create directories for input/output.
mkdir -p /app/input /app/output /app/scripts /app/api_output

# Cloning the Kyutai delayed-streams-modeling repository is currently disabled,
# so /app/kyutai-repo only exists if it was provisioned some other way.
#git clone https://github.com/kyutai-labs/delayed-streams-modeling.git /app/kyutai-repo

# Copy the TTS script (best effort). Candidates are tried in priority order:
# the copy relative to the current working directory wins over the one from
# the repository checkout. A missing script is reported but is not fatal.
tts_script_copied=0
for tts_candidate in \
  scripts/tts_pytorch.py \
  /app/kyutai-repo/scripts/tts_pytorch.py; do
  if [[ -f "${tts_candidate}" ]] && cp -- "${tts_candidate}" /app/scripts/; then
    tts_script_copied=1
    break
  fi
done
if (( tts_script_copied == 0 )); then
  echo "TTS script not found, will create custom one"
fi

# Start TTS-Server (runs with the venv's python because the venv is active).
python /app/api_server.py
install.sh hinzugefügt 2025-08-12 18:03:43 +00:00			`# Set environment variables`
			`export DEBIAN_FRONTEND=noninteractive`
			`export PYTHONUNBUFFERED=1`
			`export CUDA_VISIBLE_DEVICES=0`

			`# Install system dependencies`
			`apt-get update && apt-get install -y \`
			`wget \`
			`curl \`
			`git \`
			`build-essential \`
			`libsndfile1 \`
			`ffmpeg \`
			`sox \`
			`alsa-utils \`
			`pulseaudio \`
			`&& rm -rf /var/lib/apt/lists/*`


			`# Install Python dependencies first (for better caching)`
			`pip install --no-cache-dir --upgrade pip`

			`# Create virtual environment`
			`apt install python3.12-venv python3.12-dev`
			`python3.12 -m venv ~/venv-tts-kyutai`
			`source ~/venv-tts-kyutai/bin/activate`

			`# Install Python dependencies first (for better caching)`
			`pip install --no-cache-dir --upgrade pip`

			`# Install PyTorch with CUDA support for Python 3.12`
			`pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124`

			`# Install core dependencies`
			`pip install --no-cache-dir \`
			`numpy \`
			`scipy \`
			`librosa \`
			`soundfile \`
			`huggingface_hub \`
			`einops \`
			`transformers \`
			`accelerate`

			`# Install API dependencies`
			`pip install --no-cache-dir \`
			`fastapi \`
			`uvicorn[standard] \`
			`python-multipart \`
			`pydantic`

			`# Install moshi package with all dependencies (following Colab notebook)`
			`pip install --no-cache-dir 'sphn<0.2'`
			`pip install --no-cache-dir "moshi==0.2.8"`

			`# Create directories for input/output`
			`mkdir -p /app/input /app/output /app/scripts /app/api_output`

			`# Download the Kyutai delayed-streams-modeling repository`
			`#git clone https://github.com/kyutai-labs/delayed-streams-modeling.git /app/kyutai-repo`

			`# Copy the TTS script from the repository`
			`cp /app/kyutai-repo/scripts/tts_pytorch.py /app/scripts/ \|\| echo "TTS script not found, will create custom one"`

			`# Create directories for input/output`
			`mkdir -p /app/input /app/output /app/scripts /app/api_output`

			`# Download the Kyutai delayed-streams-modeling repository`
			`#git clone https://github.com/kyutai-labs/delayed-streams-modeling.git /app/kyutai-repo`

			`# Copy the TTS script from the repository`
			`cp scripts/tts_pytorch.py /app/scripts/ \|\| echo "TTS script not found, will create custom one"`

			`# Create directories for input/output`
			`mkdir -p /app/input /app/output /app/scripts /app/api_output`

			`# Start TTS-Server`
			`python /app/api_server.py`