#!/usr/bin/env bash
#
# Provisions a Debian/Ubuntu-style host (it relies on apt-get) for the Kyutai
# TTS API server: installs system and Python packages, creates a Python 3.12
# virtual environment, stages the TTS script under /app and finally starts
# /app/api_server.py.
# NOTE(review): presumably has to run as root (apt-get, writes under /app).

# Fail fast: stop on the first failing command, on use of an unset variable,
# or when any stage of a pipeline fails, instead of carrying on with a
# half-provisioned system.
set -euo pipefail

# Set environment variables
export DEBIAN_FRONTEND=noninteractive  # keep apt/debconf from prompting
export PYTHONUNBUFFERED=1              # Python writes stdout/stderr unbuffered
export CUDA_VISIBLE_DEVICES=0          # restrict CUDA to device index 0

# Install system dependencies: download/build tooling (wget, curl, git,
# build-essential) and audio tooling (libsndfile1, ffmpeg, sox, alsa-utils,
# pulseaudio).
# The downloaded package lists are removed afterwards, so any later apt-get
# install has to run its own `apt-get update` first.
apt-get update && apt-get install -y \
  wget \
  curl \
  git \
  build-essential \
  libsndfile1 \
  ffmpeg \
  sox \
  alsa-utils \
  pulseaudio \
  && rm -rf /var/lib/apt/lists/*

# Create virtual environment
#
# The Python 3.12 venv/dev packages get their own `apt-get update`, because
# the package lists are deleted right after the system dependency install.
# `-y` is required: without it the install asks for confirmation and cannot
# complete in an unattended run.
apt-get update \
  && apt-get install -y python3.12-venv python3.12-dev \
  && rm -rf /var/lib/apt/lists/*

venv_dir=~/venv-tts-kyutai
python3.12 -m venv "${venv_dir}"
# shellcheck disable=SC1091  # the activate script only exists at run time
source "${venv_dir}/bin/activate"

# Upgrade pip inside the virtual environment only. The upgrade used to run a
# second time before the venv existed, which targets the system Python.
# NOTE(review): on distributions that mark the system Python as externally
# managed (PEP 668) that system-level upgrade is expected to fail - confirm
# on the target image.
python -m pip install --no-cache-dir --upgrade pip

# Install PyTorch with CUDA support for Python 3.12.
# The wheels are taken from the PyTorch package index for CUDA 12.4 (cu124)
# instead of the default index.
pip install --no-cache-dir \
  torch torchvision torchaudio \
  --index-url https://download.pytorch.org/whl/cu124

# Install core dependencies (numerics, audio I/O and model tooling).
# --no-cache-dir keeps pip from storing downloaded wheels on disk.
pip install --no-cache-dir \
  numpy \
  scipy \
  librosa \
  soundfile \
  huggingface_hub \
  einops \
  transformers \
  accelerate

# Install API dependencies.
# 'uvicorn[standard]' is quoted because an unquoted [standard] is a shell glob
# pattern: it would be rewritten if a matching file name existed in the
# current directory.
pip install --no-cache-dir \
  fastapi \
  'uvicorn[standard]' \
  python-multipart \
  pydantic

# Install moshi package with all dependencies (following Colab notebook).
# sphn is constrained to <0.2 before moshi is installed at the pinned 0.2.8.
# NOTE(review): presumably the sphn constraint is what moshi 0.2.8 needs -
# confirm against the notebook.
pip install --no-cache-dir 'sphn<0.2'
pip install --no-cache-dir 'moshi==0.2.8'

# Create directories for input/output (once; mkdir -p is a no-op for
# directories that already exist).
mkdir -p /app/input /app/output /app/scripts /app/api_output

# Download the Kyutai delayed-streams-modeling repository
# NOTE(review): the clone is disabled, so /app/kyutai-repo only exists if
# something else provides it.
#git clone https://github.com/kyutai-labs/delayed-streams-modeling.git /app/kyutai-repo

# Copy the TTS script from the repository.
# Candidates are tried in priority order: a scripts/ directory relative to the
# current working directory first, then the /app/kyutai-repo checkout. The
# step stays best-effort: a missing or uncopyable script only prints a notice.
tts_script_staged=0
for tts_script_src in \
  scripts/tts_pytorch.py \
  /app/kyutai-repo/scripts/tts_pytorch.py; do
  if [[ -f "${tts_script_src}" ]] && cp -- "${tts_script_src}" /app/scripts/; then
    tts_script_staged=1
    break
  fi
done
if [[ "${tts_script_staged}" -eq 0 ]]; then
  echo "TTS script not found, will create custom one" >&2
fi

# Start TTS-Server
# exec replaces this shell with the server process, so signals sent to the
# script reach the server directly and the server's exit status becomes the
# script's exit status. `python` is looked up on PATH.
exec python /app/api_server.py