From 7e92c6f28adf2df069d3ba996def44c5660ea68c Mon Sep 17 00:00:00 2001
From: tipi <tipi@fairdinand.eu>
Date: Tue, 12 Aug 2025 17:59:20 +0000
Subject: [PATCH] =?UTF-8?q?app/scripts/tts=5Frunner.py=20hinzugef=C3=BCgt?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/scripts/tts_runner.py | 59 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 app/scripts/tts_runner.py

diff --git a/app/scripts/tts_runner.py b/app/scripts/tts_runner.py
new file mode 100644
index 0000000..733b550
--- /dev/null
+++ b/app/scripts/tts_runner.py
@@ -0,0 +1,59 @@
+#!/usr/bin/env python3
+"""
+Kyutai TTS PyTorch Runner
+Dockerized implementation for text-to-speech generation
+"""
+import sys
+import os
+import argparse
+import torch
+from pathlib import Path
+
+def main():
+    parser = argparse.ArgumentParser(description='Kyutai TTS PyTorch Runner')
+    parser.add_argument('input_file', help='Input text file or "-" for stdin')
+    parser.add_argument('output_file', help='Output audio file')
+    parser.add_argument('--model', default='kyutai/tts-1.6b-en_fr', help='TTS model to use')
+    parser.add_argument('--device', default='cuda' if torch.cuda.is_available() else 'cpu', help='Device to use')
+    
+    args = parser.parse_args()
+    
+    print(f"Using device: {args.device}")
+    print(f"CUDA available: {torch.cuda.is_available()}")
+    
+    # Handle stdin input
+    if args.input_file == '-':
+        # Read from stdin and create temporary file
+        text = sys.stdin.read().strip()
+        temp_file = '/tmp/temp_input.txt'
+        with open(temp_file, 'w') as f:
+            f.write(text)
+        input_file = temp_file
+    else:
+        input_file = args.input_file
+    
+    # Check if the original TTS script exists
+    tts_script = Path('/app/scripts/tts_pytorch.py')
+    if tts_script.exists():
+        print("Using original TTS script from Kyutai repository")
+        import subprocess
+        cmd = ['python', str(tts_script), input_file, args.output_file]
+        subprocess.run(cmd, check=True)
+    else:
+        print("Using moshi package for TTS generation")
+        import subprocess
+        cmd = [
+            'python', '-m', 'moshi.run_inference', 
+            '--hf-repo', args.model,
+            input_file,
+            args.output_file
+        ]
+        result = subprocess.run(cmd, capture_output=True, text=True)
+        if result.returncode != 0:
+            print(f"Error: {result.stderr}")
+            sys.exit(1)
+        print(f"Audio generated: {args.output_file}")
+
+if __name__ == '__main__':
+    main()
+EOF
\ No newline at end of file