From 7e92c6f28adf2df069d3ba996def44c5660ea68c Mon Sep 17 00:00:00 2001
From: tipi
Date: Tue, 12 Aug 2025 17:59:20 +0000
Subject: [PATCH] =?UTF-8?q?app/scripts/tts=5Frunner.py=20hinzugef=C3=BCgt?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/scripts/tts_runner.py | 93 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)
 create mode 100644 app/scripts/tts_runner.py

diff --git a/app/scripts/tts_runner.py b/app/scripts/tts_runner.py
new file mode 100644
index 0000000..733b550
--- /dev/null
+++ b/app/scripts/tts_runner.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python3
+"""
+Kyutai TTS PyTorch Runner
+Dockerized implementation for text-to-speech generation
+"""
+import argparse
+import os
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
+import torch
+
+
+def main():
+    """Run Kyutai text-to-speech generation from the command line.
+
+    The text comes from the input file, or from stdin when the input
+    argument is "-". Synthesis is delegated to a child Python process:
+    /app/scripts/tts_pytorch.py when that script exists, otherwise the
+    moshi.run_inference module.
+
+    Raises:
+        SystemExit: With status 1 when the child process fails.
+    """
+    parser = argparse.ArgumentParser(description='Kyutai TTS PyTorch Runner')
+    parser.add_argument('input_file', help='Input text file or "-" for stdin')
+    parser.add_argument('output_file', help='Output audio file')
+    parser.add_argument('--model', default='kyutai/tts-1.6b-en_fr', help='TTS model to use')
+    parser.add_argument('--device', default='cuda' if torch.cuda.is_available() else 'cpu', help='Device to use')
+
+    args = parser.parse_args()
+
+    print(f"Using device: {args.device}")
+    print(f"CUDA available: {torch.cuda.is_available()}")
+
+    temp_path = None
+    try:
+        if args.input_file == '-':
+            # The child process takes a file path, so spool stdin to disk.
+            # A uniquely named file keeps concurrent runs from overwriting
+            # each other's input.
+            text = sys.stdin.read().strip()
+            with tempfile.NamedTemporaryFile('w', suffix='.txt', delete=False) as f:
+                f.write(text)
+                temp_path = f.name
+            input_file = temp_path
+        else:
+            input_file = args.input_file
+
+        # Launch the child with the interpreter running this script: a bare
+        # "python" may be missing or belong to a different environment.
+        # NOTE(review): --device is never forwarded, and --model only reaches
+        # the moshi fallback -- confirm whether that is intended.
+        tts_script = Path('/app/scripts/tts_pytorch.py')
+        if tts_script.exists():
+            print("Using original TTS script from Kyutai repository")
+            # Let the script's own output stream straight to the terminal.
+            cmd = [sys.executable, str(tts_script), input_file, args.output_file]
+            capture = False
+        else:
+            print("Using moshi package for TTS generation")
+            cmd = [
+                sys.executable, '-m', 'moshi.run_inference',
+                '--hf-repo', args.model,
+                input_file,
+                args.output_file,
+            ]
+            capture = True
+
+        # Both backends fail the same way: report, then exit with status 1.
+        result = subprocess.run(cmd, capture_output=capture, text=True)
+        if result.returncode != 0:
+            if capture:
+                detail = result.stderr
+            else:
+                detail = f"TTS script exited with status {result.returncode}"
+            print(f"Error: {detail}")
+            sys.exit(1)
+    finally:
+        # Remove the spooled stdin copy whether or not generation succeeded.
+        if temp_path is not None:
+            try:
+                os.unlink(temp_path)
+            except OSError:
+                pass  # best-effort cleanup; a leftover temp file is harmless
+
+    print(f"Audio generated: {args.output_file}")
+
+
+if __name__ == '__main__':
+    main()