From 76f14ddfb0d7dc3fb7efde260cfa09fd27a13087 Mon Sep 17 00:00:00 2001
From: Sematre <Sematre@gmx.de>
Date: Sat, 5 Jul 2025 20:43:57 +0200
Subject: [PATCH] Add CPU inference option

---
 README.md              |  3 +++
 scripts/tts_pytorch.py | 16 +++++++++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 0daabaa..79901e9 100644
--- a/README.md
+++ b/README.md
@@ -234,6 +234,9 @@ echo "Hey, how are you?" | python scripts/tts_pytorch.py - -
 
 # From text file to audio file
 python scripts/tts_pytorch.py text_to_say.txt audio_output.wav
+
+# Use the --cpu flag to run inference on the CPU instead of the GPU
+python scripts/tts_pytorch.py --cpu text_to_say.txt audio_output.wav
 ```
 
 This requires the [moshi package](https://pypi.org/project/moshi/), which can be installed via pip.
diff --git a/scripts/tts_pytorch.py b/scripts/tts_pytorch.py
index 9230319..750cbbd 100644
--- a/scripts/tts_pytorch.py
+++ b/scripts/tts_pytorch.py
@@ -44,12 +44,26 @@ def main():
         help="The voice to use, relative to the voice repo root. "
         f"See {DEFAULT_DSM_TTS_VOICE_REPO}",
     )
+    parser.add_argument(
+        "--cpu",
+        action="store_true",
+        help="Use CPU instead of GPU for inference",
+    )
     args = parser.parse_args()
 
     print("Loading model...")
     checkpoint_info = CheckpointInfo.from_hf_repo(args.hf_repo)
+
+    # Set device and precision
+    if args.cpu:
+        device = torch.device("cpu")
+        dtype = torch.float32
+    else:
+        device = torch.device("cuda")
+        dtype = torch.bfloat16
+
     tts_model = TTSModel.from_checkpoint_info(
-        checkpoint_info, n_q=32, temp=0.6, device=torch.device("cuda")
+        checkpoint_info, n_q=32, temp=0.6, device=device, dtype=dtype
     )
 
     if args.inp == "-":