Add example for PyTorch implementation
This commit is contained in:
parent
96ff217437
commit
68e1530b05
|
|
@ -17,16 +17,6 @@ from moshi.models.loaders import CheckpointInfo
|
||||||
from moshi.models.tts import DEFAULT_DSM_TTS_REPO, DEFAULT_DSM_TTS_VOICE_REPO, TTSModel
|
from moshi.models.tts import DEFAULT_DSM_TTS_REPO, DEFAULT_DSM_TTS_VOICE_REPO, TTSModel
|
||||||
|
|
||||||
|
|
||||||
def audio_to_int16(audio: np.ndarray) -> np.ndarray:
    """Convert an audio array to 16-bit signed integer PCM samples.

    Args:
        audio: Samples as ``int16`` or as any NumPy floating dtype. Floating
            samples are treated as full scale at ``[-1, 1]``; values outside
            that range are clipped.

    Returns:
        The input array itself when it is already ``int16``; otherwise a new
        ``int16`` array with the clipped samples scaled by 32767.

    Raises:
        TypeError: If ``audio`` has a dtype other than ``int16`` or a
            floating type.
    """
    if audio.dtype == np.int16:
        return audio
    if np.issubdtype(audio.dtype, np.floating):
        # Work in at least float32: 32767 is not representable in float16
        # (it rounds to 32768), which would overflow int16 at full scale.
        # float32 input is left untouched, float64 keeps its precision.
        work_dtype = np.result_type(audio.dtype, np.float32)
        clipped = np.clip(audio.astype(work_dtype, copy=False), -1, 1)
        # Multiply by 32767 and not 32768 so that int16 doesn't overflow.
        return (clipped * 32767).astype(np.int16)
    raise TypeError(f"Unsupported audio data type: {audio.dtype}")
|
|
||||||
|
|
||||||
|
|
||||||
def play_audio(audio: np.ndarray, sample_rate: int):
|
def play_audio(audio: np.ndarray, sample_rate: int):
|
||||||
# Requires the Portaudio library which might not be available in all environments.
|
# Requires the Portaudio library which might not be available in all environments.
|
||||||
import sounddevice as sd
|
import sounddevice as sd
|
||||||
|
|
@ -86,7 +76,8 @@ def main():
|
||||||
)
|
)
|
||||||
|
|
||||||
print("Generating audio...")
|
print("Generating audio...")
|
||||||
# This doesn't do streaming generation,
|
# This doesn't do streaming generation, but the model allows it. For now, see Rust
|
||||||
|
# example.
|
||||||
result = tts_model.generate([entries], [condition_attributes])
|
result = tts_model.generate([entries], [condition_attributes])
|
||||||
|
|
||||||
frames = torch.cat(result.frames, dim=-1)
|
frames = torch.cat(result.frames, dim=-1)
|
||||||
|
|
|
||||||
164
tts_pytorch.ipynb
Normal file
164
tts_pytorch.ipynb
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user