Automatically transcribe the reference audio when ref_text is empty

This commit is contained in:
unknown
2024-10-30 14:26:13 +02:00
parent 02d59131c4
commit 3dd59b8cdf

View File

@@ -15,6 +15,9 @@ from f5_tts.infer.utils_infer import (
infer_process, infer_process,
remove_silence_for_generated_wav, remove_silence_for_generated_wav,
save_spectrogram, save_spectrogram,
preprocess_ref_audio_text,
target_sample_rate,
hop_length,
) )
@@ -31,10 +34,8 @@ class F5TTS:
): ):
# Initialize parameters # Initialize parameters
self.final_wave = None self.final_wave = None
self.target_sample_rate = 24000 self.target_sample_rate = target_sample_rate
self.n_mel_channels = 100 self.hop_length = hop_length
self.hop_length = 256
self.target_rms = 0.1
self.seed = -1 self.seed = -1
# Set device # Set device
@@ -97,6 +98,10 @@ class F5TTS:
seed = random.randint(0, sys.maxsize) seed = random.randint(0, sys.maxsize)
seed_everything(seed) seed_everything(seed)
self.seed = seed self.seed = seed
if ref_text == "":
ref_file, ref_text = preprocess_ref_audio_text(ref_file, ref_text, device=self.device)
wav, sr, spect = infer_process( wav, sr, spect = infer_process(
ref_file, ref_file,
ref_text, ref_text,