mirror of
https://github.com/SWivid/F5-TTS.git
synced 2025-12-12 07:40:43 -08:00
Transcribe automatically when ref_text is empty
This commit is contained in:
@@ -15,6 +15,9 @@ from f5_tts.infer.utils_infer import (
|
|||||||
infer_process,
|
infer_process,
|
||||||
remove_silence_for_generated_wav,
|
remove_silence_for_generated_wav,
|
||||||
save_spectrogram,
|
save_spectrogram,
|
||||||
|
preprocess_ref_audio_text,
|
||||||
|
target_sample_rate,
|
||||||
|
hop_length,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -31,10 +34,8 @@ class F5TTS:
|
|||||||
):
|
):
|
||||||
# Initialize parameters
|
# Initialize parameters
|
||||||
self.final_wave = None
|
self.final_wave = None
|
||||||
self.target_sample_rate = 24000
|
self.target_sample_rate = target_sample_rate
|
||||||
self.n_mel_channels = 100
|
self.hop_length = hop_length
|
||||||
self.hop_length = 256
|
|
||||||
self.target_rms = 0.1
|
|
||||||
self.seed = -1
|
self.seed = -1
|
||||||
|
|
||||||
# Set device
|
# Set device
|
||||||
@@ -97,6 +98,10 @@ class F5TTS:
|
|||||||
seed = random.randint(0, sys.maxsize)
|
seed = random.randint(0, sys.maxsize)
|
||||||
seed_everything(seed)
|
seed_everything(seed)
|
||||||
self.seed = seed
|
self.seed = seed
|
||||||
|
|
||||||
|
if ref_text == "":
|
||||||
|
ref_file, ref_text = preprocess_ref_audio_text(ref_file, ref_text, device=self.device)
|
||||||
|
|
||||||
wav, sr, spect = infer_process(
|
wav, sr, spect = infer_process(
|
||||||
ref_file,
|
ref_file,
|
||||||
ref_text,
|
ref_text,
|
||||||
|
|||||||
Reference in New Issue
Block a user