Update BigVGAN vocoder and F5-BigVGAN version, trained on Emilia ZH & EN, 1.25M updates

This commit is contained in:
ZhikangNiu
2024-10-31 20:06:36 +08:00
parent dee0420b59
commit 712d52772e
14 changed files with 365 additions and 177 deletions

View File

@@ -1,24 +1,18 @@
import random
import sys
import tqdm
from importlib.resources import files
import soundfile as sf
import torch
import tqdm
from cached_path import cached_path
from f5_tts.infer.utils_infer import (hop_length, infer_process, load_model,
load_vocoder, preprocess_ref_audio_text,
remove_silence_for_generated_wav,
save_spectrogram, target_sample_rate)
from f5_tts.model import DiT, UNetT
from f5_tts.model.utils import seed_everything
from f5_tts.infer.utils_infer import (
load_vocoder,
load_model,
infer_process,
remove_silence_for_generated_wav,
save_spectrogram,
preprocess_ref_audio_text,
target_sample_rate,
hop_length,
)
class F5TTS:
@@ -29,6 +23,7 @@ class F5TTS:
vocab_file="",
ode_method="euler",
use_ema=True,
vocoder_name="vocos",
local_path=None,
device=None,
):
@@ -44,11 +39,11 @@ class F5TTS:
)
# Load models
self.load_vocoder_model(local_path)
self.load_vocoder_model(vocoder_name, local_path)
self.load_ema_model(model_type, ckpt_file, vocab_file, ode_method, use_ema)
def load_vocoder_model(self, local_path):
    """Load the vocoder and store it on ``self.vocoder``.

    Args:
        local_path: Forwarded unchanged to ``load_vocoder``; ``None`` means
            no local path was supplied.
    """
    # True exactly when a local path was given. Presumably this switches
    # ``load_vocoder`` to local loading -- confirm against its definition.
    use_local = local_path is not None
    self.vocoder = load_vocoder(use_local, local_path, self.device)
def load_vocoder_model(self, vocoder_name, local_path):
    """Load the named vocoder and store it on ``self.vocoder``.

    Args:
        vocoder_name: Vocoder identifier, forwarded unchanged as the first
            argument of ``load_vocoder``.
        local_path: Forwarded unchanged to ``load_vocoder``; ``None`` means
            no local path was supplied.
    """
    # True exactly when a local path was given. Presumably this switches
    # ``load_vocoder`` to local loading -- confirm against its definition.
    use_local = local_path is not None
    self.vocoder = load_vocoder(vocoder_name, use_local, local_path, self.device)
def load_ema_model(self, model_type, ckpt_file, vocab_file, ode_method, use_ema):
if model_type == "F5-TTS":