mirror of
https://github.com/SWivid/F5-TTS.git
synced 2025-12-25 04:15:21 -08:00
Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | ac79d0ec1e | | |
| | dad398c0c1 | | |
| | 3d969bf78d | | |
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "f5-tts"
|
||||
version = "1.1.3"
|
||||
version = "1.1.4"
|
||||
description = "F5-TTS: A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching"
|
||||
readme = "README.md"
|
||||
license = {text = "MIT License"}
|
||||
|
||||
@@ -80,6 +80,8 @@ def load_custom(ckpt_path: str, vocab_path="", model_cfg=None):
|
||||
vocab_path = str(cached_path(vocab_path))
|
||||
if model_cfg is None:
|
||||
model_cfg = json.loads(DEFAULT_TTS_MODEL_CFG[2])
|
||||
elif isinstance(model_cfg, str):
|
||||
model_cfg = json.loads(model_cfg)
|
||||
return load_model(DiT, model_cfg, ckpt_path, vocab_file=vocab_path)
|
||||
|
||||
|
||||
@@ -124,7 +126,7 @@ def load_text_from_file(file):
|
||||
return gr.update(value=text)
|
||||
|
||||
|
||||
@lru_cache(maxsize=100)
|
||||
@lru_cache(maxsize=100) # NOTE. need to ensure params of infer() hashable
|
||||
@gpu_decorator
|
||||
def infer(
|
||||
ref_audio_orig,
|
||||
@@ -163,7 +165,7 @@ def infer(
|
||||
show_info("Loading E2-TTS model...")
|
||||
E2TTS_ema_model = load_e2tts()
|
||||
ema_model = E2TTS_ema_model
|
||||
elif isinstance(model, list) and model[0] == "Custom":
|
||||
elif isinstance(model, tuple) and model[0] == "Custom":
|
||||
assert not USING_SPACES, "Only official checkpoints allowed in Spaces."
|
||||
global custom_ema_model, pre_custom_path
|
||||
if pre_custom_path != model[1]:
|
||||
@@ -357,6 +359,7 @@ def parse_speechtypes_text(gen_text):
|
||||
try: # if type dict
|
||||
current_type_dict = json.loads(type_str)
|
||||
except json.decoder.JSONDecodeError:
|
||||
type_str = type_str[1:-1] # remove brace {}
|
||||
current_type_dict = {"name": type_str, "seed": -1, "speed": 1.0}
|
||||
|
||||
return segments
|
||||
@@ -958,7 +961,7 @@ If you're having issues, try converting your reference audio to WAV or MP3, clip
|
||||
global tts_model_choice
|
||||
if new_choice == "Custom": # override in case webpage is refreshed
|
||||
custom_ckpt_path, custom_vocab_path, custom_model_cfg = load_last_used_custom()
|
||||
tts_model_choice = ["Custom", custom_ckpt_path, custom_vocab_path, json.loads(custom_model_cfg)]
|
||||
tts_model_choice = ("Custom", custom_ckpt_path, custom_vocab_path, custom_model_cfg)
|
||||
return (
|
||||
gr.update(visible=True, value=custom_ckpt_path),
|
||||
gr.update(visible=True, value=custom_vocab_path),
|
||||
@@ -970,7 +973,7 @@ If you're having issues, try converting your reference audio to WAV or MP3, clip
|
||||
|
||||
def set_custom_model(custom_ckpt_path, custom_vocab_path, custom_model_cfg):
|
||||
global tts_model_choice
|
||||
tts_model_choice = ["Custom", custom_ckpt_path, custom_vocab_path, json.loads(custom_model_cfg)]
|
||||
tts_model_choice = ("Custom", custom_ckpt_path, custom_vocab_path, custom_model_cfg)
|
||||
with open(last_used_custom, "w", encoding="utf-8") as f:
|
||||
f.write(custom_ckpt_path + "\n" + custom_vocab_path + "\n" + custom_model_cfg + "\n")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user