mirror of
https://github.com/SWivid/F5-TTS.git
synced 2025-12-12 07:40:43 -08:00
Update WAV File Naming and Dependencies 📝🔊 (#1091)
* Update infer_cli.py
* Update pyproject.toml
* formalized

---------

Co-authored-by: SWivid <swivid@qq.com>
This commit is contained in:
@@ -38,6 +38,7 @@ dependencies = [
|
|||||||
"tqdm>=4.65.0",
|
"tqdm>=4.65.0",
|
||||||
"transformers",
|
"transformers",
|
||||||
"transformers_stream_generator",
|
"transformers_stream_generator",
|
||||||
|
"unidecode",
|
||||||
"vocos",
|
"vocos",
|
||||||
"wandb",
|
"wandb",
|
||||||
"x_transformers>=1.31.14",
|
"x_transformers>=1.31.14",
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import tomli
|
|||||||
from cached_path import cached_path
|
from cached_path import cached_path
|
||||||
from hydra.utils import get_class
|
from hydra.utils import get_class
|
||||||
from omegaconf import OmegaConf
|
from omegaconf import OmegaConf
|
||||||
|
from unidecode import unidecode
|
||||||
|
|
||||||
from f5_tts.infer.utils_infer import (
|
from f5_tts.infer.utils_infer import (
|
||||||
cfg_strength,
|
cfg_strength,
|
||||||
@@ -112,6 +113,11 @@ parser.add_argument(
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
help="To save each audio chunks during inference",
|
help="To save each audio chunks during inference",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--no_legacy_text",
|
||||||
|
action="store_false",
|
||||||
|
help="Not to use lossy ASCII transliterations of unicode text in saved file names.",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--remove_silence",
|
"--remove_silence",
|
||||||
action="store_true",
|
action="store_true",
|
||||||
@@ -197,6 +203,12 @@ output_file = args.output_file or config.get(
|
|||||||
)
|
)
|
||||||
|
|
||||||
save_chunk = args.save_chunk or config.get("save_chunk", False)
|
save_chunk = args.save_chunk or config.get("save_chunk", False)
|
||||||
|
use_legacy_text = args.no_legacy_text or config.get("no_legacy_text", False) # no_legacy_text is a store_false arg
|
||||||
|
if save_chunk and use_legacy_text:
|
||||||
|
print(
|
||||||
|
"\nWarning to --save_chunk: lossy ASCII transliterations of unicode text for legacy (.wav) file names, --no_legacy_text to disable.\n"
|
||||||
|
)
|
||||||
|
|
||||||
remove_silence = args.remove_silence or config.get("remove_silence", False)
|
remove_silence = args.remove_silence or config.get("remove_silence", False)
|
||||||
load_vocoder_from_local = args.load_vocoder_from_local or config.get("load_vocoder_from_local", False)
|
load_vocoder_from_local = args.load_vocoder_from_local or config.get("load_vocoder_from_local", False)
|
||||||
|
|
||||||
@@ -344,6 +356,8 @@ def main():
|
|||||||
if save_chunk:
|
if save_chunk:
|
||||||
if len(gen_text_) > 200:
|
if len(gen_text_) > 200:
|
||||||
gen_text_ = gen_text_[:200] + " ... "
|
gen_text_ = gen_text_[:200] + " ... "
|
||||||
|
if use_legacy_text:
|
||||||
|
gen_text_ = unidecode(gen_text_)
|
||||||
sf.write(
|
sf.write(
|
||||||
os.path.join(output_chunk_dir, f"{len(generated_audio_segments) - 1}_{gen_text_}.wav"),
|
os.path.join(output_chunk_dir, f"{len(generated_audio_segments) - 1}_{gen_text_}.wav"),
|
||||||
audio_segment,
|
audio_segment,
|
||||||
|
|||||||
Reference in New Issue
Block a user