From ebbd7bd91ff5f8f5d07d9176493e0d2bee33aa22 Mon Sep 17 00:00:00 2001
From: Danh Tran <cprmmb99@gmail.com>
Date: Tue, 24 Jun 2025 22:23:00 +0700
Subject: [PATCH] =?UTF-8?q?Update=20WAV=20File=20Naming=20and=20Dependenci?=
 =?UTF-8?q?es=20=F0=9F=93=9D=F0=9F=94=8A=20(#1091)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

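* Update infer_cli.py: add --no_legacy_text flag; by default, transliterate saved chunk file names to ASCII with unidecode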

* Update pyproject.toml: add unidecode dependency

* formalized

---------

Co-authored-by: SWivid <swivid@qq.com>
---
 pyproject.toml                |  1 +
 src/f5_tts/infer/infer_cli.py | 14 ++++++++++++++
 2 files changed, 15 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 69e4c9c..bbd633e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,6 +38,7 @@ dependencies = [
     "tqdm>=4.65.0",
     "transformers",
     "transformers_stream_generator",
+    "unidecode",
     "vocos",
     "wandb",
     "x_transformers>=1.31.14",
diff --git a/src/f5_tts/infer/infer_cli.py b/src/f5_tts/infer/infer_cli.py
index 95800fd..5bd20ce 100644
--- a/src/f5_tts/infer/infer_cli.py
+++ b/src/f5_tts/infer/infer_cli.py
@@ -12,6 +12,7 @@ import tomli
 from cached_path import cached_path
 from hydra.utils import get_class
 from omegaconf import OmegaConf
+from unidecode import unidecode
 
 from f5_tts.infer.utils_infer import (
     cfg_strength,
@@ -112,6 +113,11 @@ parser.add_argument(
     action="store_true",
     help="To save each audio chunks during inference",
 )
+parser.add_argument(
+    "--no_legacy_text",
+    action="store_false",
+    help="Not to use lossy ASCII transliterations of unicode text in saved file names.",
+)
 parser.add_argument(
     "--remove_silence",
     action="store_true",
@@ -197,6 +203,12 @@ output_file = args.output_file or config.get(
 )
 
 save_chunk = args.save_chunk or config.get("save_chunk", False)
+use_legacy_text = args.no_legacy_text and not config.get("no_legacy_text", False)  # store_false arg; config may opt out
+if save_chunk and use_legacy_text:
+    print(
+        "\nWarning to --save_chunk: lossy ASCII transliterations of unicode text for legacy (.wav) file names, --no_legacy_text to disable.\n"
+    )
+
 remove_silence = args.remove_silence or config.get("remove_silence", False)
 load_vocoder_from_local = args.load_vocoder_from_local or config.get("load_vocoder_from_local", False)
 
@@ -344,6 +356,8 @@ def main():
         if save_chunk:
             if len(gen_text_) > 200:
                 gen_text_ = gen_text_[:200] + " ... "
+            if use_legacy_text:
+                gen_text_ = unidecode(gen_text_)
             sf.write(
                 os.path.join(output_chunk_dir, f"{len(generated_audio_segments) - 1}_{gen_text_}.wav"),
                 audio_segment,