2 Commits

3 changed files with 18 additions and 6 deletions

View File

@@ -30,6 +30,9 @@
# Create a conda env with python_version>=3.10 (you could also use virtualenv)
conda create -n f5-tts python=3.11
conda activate f5-tts
# Install FFmpeg if you haven't yet
conda install ffmpeg
```
### Install PyTorch with matched device
@@ -39,7 +42,11 @@ conda activate f5-tts
> ```bash
> # Install pytorch with your CUDA version, e.g.
> pip install torch==2.8.0+cu128 torchaudio==2.8.0+cu128 --extra-index-url https://download.pytorch.org/whl/cu128
>
> # And also possible previous versions, e.g.
> pip install torch==2.4.0+cu124 torchaudio==2.4.0+cu124 --extra-index-url https://download.pytorch.org/whl/cu124
> # etc.
> ```
</details>

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "f5-tts"
version = "1.1.10"
version = "1.1.12"
description = "F5-TTS: A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching"
readme = "README.md"
license = {text = "MIT License"}
@@ -20,7 +20,7 @@ dependencies = [
"click",
"datasets",
"ema_pytorch>=0.5.2",
"gradio>=5.0.0",
"gradio>=6.0.0",
"hydra-core>=1.3.0",
"librosa",
"matplotlib",

View File

@@ -577,7 +577,7 @@ with gr.Blocks() as app_multistyle:
label="Cherry-pick Interface",
lines=10,
max_lines=40,
show_copy_button=True,
buttons=["copy"], # show_copy_button=True if gradio<6.0
interactive=False,
visible=False,
)
@@ -816,7 +816,9 @@ Have a conversation with an AI using your reference voice!
lines=2,
)
chatbot_interface = gr.Chatbot(label="Conversation", type="messages")
chatbot_interface = gr.Chatbot(
    label="Conversation"
)  # since gradio 6.0, type="messages" is hard-coded, so there is no need to pass it in
with gr.Row():
with gr.Column():
@@ -853,6 +855,10 @@ Have a conversation with an AI using your reference voice!
@gpu_decorator
def generate_text_response(conv_state, system_prompt):
"""Generate text response from AI"""
for single_state in conv_state:
if isinstance(single_state["content"], list):
assert len(single_state["content"]) == 1 and single_state["content"][0]["type"] == "text"
single_state["content"] = single_state["content"][0]["text"]
system_prompt_state = [{"role": "system", "content": system_prompt}]
response = chat_model_inference(system_prompt_state + conv_state, chat_model_state, chat_tokenizer_state)
@@ -866,7 +872,7 @@ Have a conversation with an AI using your reference voice!
if not conv_state or not ref_audio:
return None, ref_text, seed_input
last_ai_response = conv_state[-1]["content"]
last_ai_response = conv_state[-1]["content"][0]["text"]
if not last_ai_response or conv_state[-1]["role"] != "assistant":
return None, ref_text, seed_input
@@ -1108,7 +1114,6 @@ def main(port, host, share, api, root_path, inbrowser):
server_name=host,
server_port=port,
share=share,
show_api=api,
root_path=root_path,
inbrowser=inbrowser,
)