2 Commits

3 changed files with 18 additions and 6 deletions

View File

@@ -30,6 +30,9 @@
# Create a conda env with python_version>=3.10 (you could also use virtualenv)
conda create -n f5-tts python=3.11
conda activate f5-tts
# Install FFmpeg if you haven't yet
conda install ffmpeg
```
### Install PyTorch with matched device
@@ -39,7 +42,11 @@ conda activate f5-tts
> ```bash
> # Install pytorch with your CUDA version, e.g.
> pip install torch==2.8.0+cu128 torchaudio==2.8.0+cu128 --extra-index-url https://download.pytorch.org/whl/cu128
>
> # And also possible previous versions, e.g.
> pip install torch==2.4.0+cu124 torchaudio==2.4.0+cu124 --extra-index-url https://download.pytorch.org/whl/cu124
> # etc.
> ```
</details>

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "f5-tts"
version = "1.1.10"
version = "1.1.12"
description = "F5-TTS: A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching"
readme = "README.md"
license = {text = "MIT License"}
@@ -20,7 +20,7 @@ dependencies = [
"click",
"datasets",
"ema_pytorch>=0.5.2",
"gradio>=5.0.0",
"gradio>=6.0.0",
"hydra-core>=1.3.0",
"librosa",
"matplotlib",

View File

@@ -577,7 +577,7 @@ with gr.Blocks() as app_multistyle:
label="Cherry-pick Interface",
lines=10,
max_lines=40,
show_copy_button=True,
buttons=["copy"], # show_copy_button=True if gradio<6.0
interactive=False,
visible=False,
)
@@ -816,7 +816,9 @@ Have a conversation with an AI using your reference voice!
lines=2,
)
chatbot_interface = gr.Chatbot(label="Conversation", type="messages")
chatbot_interface = gr.Chatbot(
    label="Conversation"
)  # since gradio 6.0, type="messages" is hard-coded, so there is no need to pass it in
with gr.Row():
with gr.Column():
@@ -853,6 +855,10 @@ Have a conversation with an AI using your reference voice!
@gpu_decorator
def generate_text_response(conv_state, system_prompt):
"""Generate text response from AI"""
for single_state in conv_state:
if isinstance(single_state["content"], list):
assert len(single_state["content"]) == 1 and single_state["content"][0]["type"] == "text"
single_state["content"] = single_state["content"][0]["text"]
system_prompt_state = [{"role": "system", "content": system_prompt}]
response = chat_model_inference(system_prompt_state + conv_state, chat_model_state, chat_tokenizer_state)
@@ -866,7 +872,7 @@ Have a conversation with an AI using your reference voice!
if not conv_state or not ref_audio:
return None, ref_text, seed_input
last_ai_response = conv_state[-1]["content"]
last_ai_response = conv_state[-1]["content"][0]["text"]
if not last_ai_response or conv_state[-1]["role"] != "assistant":
return None, ref_text, seed_input
@@ -1108,7 +1114,6 @@ def main(port, host, share, api, root_path, inbrowser):
server_name=host,
server_port=port,
share=share,
show_api=api,
root_path=root_path,
inbrowser=inbrowser,
)