mirror of
https://github.com/SWivid/F5-TTS.git
synced 2025-12-25 12:24:54 -08:00
Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
39617fcf7a | ||
|
|
5b82f97c26 |
@@ -30,6 +30,9 @@
|
||||
# Create a conda env with python_version>=3.10 (you could also use virtualenv)
|
||||
conda create -n f5-tts python=3.11
|
||||
conda activate f5-tts
|
||||
|
||||
# Install FFmpeg if you haven't yet
|
||||
conda install ffmpeg
|
||||
```
|
||||
|
||||
### Install PyTorch with matched device
|
||||
@@ -39,7 +42,11 @@ conda activate f5-tts
|
||||
|
||||
> ```bash
|
||||
> # Install pytorch with your CUDA version, e.g.
|
||||
> pip install torch==2.8.0+cu128 torchaudio==2.8.0+cu128 --extra-index-url https://download.pytorch.org/whl/cu128
|
||||
>
|
||||
> # Earlier PyTorch/CUDA versions are also possible, e.g.
|
||||
> pip install torch==2.4.0+cu124 torchaudio==2.4.0+cu124 --extra-index-url https://download.pytorch.org/whl/cu124
|
||||
> # etc.
|
||||
> ```
|
||||
|
||||
</details>
|
||||
|
||||
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "f5-tts"
|
||||
version = "1.1.10"
|
||||
version = "1.1.12"
|
||||
description = "F5-TTS: A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching"
|
||||
readme = "README.md"
|
||||
license = {text = "MIT License"}
|
||||
@@ -20,7 +20,7 @@ dependencies = [
|
||||
"click",
|
||||
"datasets",
|
||||
"ema_pytorch>=0.5.2",
|
||||
"gradio>=5.0.0",
|
||||
"gradio>=6.0.0",
|
||||
"hydra-core>=1.3.0",
|
||||
"librosa",
|
||||
"matplotlib",
|
||||
|
||||
@@ -577,7 +577,7 @@ with gr.Blocks() as app_multistyle:
|
||||
label="Cherry-pick Interface",
|
||||
lines=10,
|
||||
max_lines=40,
|
||||
show_copy_button=True,
|
||||
buttons=["copy"], # show_copy_button=True if gradio<6.0
|
||||
interactive=False,
|
||||
visible=False,
|
||||
)
|
||||
@@ -816,7 +816,9 @@ Have a conversation with an AI using your reference voice!
|
||||
lines=2,
|
||||
)
|
||||
|
||||
chatbot_interface = gr.Chatbot(label="Conversation", type="messages")
|
||||
chatbot_interface = gr.Chatbot(
|
||||
label="Conversation"
|
||||
) # type="messages" is hard-coded since gradio 6.0, so it no longer needs to be passed in
|
||||
|
||||
with gr.Row():
|
||||
with gr.Column():
|
||||
@@ -853,6 +855,10 @@ Have a conversation with an AI using your reference voice!
|
||||
@gpu_decorator
|
||||
def generate_text_response(conv_state, system_prompt):
|
||||
"""Generate text response from AI"""
|
||||
for single_state in conv_state:
|
||||
if isinstance(single_state["content"], list):
|
||||
assert len(single_state["content"]) == 1 and single_state["content"][0]["type"] == "text"
|
||||
single_state["content"] = single_state["content"][0]["text"]
|
||||
|
||||
system_prompt_state = [{"role": "system", "content": system_prompt}]
|
||||
response = chat_model_inference(system_prompt_state + conv_state, chat_model_state, chat_tokenizer_state)
|
||||
@@ -866,7 +872,7 @@ Have a conversation with an AI using your reference voice!
|
||||
if not conv_state or not ref_audio:
|
||||
return None, ref_text, seed_input
|
||||
|
||||
last_ai_response = conv_state[-1]["content"]
|
||||
last_ai_response = conv_state[-1]["content"][0]["text"]
|
||||
if not last_ai_response or conv_state[-1]["role"] != "assistant":
|
||||
return None, ref_text, seed_input
|
||||
|
||||
@@ -1108,7 +1114,6 @@ def main(port, host, share, api, root_path, inbrowser):
|
||||
server_name=host,
|
||||
server_port=port,
|
||||
share=share,
|
||||
show_api=api,
|
||||
root_path=root_path,
|
||||
inbrowser=inbrowser,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user