mirror of
https://github.com/SWivid/F5-TTS.git
synced 2025-12-12 07:40:43 -08:00
Replace jieba pkg with rjieba - a jieba-rs Python binding
This commit is contained in:
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "f5-tts"
|
name = "f5-tts"
|
||||||
version = "1.1.9"
|
version = "1.1.10"
|
||||||
description = "F5-TTS: A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching"
|
description = "F5-TTS: A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
license = {text = "MIT License"}
|
license = {text = "MIT License"}
|
||||||
@@ -22,13 +22,13 @@ dependencies = [
|
|||||||
"ema_pytorch>=0.5.2",
|
"ema_pytorch>=0.5.2",
|
||||||
"gradio>=5.0.0",
|
"gradio>=5.0.0",
|
||||||
"hydra-core>=1.3.0",
|
"hydra-core>=1.3.0",
|
||||||
"jieba",
|
|
||||||
"librosa",
|
"librosa",
|
||||||
"matplotlib",
|
"matplotlib",
|
||||||
"numpy<=1.26.4; python_version<='3.10'",
|
"numpy<=1.26.4; python_version<='3.10'",
|
||||||
"pydantic<=2.10.6",
|
"pydantic<=2.10.6",
|
||||||
"pydub",
|
"pydub",
|
||||||
"pypinyin",
|
"pypinyin",
|
||||||
|
"rjieba",
|
||||||
"safetensors",
|
"safetensors",
|
||||||
"soundfile",
|
"soundfile",
|
||||||
"tomli",
|
"tomli",
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ import random
|
|||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from importlib.resources import files
|
from importlib.resources import files
|
||||||
|
|
||||||
import jieba
|
import rjieba
|
||||||
import torch
|
import torch
|
||||||
from pypinyin import Style, lazy_pinyin
|
from pypinyin import Style, lazy_pinyin
|
||||||
from torch.nn.utils.rnn import pad_sequence
|
from torch.nn.utils.rnn import pad_sequence
|
||||||
@@ -146,10 +146,6 @@ def get_tokenizer(dataset_name, tokenizer: str = "pinyin"):
|
|||||||
|
|
||||||
|
|
||||||
def convert_char_to_pinyin(text_list, polyphone=True):
|
def convert_char_to_pinyin(text_list, polyphone=True):
|
||||||
if jieba.dt.initialized is False:
|
|
||||||
jieba.default_logger.setLevel(50) # CRITICAL
|
|
||||||
jieba.initialize()
|
|
||||||
|
|
||||||
final_text_list = []
|
final_text_list = []
|
||||||
custom_trans = str.maketrans(
|
custom_trans = str.maketrans(
|
||||||
{";": ",", "“": '"', "”": '"', "‘": "'", "’": "'"}
|
{";": ",", "“": '"', "”": '"', "‘": "'", "’": "'"}
|
||||||
@@ -163,7 +159,7 @@ def convert_char_to_pinyin(text_list, polyphone=True):
|
|||||||
for text in text_list:
|
for text in text_list:
|
||||||
char_list = []
|
char_list = []
|
||||||
text = text.translate(custom_trans)
|
text = text.translate(custom_trans)
|
||||||
for seg in jieba.cut(text):
|
for seg in rjieba.cut(text):
|
||||||
seg_byte_len = len(bytes(seg, "UTF-8"))
|
seg_byte_len = len(bytes(seg, "UTF-8"))
|
||||||
if seg_byte_len == len(seg): # if pure alphabets and symbols
|
if seg_byte_len == len(seg): # if pure alphabets and symbols
|
||||||
if char_list and seg_byte_len > 1 and char_list[-1] not in " :'\"":
|
if char_list and seg_byte_len > 1 and char_list[-1] not in " :'\"":
|
||||||
|
|||||||
@@ -1,3 +1,3 @@
|
|||||||
FROM nvcr.io/nvidia/tritonserver:24.12-py3
|
FROM nvcr.io/nvidia/tritonserver:24.12-py3
|
||||||
RUN pip install tritonclient[grpc] tensorrt-llm==0.16.0 torchaudio==2.5.1 jieba pypinyin librosa vocos
|
RUN pip install tritonclient[grpc] tensorrt-llm==0.16.0 torchaudio==2.5.1 rjieba pypinyin librosa vocos
|
||||||
WORKDIR /workspace
|
WORKDIR /workspace
|
||||||
@@ -26,7 +26,7 @@
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import jieba
|
import rjieba
|
||||||
import torch
|
import torch
|
||||||
import torchaudio
|
import torchaudio
|
||||||
import triton_python_backend_utils as pb_utils
|
import triton_python_backend_utils as pb_utils
|
||||||
@@ -66,7 +66,7 @@ def convert_char_to_pinyin(reference_target_texts_list, polyphone=True):
|
|||||||
for text in reference_target_texts_list:
|
for text in reference_target_texts_list:
|
||||||
char_list = []
|
char_list = []
|
||||||
text = text.translate(custom_trans)
|
text = text.translate(custom_trans)
|
||||||
for seg in jieba.cut(text):
|
for seg in rjieba.cut(text):
|
||||||
seg_byte_len = len(bytes(seg, "UTF-8"))
|
seg_byte_len = len(bytes(seg, "UTF-8"))
|
||||||
if seg_byte_len == len(seg): # if pure alphabets and symbols
|
if seg_byte_len == len(seg): # if pure alphabets and symbols
|
||||||
if char_list and seg_byte_len > 1 and char_list[-1] not in " :'\"":
|
if char_list and seg_byte_len > 1 and char_list[-1] not in " :'\"":
|
||||||
|
|||||||
@@ -225,5 +225,5 @@ if __name__ == "__main__":
|
|||||||
# bad zh asr cnt 230435 (samples)
|
# bad zh asr cnt 230435 (samples)
|
||||||
# bad eh asr cnt 37217 (samples)
|
# bad eh asr cnt 37217 (samples)
|
||||||
|
|
||||||
# vocab size may be slightly different due to jieba tokenizer and pypinyin (e.g. way of polyphoneme)
|
# vocab size may be slightly different due to rjieba tokenizer and pypinyin (e.g. way of polyphoneme)
|
||||||
# please be careful if using pretrained model, make sure the vocab.txt is same
|
# please be careful if using pretrained model, make sure the vocab.txt is same
|
||||||
|
|||||||
@@ -122,5 +122,5 @@ if __name__ == "__main__":
|
|||||||
# - - 1459 (polyphone)
|
# - - 1459 (polyphone)
|
||||||
# char vocab size 5264 5219 5042
|
# char vocab size 5264 5219 5042
|
||||||
|
|
||||||
# vocab size may be slightly different due to jieba tokenizer and pypinyin (e.g. way of polyphoneme)
|
# vocab size may be slightly different due to rjieba tokenizer and pypinyin (e.g. way of polyphoneme)
|
||||||
# please be careful if using pretrained model, make sure the vocab.txt is same
|
# please be careful if using pretrained model, make sure the vocab.txt is same
|
||||||
|
|||||||
Reference in New Issue
Block a user