Merge pull request #741 from Chiyan200/main

Fix Settings Loader Issues: Resolve KeyErrors, Path Handling, and Component Assignment (#731)
Author: Yushen CHEN
Date:   2025-01-27 19:28:22 +08:00
Committed by: GitHub


@@ -116,41 +116,57 @@ def load_settings(project_name):
     path_project = os.path.join(path_project_ckpts, project_name)
     file_setting = os.path.join(path_project, "setting.json")
 
-    if not os.path.isfile(file_setting):
-        settings = {
-            "exp_name": "F5TTS_Base",
-            "learning_rate": 1e-05,
-            "batch_size_per_gpu": 1000,
-            "batch_size_type": "frame",
-            "max_samples": 64,
-            "grad_accumulation_steps": 1,
-            "max_grad_norm": 1,
-            "epochs": 100,
-            "num_warmup_updates": 2,
-            "save_per_updates": 300,
-            "keep_last_n_checkpoints": -1,
-            "last_per_updates": 100,
-            "finetune": True,
-            "file_checkpoint_train": "",
-            "tokenizer_type": "pinyin",
-            "tokenizer_file": "",
-            "mixed_precision": "none",
-            "logger": "wandb",
-            "bnb_optimizer": False,
-        }
-    else:
-        with open(file_setting, "r") as f:
-            settings = json.load(f)
-        if "logger" not in settings:
-            settings["logger"] = "wandb"
-        if "bnb_optimizer" not in settings:
-            settings["bnb_optimizer"] = False
-        if "keep_last_n_checkpoints" not in settings:
-            settings["keep_last_n_checkpoints"] = -1  # default to keep all checkpoints
-        if "last_per_updates" not in settings:  # patch for backward compatibility, with before f992c4e
-            settings["last_per_updates"] = settings["last_per_steps"] // settings["grad_accumulation_steps"]
+    # Default settings
+    default_settings = {
+        "exp_name": "F5TTS_Base",
+        "learning_rate": 1e-05,
+        "batch_size_per_gpu": 1000,
+        "batch_size_type": "frame",
+        "max_samples": 64,
+        "grad_accumulation_steps": 1,
+        "max_grad_norm": 1,
+        "epochs": 100,
+        "num_warmup_updates": 2,
+        "save_per_updates": 300,
+        "keep_last_n_checkpoints": -1,
+        "last_per_updates": 100,
+        "finetune": True,
+        "file_checkpoint_train": "",
+        "tokenizer_type": "pinyin",
+        "tokenizer_file": "",
+        "mixed_precision": "none",
+        "logger": "wandb",
+        "bnb_optimizer": False,
+    }
 
-    return settings
+    # Load settings from file if it exists
+    if os.path.isfile(file_setting):
+        with open(file_setting, "r") as f:
+            file_settings = json.load(f)
+        default_settings.update(file_settings)
+
+    # Return as a tuple in the correct order
+    return (
+        default_settings["exp_name"],
+        default_settings["learning_rate"],
+        default_settings["batch_size_per_gpu"],
+        default_settings["batch_size_type"],
+        default_settings["max_samples"],
+        default_settings["grad_accumulation_steps"],
+        default_settings["max_grad_norm"],
+        default_settings["epochs"],
+        default_settings["num_warmup_updates"],
+        default_settings["save_per_updates"],
+        default_settings["keep_last_n_checkpoints"],
+        default_settings["last_per_updates"],
+        default_settings["finetune"],
+        default_settings["file_checkpoint_train"],
+        default_settings["tokenizer_type"],
+        default_settings["tokenizer_file"],
+        default_settings["mixed_precision"],
+        default_settings["logger"],
+        default_settings["bnb_optimizer"],
+    )
 
     # Load metadata
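For context on the KeyError part of the fix: the new code builds a complete default_settings dict first and only then overlays the on-disk values with dict.update, so any key missing from an older setting.json falls back to its default instead of raising KeyError at lookup time. A minimal standalone sketch of that pattern follows; the path and the reduced key set here are illustrative, not taken from the PR:

import json
import os

# Complete defaults first; every key the caller will read is guaranteed to exist.
defaults = {
    "logger": "wandb",
    "bnb_optimizer": False,
    "keep_last_n_checkpoints": -1,  # keep all checkpoints by default
}

file_setting = "setting.json"  # illustrative path, not the project's real layout
if os.path.isfile(file_setting):
    with open(file_setting, "r") as f:
        defaults.update(json.load(f))  # file values win; absent keys keep defaults

# Safe even against a setting.json written by an older version of the app:
print(defaults["keep_last_n_checkpoints"])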
@@ -1579,27 +1595,48 @@ If you encounter a memory error, try reducing the batch size per GPU to a smalle
         stop_button = gr.Button("Stop Training", interactive=False)
 
         if projects_selelect is not None:
-            settings = load_settings(projects_selelect)
+            (
+                exp_name_value,
+                learning_rate_value,
+                batch_size_per_gpu_value,
+                batch_size_type_value,
+                max_samples_value,
+                grad_accumulation_steps_value,
+                max_grad_norm_value,
+                epochs_value,
+                num_warmup_updates_value,
+                save_per_updates_value,
+                keep_last_n_checkpoints_value,
+                last_per_updates_value,
+                finetune_value,
+                file_checkpoint_train_value,
+                tokenizer_type_value,
+                tokenizer_file_value,
+                mixed_precision_value,
+                logger_value,
+                bnb_optimizer_value,
+            ) = load_settings(projects_selelect)
 
-            exp_name.value = settings["exp_name"]
-            learning_rate.value = settings["learning_rate"]
-            batch_size_per_gpu.value = settings["batch_size_per_gpu"]
-            batch_size_type.value = settings["batch_size_type"]
-            max_samples.value = settings["max_samples"]
-            grad_accumulation_steps.value = settings["grad_accumulation_steps"]
-            max_grad_norm.value = settings["max_grad_norm"]
-            epochs.value = settings["epochs"]
-            num_warmup_updates.value = settings["num_warmup_updates"]
-            save_per_updates.value = settings["save_per_updates"]
-            keep_last_n_checkpoints.value = settings["keep_last_n_checkpoints"]
-            last_per_updates.value = settings["last_per_updates"]
-            ch_finetune.value = settings["finetune"]
-            file_checkpoint_train.value = settings["file_checkpoint_train"]
-            tokenizer_type.value = settings["tokenizer_type"]
-            tokenizer_file.value = settings["tokenizer_file"]
-            mixed_precision.value = settings["mixed_precision"]
-            cd_logger.value = settings["logger"]
-            ch_8bit_adam.value = settings["bnb_optimizer"]
+            # Assigning values to the respective components
+            exp_name.value = exp_name_value
+            learning_rate.value = learning_rate_value
+            batch_size_per_gpu.value = batch_size_per_gpu_value
+            batch_size_type.value = batch_size_type_value
+            max_samples.value = max_samples_value
+            grad_accumulation_steps.value = grad_accumulation_steps_value
+            max_grad_norm.value = max_grad_norm_value
+            epochs.value = epochs_value
+            num_warmup_updates.value = num_warmup_updates_value
+            save_per_updates.value = save_per_updates_value
+            keep_last_n_checkpoints.value = keep_last_n_checkpoints_value
+            last_per_updates.value = last_per_updates_value
+            ch_finetune.value = finetune_value
+            file_checkpoint_train.value = file_checkpoint_train_value
+            tokenizer_type.value = tokenizer_type_value
+            tokenizer_file.value = tokenizer_file_value
+            mixed_precision.value = mixed_precision_value
+            cd_logger.value = logger_value
+            ch_8bit_adam.value = bnb_optimizer_value
 
         ch_stream = gr.Checkbox(label="Stream Output Experiment", value=True)
         txt_info_train = gr.Text(label="Info", value="")
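On the component-assignment side: load_settings now returns its 19 values as a fixed-order tuple, and the call site unpacks them positionally, so any drift between the function and its caller raises ValueError immediately instead of silently leaving Gradio components at stale values. A tiny self-contained sketch of that contract; the stub below is a hypothetical stand-in for the real 19-value load_settings, not code from the PR:

# Stand-in for load_settings(): returns values in a fixed, documented order.
def load_settings_stub():
    defaults = {"exp_name": "F5TTS_Base", "learning_rate": 1e-05, "epochs": 100}
    return (defaults["exp_name"], defaults["learning_rate"], defaults["epochs"])

# Positional unpacking enforces arity: three targets for a three-element tuple.
# Adding a value to the return without updating this line raises ValueError.
exp_name_value, learning_rate_value, epochs_value = load_settings_stub()
print(exp_name_value, learning_rate_value, epochs_value)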