Repository Reading Site
training_args.json
ml-platform/examples/20-llm-training/02-checkpoint/checkpoint-000120/training_args.json
{
"output_dir": "/mnt/models/runs/sft-qwen2.5-7b-k8s-001",
"num_train_epochs": 3,
"max_steps": 1200,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 16,
"learning_rate": 0.0002,
"lr_scheduler_type": "cosine",
"warmup_ratio": 0.03,
"bf16": true,
"gradient_checkpointing": true,
"evaluation_strategy": "steps",
"eval_steps": 100,
"save_steps": 120,
"save_total_limit": 3,
"seed": 42
}