# Runtime package descriptor: serving engine, numeric settings, model lineage,
# and the list of files expected to be present in the package.
runtime:
  engine: 'vllm'
  # NOTE(review): both a precision and a quantization scheme are declared;
  # confirm how the consumer of this file combines the two.
  precision: 'fp16'
  quantization: 'awq-int4'
  # Identifier of the model this runtime package was derived from.
  source_model: 'qwen2.5-7b-k8s-sft-lora-v1'
  # Absolute path. Presumably the chat template is read from this tokenizer
  # config at load time; verify against the loader.
  chat_template_source: '/models/tokenizer_config.json'
  # Files expected in the runtime package.
  # NOTE(review): a single weights file is listed; confirm the export is not
  # sharded into multiple safetensors files.
  artifacts_expected:
    - 'config.json'
    - 'generation_config.json'
    - 'tokenizer.json'
    - 'tokenizer_config.json'
    - 'special_tokens_map.json'
    - 'model.safetensors'

# Files deliberately left out of the runtime package.
# Per the notes in this file, files used to resume training do not need to
# ship with the online inference service.
omitted_from_runtime:
  - 'optimizer.pt'
  - 'scheduler.pt'
  - 'rng_state.pth'
  - 'trainer_state.json'

# Free-form rationale notes. The values are data and are kept verbatim; an
# English gloss is given above each entry.
notes:
  # Gloss: files used for resuming training do not need to be released along
  # with the online inference service.
  - '训练恢复用文件不需要跟着线上推理服务发布。'
  # Gloss: the runtime package cares about being loadable, servable for
  # inference, and observable, not about being able to continue training.
  - '运行时包关注的是可加载、可推理、可观测，而不是可继续训练。'
