# Repository Reading Site
# 05-model-release.yaml
# ml-platform/examples/21-llm-serving/05-model-release.yaml
---
# ModelRelease manifest: one release record that ties a model version, a
# runtime package and a target deployment together, plus the approvals,
# rollback target and canary traffic policy recorded for that release.
apiVersion: ml.k8s-lab.io/v1alpha1
kind: ModelRelease
metadata:
  name: qwen2.5-7b-k8s-chat-prod-20260413
spec:
  owner: platform-llm
  stage: canary

  # Model version this release is built from.
  modelVersionRef:
    name: qwen2.5-7b-k8s-sft-lora-v1

  # Packaged runtime artifact that is served.
  runtimePackage:
    uri: "s3://ml-assets/runtime/qwen2.5-7b-k8s-sft-v1-awq"
    # Placeholder value (ends in "-example" and is not a full sha256 hex
    # digest); replace with the real artifact digest before use.
    digest: "sha256:3d2ef71c8b6f0a0b8c-example"
    # NOTE(review): engineConfigRef is assumed to belong to runtimePackage;
    # it may instead be a direct child of spec. Confirm against the
    # ModelRelease schema.
    engineConfigRef:
      path: "ml-platform/examples/21-llm-serving/00-inference-service-config.yaml"

  # Deployment object (name + namespace) this release targets.
  deploymentRef:
    name: "qwen2.5-7b-k8s-chat"
    namespace: "llm-serving"

  # Recorded sign-off state per gate.
  approvals:
    offlineEval: approved
    loadTest: approved
    securityScan: approved
    productOwner: approved

  # Release to return to if this one is rolled back.
  rollback:
    targetRelease: "qwen2.5-7b-k8s-chat-prod-20260406"
    maxRollbackTimeMinutes: 10

  trafficPolicy:
    initialCanaryPercent: 10
    # Promotion conditions, stored as opaque expression strings.
    promoteIf:
      - "ttft_p95_ms < 1800"
      - "error_rate < 0.02"
      # NOTE(review): this entry has an extra "delta" token, unlike the two
      # "<metric> < <threshold>" entries above; confirm the consumer's
      # expression grammar accepts it.
      - "user_abort_rate delta < 0.03"

  # Free-form notes (kept verbatim). English gloss:
  #   1. "This is a serving release record, not a training-stage model
  #      registration card."
  #   2. "It references modelVersion, runtimePackage and deployment together."
  # NOTE(review): notes is assumed to live under spec; confirm against the
  # ModelRelease schema.
  notes:
    - "这是服务版本发布单,不是训练阶段的模型登记卡。"
    - "这里同时引用 modelVersion、runtimePackage 和 deployment。"