# NOTE(review): this patch was restored from a copy whose line breaks had been
# collapsed. Line boundaries follow the hunk-header counts; the leading
# indentation of the YAML context/added lines is reconstructed from
# conventional 2-space nesting -- confirm against the target files before applying.
diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
index 9c3685d0e..e9d9a7801 100644
--- a/.github/configs/nvidia-master.yaml
+++ b/.github/configs/nvidia-master.yaml
@@ -4252,7 +4252,7 @@ gptoss-fp4-b200-vllm-agentic:
     - { tp: 8, offloading: cpu, conc-list: [64, 96, 128, 192, 256] }
 
 minimaxm2.5-fp8-b200-vllm:
-  image: vllm/vllm-openai:v0.19.0-cu130
+  image: vllm/vllm-openai:v0.21.0
   model: MiniMaxAI/MiniMax-M2.5
   model-prefix: minimaxm2.5
   runner: b200
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index 85405df4e..a8ac0bb65 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -2692,3 +2692,9 @@
   description:
     - "Update vLLM ROCm image from v0.18.0 (50d old) to v0.21.0"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1469
+
+- config-keys:
+    - minimaxm2.5-fp8-b200-vllm
+  description:
+    - "Update vLLM image from v0.19.0-cu130 (25d old) to v0.21.0"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1449