# Patch summary: bumps the TensorRT-LLM container image to 1.3.0rc14 for the
# dsr1-fp4-b200-trt config (previously 1.2.0rc6.post2) and the
# dsr1-fp4-b200-trt-mtp config (previously 1.2.0rc6.post3), and appends the
# matching entry to perf-changelog.yaml.
#
# NOTE(review): this patch was recovered from a copy whose newlines and
# indentation had been collapsed. Line boundaries follow the hunk headers;
# the leading whitespace of the YAML lines is reconstructed by convention
# and must be checked against the repository before applying.
# NOTE(review): the changelog description uses the shorthand "off", "104d"
# and "101d" without explanation - presumably the non-MTP config and the age
# of each old image in days; confirm and consider spelling it out.
diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml
index efc11bc22..67c996058 100644
--- a/.github/configs/nvidia-master.yaml
+++ b/.github/configs/nvidia-master.yaml
@@ -1873,7 +1873,7 @@ dsr1-fp4-b300-sglang:
     - { tp: 8, ep: 8, conc-start: 4, conc-end: 16 }
 
 dsr1-fp4-b200-trt:
-  image: nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc6.post2
+  image: nvcr.io#nvidia/tensorrt-llm/release:1.3.0rc14
   model: nvidia/DeepSeek-R1-0528-FP4-V2
   model-prefix: dsr1
   runner: b200
@@ -1906,7 +1906,7 @@ dsr1-fp4-b200-trt:
     - { tp: 8, ep: 8, dp-attn: true, conc-start: 128, conc-end: 256 }
 
 dsr1-fp4-b200-trt-mtp:
-  image: nvcr.io#nvidia/tensorrt-llm/release:1.2.0rc6.post3
+  image: nvcr.io#nvidia/tensorrt-llm/release:1.3.0rc14
   model: nvidia/DeepSeek-R1-0528-FP4-V2
   model-prefix: dsr1
   runner: b200
diff --git a/perf-changelog.yaml b/perf-changelog.yaml
index cc634d1c4..bd1129f0e 100644
--- a/perf-changelog.yaml
+++ b/perf-changelog.yaml
@@ -2869,3 +2869,10 @@
     - "1k1k and 8k1k STP low-latency and max-throughput srt-slurm recipes under benchmarks/multi_node/srt-slurm-recipes/sglang/glm5/b200-fp8/"
     - "Use the B200 local model alias glm5-fp8 mapped to /scratch/fsw/models/GLM-5-FP8"
   pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1372
+
+- config-keys:
+    - dsr1-fp4-b200-trt
+    - dsr1-fp4-b200-trt-mtp
+  description:
+    - "Update TensorRT-LLM image (off: v1.2.0rc6.post2 104d / mtp: v1.2.0rc6.post3 101d) to v1.3.0rc14 (latest pre-release)"
+  pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1489