From 4d897724d5992422e3f404d74db25f2c64e4095c Mon Sep 17 00:00:00 2001 From: EmmonsCurse <1577972691@qq.com> Date: Mon, 20 Apr 2026 15:07:48 +0800 Subject: [PATCH 1/3] [CI] Disable auto CUDA arch injection to avoid duplicate gencode flags --- custom_ops/setup_ops.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/custom_ops/setup_ops.py b/custom_ops/setup_ops.py index dd4d4645420..6b509fdb388 100644 --- a/custom_ops/setup_ops.py +++ b/custom_ops/setup_ops.py @@ -176,6 +176,14 @@ def get_gencode_flags(archs): ] else: flags += ["-gencode", f"arch=compute_{cc_val},code=sm_{cc_val}"] + + # Workaround for Paddle PR #78704: + # Paddle 3.5.0.dev20260418+ changed CUDAExtension behavior to add + # PADDLE_CUDA_ARCH_LIST-based gencode flags even when user provides + # arch flags in cflags. Setting PADDLE_CUDA_ARCH_LIST to empty string + # prevents Paddle from auto-detecting GPUs and adding duplicate gencode flags. + os.environ["PADDLE_CUDA_ARCH_LIST"] = "" + return flags From d796e332daa9b71194ea2139877e5d802355c20a Mon Sep 17 00:00:00 2001 From: EmmonsCurse <1577972691@qq.com> Date: Mon, 20 Apr 2026 16:15:20 +0800 Subject: [PATCH 2/3] [CI] Workaround for auto CUDA arch injection --- custom_ops/setup_ops.py | 49 +++++++++++++++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/custom_ops/setup_ops.py b/custom_ops/setup_ops.py index 6b509fdb388..8400210699b 100644 --- a/custom_ops/setup_ops.py +++ b/custom_ops/setup_ops.py @@ -22,9 +22,48 @@ from pathlib import Path import paddle -from paddle.utils.cpp_extension import CppExtension, CUDAExtension, setup +from paddle.utils.cpp_extension import ( + CppExtension, + CUDAExtension, + extension_utils, + setup, +) from setuptools import find_namespace_packages, find_packages +# Workaround for Paddle PR #78704: +# Paddle 3.5.0.dev20260418+ changed CUDAExtension behavior to auto-add gencode flags +# based on PADDLE_CUDA_ARCH_LIST even when user provides arch flags in cflags. +# This causes relocation overflow in large CUDA files (e.g., append_attention.cu). +# +# This patch suppresses Paddle's auto-gencode addition when user-provided gencode +# flags are detected, preventing duplicate architecture specifications. +_original_get_cuda_arch_flags = extension_utils._get_cuda_arch_flags + + +def _patched_get_cuda_arch_flags(cflags=None): + """ + Patched version that returns empty list when user-provided gencode flags are detected. + + This prevents Paddle from auto-adding duplicate gencode flags based on + PADDLE_CUDA_ARCH_LIST, which would cause relocation overflow errors. + """ + if cflags: + for flag in cflags: + if isinstance(flag, str) and (flag.startswith("-gencode") or "compute_" in flag or "sm_" in flag): + return [] + return _original_get_cuda_arch_flags(cflags) + + +extension_utils._get_cuda_arch_flags = _patched_get_cuda_arch_flags + + +# Additional safeguard (important): +# Some Paddle versions may have additional internal methods that add gencode flags. +# This patch serves as a second line of defense by overriding such methods. +if hasattr(extension_utils, "CUDAExtension"): + if hasattr(extension_utils.CUDAExtension, "_add_cuda_arch_flags"): + extension_utils.CUDAExtension._add_cuda_arch_flags = lambda self, flags: flags + def load_module_from_path(module_name, path): """ @@ -176,14 +215,6 @@ def get_gencode_flags(archs): ] else: flags += ["-gencode", f"arch=compute_{cc_val},code=sm_{cc_val}"] - - # Workaround for Paddle PR #78704: - # Paddle 3.5.0.dev20260418+ changed CUDAExtension behavior to add - # PADDLE_CUDA_ARCH_LIST-based gencode flags even when user provides - # arch flags in cflags. Setting PADDLE_CUDA_ARCH_LIST to empty string - # prevents Paddle from auto-detecting GPUs and adding duplicate gencode flags. - os.environ["PADDLE_CUDA_ARCH_LIST"] = "" - return flags From 11886c0ad8589f0eaccb14cc0c47250d5a96a2bf Mon Sep 17 00:00:00 2001 From: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com> Date: Thu, 23 Apr 2026 21:42:00 +0800 Subject: [PATCH 3/3] Update check_approval.sh --- scripts/check_approval.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/check_approval.sh b/scripts/check_approval.sh index 8311b2bdac5..493e126d6cd 100644 --- a/scripts/check_approval.sh +++ b/scripts/check_approval.sh @@ -40,7 +40,7 @@ function add_failed(){ } -HAS_CUSTOM_REGISTRER=`git diff -U0 upstream/$BRANCH | grep '^\+' | grep -zoE "PD_BUILD_(STATIC_)?OP" || true` +HAS_CUSTOM_REGISTRER=`git diff --merge-base -U0 upstream/$BRANCH | grep '^\+' | grep -zoE "PD_BUILD_(STATIC_)?OP" || true` if [ ${HAS_CUSTOM_REGISTRER} ] && [ "${PR_ID}" != "" ]; then echo_line1="You must have one FastDeploy RD (qingqing01(dangqingqing), Jiang-Jia-Jun(jiangjiajun), heavengate(dengkaipeng)) approval for adding custom op.\n" echo_line2="You must have one PaddlePaddle RD (jeff41404(gaoxiang), yongqiangma(mayongqiang)) approval for adding custom op.\n" @@ -52,7 +52,7 @@ WORKER_OR_CONFIG_LIST=( "fastdeploy/model_executor/graph_optimization" ) -HAS_WORKER_OR_CONFIG_MODIFY=`git diff upstream/$BRANCH --name-only | grep -E $(printf -- "-e %s " "${WORKER_OR_CONFIG_LIST[@]}") || true` +HAS_WORKER_OR_CONFIG_MODIFY=`git diff --merge-base upstream/$BRANCH --name-only | grep -E $(printf -- "-e %s " "${WORKER_OR_CONFIG_LIST[@]}") || true` if [ "${HAS_WORKER_OR_CONFIG_MODIFY}" != "" ] && [ "${PR_ID}" != "" ]; then echo_line1="You must have one FastDeploy RD gongshaotian(gongshaotian) approval for modifing [$(IFS=', '; echo "${WORKER_OR_CONFIG_LIST[*]}")]." check_approval "$echo_line1" 1 gongshaotian @@ -63,7 +63,7 @@ SPECULATIVE_DECODING_LIST=( "custom_ops/gpu_ops/speculate_decoding" ) -HAS_SPECULATIVE_DECODING_MODIFY=`git diff upstream/$BRANCH --name-only | grep -E $(printf -- "-e %s " "${SPECULATIVE_DECODING_LIST[@]}") || true` +HAS_SPECULATIVE_DECODING_MODIFY=`git diff --merge-base upstream/$BRANCH --name-only | grep -E $(printf -- "-e %s " "${SPECULATIVE_DECODING_LIST[@]}") || true` if [ "${HAS_SPECULATIVE_DECODING_MODIFY}" != "" ] && [ "${PR_ID}" != "" ]; then echo_line1="You must have one FastDeploy RD (freeliuzc(liuzichang01), Deleter-D(wangyanpeng04)) approval for modifing [$(IFS=', '; echo "${SPECULATIVE_DECODING_LIST[*]}")]." check_approval "$echo_line1" 1 freeliuzc Deleter-D @@ -71,7 +71,7 @@ fi ENV_FILE="fastdeploy/envs.py" -HAS_ENV_MODIFY=$(git diff upstream/$BRANCH --name-only | grep -E "^${ENV_FILE}$" || true) +HAS_ENV_MODIFY=$(git diff --merge-base upstream/$BRANCH --name-only | grep -E "^${ENV_FILE}$" || true) if [ "${HAS_ENV_MODIFY}" != "" ] && [ "${PR_ID}" != "" ]; then echo_line1="You must have one FastDeploy RD (Jiang-Jia-Jun(jiangjiajun), yuanlehome(liuyuanle), rainyfly(chenjian26), Wanglongzhi2001(wanglongzhi)) approval for modifying [${ENV_FILE}]." check_approval "$echo_line1" 1 Jiang-Jia-Jun yuanlehome rainyfly Wanglongzhi2001 @@ -87,7 +87,7 @@ LOG_KEYWORDS=( LOG_PATTERN="$(printf -- "%s|" "${LOG_KEYWORDS[@]}" | sed 's/|$//')" -HAS_LOG_MODIFY=$(git diff upstream/$BRANCH \ +HAS_LOG_MODIFY=$(git diff --merge-base upstream/$BRANCH \ -- . ':(exclude)scripts/check_approval.sh' ':(exclude)tests/**' \ | grep -E "^\+" \ | grep -vE "^\+\+\+" \