diff --git a/custom_ops/setup_ops.py b/custom_ops/setup_ops.py
index dd4d4645420..8400210699b 100644
--- a/custom_ops/setup_ops.py
+++ b/custom_ops/setup_ops.py
@@ -22,9 +22,84 @@
 from pathlib import Path
 
 import paddle
-from paddle.utils.cpp_extension import CppExtension, CUDAExtension, setup
+from paddle.utils.cpp_extension import (
+    CppExtension,
+    CUDAExtension,
+    extension_utils,
+    setup,
+)
 from setuptools import find_namespace_packages, find_packages
 
+# Workaround for Paddle PR #78704:
+# Paddle 3.5.0.dev20260418+ changed CUDAExtension behavior to auto-add gencode flags
+# based on PADDLE_CUDA_ARCH_LIST even when user provides arch flags in cflags.
+# This causes relocation overflow in large CUDA files (e.g., append_attention.cu).
+#
+# This patch suppresses Paddle's auto-gencode addition when user-provided gencode
+# flags are detected, preventing duplicate architecture specifications.
+#
+# _get_cuda_arch_flags is a private Paddle helper, so it is looked up defensively:
+# a Paddle build that does not define it has nothing to patch, and importing this
+# file must not fail there.
+_original_get_cuda_arch_flags = getattr(extension_utils, "_get_cuda_arch_flags", None)
+
+# nvcc options that select a GPU architecture, and the values they take.
+_ARCH_OPTION_PATTERN = r"(?:-gencode|--generate-code|-arch|--gpu-architecture|-code|--gpu-code)(?:[=\s]|$)"
+_ARCH_VALUE_PATTERN = r"(?:^|[=,\s\[])(?:sm|compute|lto)_\d"
+
+
+def _is_user_cuda_arch_flag(flag):
+    """
+    Return True if ``flag`` is an nvcc option that selects a GPU architecture.
+
+    Matches the option names (-gencode, -arch, -code and their long forms) and
+    values such as "arch=compute_80,code=sm_80" or "sm_90a". A bare substring
+    test is avoided on purpose: "sm_" also occurs in unrelated flags such as
+    "-I/opt/asm_utils", and a false match would suppress Paddle's flags even
+    though the user supplied none.
+    """
+    import re  # function-scope import keeps this workaround self-contained
+
+    if not isinstance(flag, str):
+        return False
+    text = flag.strip()
+    if re.match(_ARCH_OPTION_PATTERN, text):
+        return True
+    return re.search(_ARCH_VALUE_PATTERN, text) is not None
+
+
+def _patched_get_cuda_arch_flags(cflags=None, *args, **kwargs):
+    """
+    Patched version that returns empty list when user-provided gencode flags are detected.
+
+    This prevents Paddle from auto-adding duplicate gencode flags based on
+    PADDLE_CUDA_ARCH_LIST, which would cause relocation overflow errors.
+
+    Args:
+        cflags: Compiler flags supplied for the extension, or None.
+        *args, **kwargs: Forwarded unchanged to Paddle's original function.
+
+    Returns:
+        An empty list when cflags already selects an architecture; otherwise the
+        result of Paddle's original _get_cuda_arch_flags.
+    """
+    if any(_is_user_cuda_arch_flag(flag) for flag in cflags or ()):
+        return []
+    return _original_get_cuda_arch_flags(cflags, *args, **kwargs)
+
+
+if _original_get_cuda_arch_flags is not None:
+    # NOTE(review): only effective if Paddle looks the helper up through the
+    # extension_utils module attribute at call time -- confirm against Paddle.
+    extension_utils._get_cuda_arch_flags = _patched_get_cuda_arch_flags
+
+
+# Additional safeguard (important):
+# Some Paddle versions may have additional internal methods that add gencode flags.
+# This patch serves as a second line of defense by overriding such methods.
+if hasattr(getattr(extension_utils, "CUDAExtension", None), "_add_cuda_arch_flags"):
+    # NOTE(review): unconditional pass-through, even when the user passed no arch flags -- confirm intent.
+    extension_utils.CUDAExtension._add_cuda_arch_flags = lambda self, flags: flags
+
 
 def load_module_from_path(module_name, path):
     """
diff --git a/scripts/check_approval.sh b/scripts/check_approval.sh index 8311b2bdac5..493e126d6cd 100644 --- a/scripts/check_approval.sh +++ b/scripts/check_approval.sh @@ -40,7 +40,7 @@ function add_failed(){ } -HAS_CUSTOM_REGISTRER=`git diff -U0 upstream/$BRANCH | grep '^\+' | grep -zoE "PD_BUILD_(STATIC_)?OP" || true` +HAS_CUSTOM_REGISTRER=`git diff --merge-base -U0 upstream/$BRANCH | grep '^\+' | grep -zoE "PD_BUILD_(STATIC_)?OP" || true` if [ ${HAS_CUSTOM_REGISTRER} ] && [ "${PR_ID}" != "" ]; then echo_line1="You must have one FastDeploy RD (qingqing01(dangqingqing), Jiang-Jia-Jun(jiangjiajun), heavengate(dengkaipeng)) approval for adding custom op.\n" echo_line2="You must have one PaddlePaddle RD (jeff41404(gaoxiang), yongqiangma(mayongqiang)) approval for adding custom op.\n" @@ -52,7 +52,7 @@ WORKER_OR_CONFIG_LIST=( "fastdeploy/model_executor/graph_optimization" ) -HAS_WORKER_OR_CONFIG_MODIFY=`git diff upstream/$BRANCH --name-only | grep -E $(printf -- "-e %s " "${WORKER_OR_CONFIG_LIST[@]}") || true` +HAS_WORKER_OR_CONFIG_MODIFY=`git diff --merge-base upstream/$BRANCH --name-only | grep -E $(printf -- "-e %s " "${WORKER_OR_CONFIG_LIST[@]}") || true` if [ "${HAS_WORKER_OR_CONFIG_MODIFY}" != "" ] && [ "${PR_ID}" != "" ]; then echo_line1="You must have one FastDeploy RD gongshaotian(gongshaotian) approval for modifing [$(IFS=', '; echo "${WORKER_OR_CONFIG_LIST[*]}")]."
check_approval "$echo_line1" 1 gongshaotian @@ -63,7 +63,7 @@ SPECULATIVE_DECODING_LIST=( "custom_ops/gpu_ops/speculate_decoding" ) -HAS_SPECULATIVE_DECODING_MODIFY=`git diff upstream/$BRANCH --name-only | grep -E $(printf -- "-e %s " "${SPECULATIVE_DECODING_LIST[@]}") || true` +HAS_SPECULATIVE_DECODING_MODIFY=`git diff --merge-base upstream/$BRANCH --name-only | grep -E $(printf -- "-e %s " "${SPECULATIVE_DECODING_LIST[@]}") || true` if [ "${HAS_SPECULATIVE_DECODING_MODIFY}" != "" ] && [ "${PR_ID}" != "" ]; then echo_line1="You must have one FastDeploy RD (freeliuzc(liuzichang01), Deleter-D(wangyanpeng04)) approval for modifing [$(IFS=', '; echo "${SPECULATIVE_DECODING_LIST[*]}")]." check_approval "$echo_line1" 1 freeliuzc Deleter-D @@ -71,7 +71,7 @@ fi ENV_FILE="fastdeploy/envs.py" -HAS_ENV_MODIFY=$(git diff upstream/$BRANCH --name-only | grep -E "^${ENV_FILE}$" || true) +HAS_ENV_MODIFY=$(git diff --merge-base upstream/$BRANCH --name-only | grep -E "^${ENV_FILE}$" || true) if [ "${HAS_ENV_MODIFY}" != "" ] && [ "${PR_ID}" != "" ]; then echo_line1="You must have one FastDeploy RD (Jiang-Jia-Jun(jiangjiajun), yuanlehome(liuyuanle), rainyfly(chenjian26), Wanglongzhi2001(wanglongzhi)) approval for modifying [${ENV_FILE}]." check_approval "$echo_line1" 1 Jiang-Jia-Jun yuanlehome rainyfly Wanglongzhi2001 @@ -87,7 +87,7 @@ LOG_KEYWORDS=( LOG_PATTERN="$(printf -- "%s|" "${LOG_KEYWORDS[@]}" | sed 's/|$//')" -HAS_LOG_MODIFY=$(git diff upstream/$BRANCH \ +HAS_LOG_MODIFY=$(git diff --merge-base upstream/$BRANCH \ -- . ':(exclude)scripts/check_approval.sh' ':(exclude)tests/**' \ | grep -E "^\+" \ | grep -vE "^\+\+\+" \