Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 40 additions & 1 deletion custom_ops/setup_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,48 @@
from pathlib import Path

import paddle
from paddle.utils.cpp_extension import CppExtension, CUDAExtension, setup
from paddle.utils.cpp_extension import (
CppExtension,
CUDAExtension,
extension_utils,
setup,
)
from setuptools import find_namespace_packages, find_packages

# Workaround for Paddle PR #78704:
# Paddle 3.5.0.dev20260418+ changed CUDAExtension behavior to auto-add gencode flags
# based on PADDLE_CUDA_ARCH_LIST even when the user already provides arch flags in cflags.
# This causes relocation overflow in large CUDA files (e.g., append_attention.cu).
#
# This patch suppresses Paddle's auto-gencode addition when user-provided gencode
# flags are detected, preventing duplicate architecture specifications.
_original_get_cuda_arch_flags = extension_utils._get_cuda_arch_flags


def _patched_get_cuda_arch_flags(cflags=None):
"""
Patched version that returns empty list when user-provided gencode flags are detected.

This prevents Paddle from auto-adding duplicate gencode flags based on
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 建议 flag 匹配条件过于宽松,存在误判风险

当前条件 "compute_" in flag or "sm_" in flag 会对任意包含这些子串的 flag 触发(例如编译器优化参数 --use_compute_mode=sm_xx、日志参数等非架构类 flag),可能导致在不应屏蔽时意外返回空列表。

建议收窄匹配范围,使用更精确的前缀或正则:

import re

if isinstance(flag, str) and (
    flag.startswith("-gencode")
    or re.match(r'^-arch=compute_\d+', flag)
    or re.match(r'^-code=sm_\d+', flag)
):
    return []

或至少改为更严格的前缀检查(如 "-arch=compute_" / "-code=sm_"),避免子串误匹配。

PADDLE_CUDA_ARCH_LIST, which would cause relocation overflow errors.
"""
if cflags:
for flag in cflags:
if isinstance(flag, str) and (flag.startswith("-gencode") or "compute_" in flag or "sm_" in flag):

This comment was marked as outdated.

return []
return _original_get_cuda_arch_flags(cflags)


Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❓ 疑问 cflags=None 时 patch 逻辑不生效,是否已有兜底?

当前实现在 cflags 为空/None 时直接透传给原始函数,若 Paddle 内部调用 _get_cuda_arch_flags() 时不传 cflags(例如在 _add_cuda_arch_flags 内部独立调用),第一道 patch 将无效,自动 gencode 注入仍会触发。

第二道防线(_add_cuda_arch_flags = lambda self, flags: flags)依赖 extension_utils.CUDAExtension 存在该方法,但如果 Paddle 版本不满足 hasattr 条件,两道防线均失效。

建议在注释中明确说明:

  1. cflags=None 场景是否在实际调用链中不会发生;
  2. 或者两道防线如何互补,覆盖不同的 Paddle 版本。

extension_utils._get_cuda_arch_flags = _patched_get_cuda_arch_flags


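# Additional safeguard:
# Some Paddle versions may have additional internal methods that add gencode flags.
# As a second line of defense, when `CUDAExtension._add_cuda_arch_flags` exists it is
# replaced with a pass-through that returns the given flags unchanged; when it does not
# exist, only the `_get_cuda_arch_flags` patch above is in effect.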
if hasattr(extension_utils, "CUDAExtension"):

This comment was marked as outdated.

if hasattr(extension_utils.CUDAExtension, "_add_cuda_arch_flags"):
extension_utils.CUDAExtension._add_cuda_arch_flags = lambda self, flags: flags


def load_module_from_path(module_name, path):
"""
Expand Down
10 changes: 5 additions & 5 deletions scripts/check_approval.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ function add_failed(){
}


HAS_CUSTOM_REGISTRER=`git diff -U0 upstream/$BRANCH | grep '^\+' | grep -zoE "PD_BUILD_(STATIC_)?OP" || true`
HAS_CUSTOM_REGISTRER=`git diff --merge-base -U0 upstream/$BRANCH | grep '^\+' | grep -zoE "PD_BUILD_(STATIC_)?OP" || true`
if [ ${HAS_CUSTOM_REGISTRER} ] && [ "${PR_ID}" != "" ]; then
echo_line1="You must have one FastDeploy RD (qingqing01(dangqingqing), Jiang-Jia-Jun(jiangjiajun), heavengate(dengkaipeng)) approval for adding custom op.\n"
echo_line2="You must have one PaddlePaddle RD (jeff41404(gaoxiang), yongqiangma(mayongqiang)) approval for adding custom op.\n"
Expand All @@ -52,7 +52,7 @@ WORKER_OR_CONFIG_LIST=(
"fastdeploy/model_executor/graph_optimization"
)

HAS_WORKER_OR_CONFIG_MODIFY=`git diff upstream/$BRANCH --name-only | grep -E $(printf -- "-e %s " "${WORKER_OR_CONFIG_LIST[@]}") || true`
HAS_WORKER_OR_CONFIG_MODIFY=`git diff --merge-base upstream/$BRANCH --name-only | grep -E $(printf -- "-e %s " "${WORKER_OR_CONFIG_LIST[@]}") || true`
if [ "${HAS_WORKER_OR_CONFIG_MODIFY}" != "" ] && [ "${PR_ID}" != "" ]; then
echo_line1="You must have one FastDeploy RD gongshaotian(gongshaotian) approval for modifing [$(IFS=', '; echo "${WORKER_OR_CONFIG_LIST[*]}")]."
check_approval "$echo_line1" 1 gongshaotian
Expand All @@ -63,15 +63,15 @@ SPECULATIVE_DECODING_LIST=(
"custom_ops/gpu_ops/speculate_decoding"
)

HAS_SPECULATIVE_DECODING_MODIFY=`git diff upstream/$BRANCH --name-only | grep -E $(printf -- "-e %s " "${SPECULATIVE_DECODING_LIST[@]}") || true`
HAS_SPECULATIVE_DECODING_MODIFY=`git diff --merge-base upstream/$BRANCH --name-only | grep -E $(printf -- "-e %s " "${SPECULATIVE_DECODING_LIST[@]}") || true`
# Runs only when the diff against the merge base touched a path listed in
# SPECULATIVE_DECODING_LIST and PR_ID is set.
# NOTE(review): the `1` passed to check_approval is presumably the number of
# required approvals from the listed reviewers — confirm against check_approval.
if [ "${HAS_SPECULATIVE_DECODING_MODIFY}" != "" ] && [ "${PR_ID}" != "" ]; then
    # Message spelling fixed ("modifing" -> "modifying") to match the envs.py check.
    echo_line1="You must have one FastDeploy RD (freeliuzc(liuzichang01), Deleter-D(wangyanpeng04)) approval for modifying [$(IFS=', '; echo "${SPECULATIVE_DECODING_LIST[*]}")]."
    check_approval "$echo_line1" 1 freeliuzc Deleter-D
fi

ENV_FILE="fastdeploy/envs.py"

HAS_ENV_MODIFY=$(git diff upstream/$BRANCH --name-only | grep -E "^${ENV_FILE}$" || true)
HAS_ENV_MODIFY=$(git diff --merge-base upstream/$BRANCH --name-only | grep -E "^${ENV_FILE}$" || true)
if [ "${HAS_ENV_MODIFY}" != "" ] && [ "${PR_ID}" != "" ]; then
echo_line1="You must have one FastDeploy RD (Jiang-Jia-Jun(jiangjiajun), yuanlehome(liuyuanle), rainyfly(chenjian26), Wanglongzhi2001(wanglongzhi)) approval for modifying [${ENV_FILE}]."
check_approval "$echo_line1" 1 Jiang-Jia-Jun yuanlehome rainyfly Wanglongzhi2001
Expand All @@ -87,7 +87,7 @@ LOG_KEYWORDS=(

LOG_PATTERN="$(printf -- "%s|" "${LOG_KEYWORDS[@]}" | sed 's/|$//')"

HAS_LOG_MODIFY=$(git diff upstream/$BRANCH \
HAS_LOG_MODIFY=$(git diff --merge-base upstream/$BRANCH \
-- . ':(exclude)scripts/check_approval.sh' ':(exclude)tests/**' \
| grep -E "^\+" \
| grep -vE "^\+\+\+" \
Expand Down