-
Notifications
You must be signed in to change notification settings - Fork 749
[CI] Disable auto CUDA arch injection to avoid duplicate gencode flags #7513
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,9 +22,48 @@ | |
| from pathlib import Path | ||
|
|
||
| import paddle | ||
| from paddle.utils.cpp_extension import CppExtension, CUDAExtension, setup | ||
| from paddle.utils.cpp_extension import ( | ||
| CppExtension, | ||
| CUDAExtension, | ||
| extension_utils, | ||
| setup, | ||
| ) | ||
| from setuptools import find_namespace_packages, find_packages | ||
|
|
||
| # Workaround for Paddle PR #78704: | ||
| # Paddle 3.5.0.dev20260418+ changed CUDAExtension behavior to auto-add gencode flags | ||
| # based on PADDLE_CUDA_ARCH_LIST even when user provides arch flags in cflags. | ||
| # This causes relocation overflow in large CUDA files (e.g., append_attention.cu). | ||
| # | ||
| # This patch suppresses Paddle's auto-gencode addition when user-provided gencode | ||
| # flags are detected, preventing duplicate architecture specifications. | ||
| _original_get_cuda_arch_flags = extension_utils._get_cuda_arch_flags | ||
|
|
||
|
|
||
# nvcc options that select a target GPU architecture (short and long spellings).
_CUDA_ARCH_OPTIONS = frozenset(
    {
        "-gencode",
        "--generate-code",
        "-arch",
        "--gpu-architecture",
        "-code",
        "--gpu-code",
    }
)

# Value tokens passed as a separate argument after "-gencode",
# e.g. ["-gencode", "arch=compute_80,code=sm_80"].
_CUDA_ARCH_VALUE_PREFIXES = ("arch=compute_", "arch=sm_")


def _is_cuda_arch_flag(flag):
    """Return True if *flag* is an nvcc architecture-selection argument."""
    if not isinstance(flag, str):
        return False
    text = flag.strip()
    if text.startswith(_CUDA_ARCH_VALUE_PREFIXES):
        return True
    # Isolate the option name from the "-opt=value" and "-opt value" spellings
    # so that only the option itself is compared, never an arbitrary substring.
    head = text.split("=", 1)[0].split()
    return bool(head) and head[0] in _CUDA_ARCH_OPTIONS


def _patched_get_cuda_arch_flags(cflags=None):
    """
    Return an empty list when the caller already supplies CUDA arch flags.

    Replacement for ``extension_utils._get_cuda_arch_flags``. When ``cflags``
    contains an architecture-selection argument (``-gencode``, ``-arch``,
    ``-code``, their long forms, or an ``arch=compute_XX,...`` value token),
    no extra flags are generated, so the architectures are not specified
    twice. Otherwise the call is delegated to the original Paddle function.

    Only real architecture options are recognised. Flags that merely contain
    the text ``sm_`` or ``compute_`` (for example a ``-D`` macro definition)
    no longer suppress Paddle's automatically generated flags.

    Args:
        cflags: Optional iterable of compiler flags; non-string entries are
            ignored.

    Returns:
        ``[]`` if a user-provided arch flag is present, else whatever the
        original ``_get_cuda_arch_flags`` returns for ``cflags``.
    """
    if cflags and any(_is_cuda_arch_flag(flag) for flag in cflags):
        return []
    return _original_get_cuda_arch_flags(cflags)
|
|
||
|
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ❓ 疑问 当前实现在 第二道防线( 建议在注释中明确说明:
|
||
| extension_utils._get_cuda_arch_flags = _patched_get_cuda_arch_flags | ||
|
|
||
|
|
||
| # Additional safeguard (important): | ||
| # Some Paddle versions may have additional internal methods that add gencode flags. | ||
| # This patch serves as a second line of defense by overriding such methods. | ||
# If this Paddle build exposes CUDAExtension._add_cuda_arch_flags, replace it
# with a pass-through that hands back the flag list it was given unchanged.
_cuda_extension = getattr(extension_utils, "CUDAExtension", None)
if hasattr(_cuda_extension, "_add_cuda_arch_flags"):
    _cuda_extension._add_cuda_arch_flags = lambda self, flags: flags
|
|
||
|
|
||
| def load_module_from_path(module_name, path): | ||
| """ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🟡 建议 flag 匹配条件过于宽松,存在误判风险
当前条件 `"compute_" in flag or "sm_" in flag` 会对任意包含这些子串的 flag 触发（例如编译器优化参数 `--use_compute_mode=sm_xx`、日志参数等非架构类 flag），可能导致在不应屏蔽时意外返回空列表。建议收窄匹配范围，使用更精确的前缀或正则，
或至少改为更严格的前缀检查（如 `-arch=compute_` / `-code=sm_`），避免子串误匹配。