Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion kubernetes/scripts/create_x509_user_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,4 +121,4 @@ def build_kubectl_config(tmpdir: str) -> None:
Path(tmpdir, f"{user}.config"), # from
Path(f"{user}.config"), # to
)
print(f"Config generated. Saved to {user}.config in current directory.") # noqa: T201
print(f"Config generated. Saved to {user}.config in current directory.")
2 changes: 1 addition & 1 deletion kubernetes/scripts/lint_manifests.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,4 @@ def get_all_manifests() -> list[str]:
arg = " -f ".join([""] + get_all_manifests())
os.system("kubectl diff" + arg) # noqa: S605
elif sys.argv[1] == "find":
print("\n".join(get_all_manifests())) # noqa: T201
print("\n".join(get_all_manifests()))
13 changes: 13 additions & 0 deletions kubernetes/scripts/memray_profile/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Probe image for on-demand memory profiling of k8s Python services.
# The memray_profile script injects it into a pod as an ephemeral container.
#
# Multi-platform build required (dev machines are arm64, cluster is amd64):
#
#   docker buildx build --platform linux/amd64,linux/arm64 --push \
#     -t ghcr.io/python-discord/memray-probe:latest \
#     kubernetes/scripts/memray_profile/
FROM python:3.14-slim

# util-linux ships nsenter, which the profiler runs to enter the target's mount namespace.
RUN apt-get update && apt-get install -y --no-install-recommends util-linux \
&& rm -rf /var/lib/apt/lists/*
# Keep this pin in sync with MEMRAY_VERSION in _constants.py.
RUN pip install --no-cache-dir memray==1.19.3

# Default only: the profiler starts the probe with its own `/bin/sh -c ...` command.
ENTRYPOINT ["/bin/sh"]
Empty file.
100 changes: 100 additions & 0 deletions kubernetes/scripts/memray_profile/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
"""
Profile memory usage of a running Kubernetes Python service with memray.

Injects an ephemeral debug container, uses sys.remote_exec to start a
memray.Tracker in the target process, waits for the trace, and copies
a flamegraph report back locally.

python -m memray_profile deploy/king-arthur -n bots
python -m memray_profile deploy/site -n web --duration 60
"""

import argparse
import subprocess
from datetime import UTC, datetime
from pathlib import Path

from ._constants import PROBE_REPORT, PROBE_TRACE, TARGET_TRACE
from ._kubectl import die, kubectl
from ._pod import find_python_pid, get_containers, resolve_pod
from ._probe import inject_probe, inject_tracker, wait_for_probe


def _parse_args() -> argparse.Namespace:
    """Build the CLI parser, parse the command line and reject unusable values.

    Validation happens here, before anything is injected into the cluster, so
    a bad flag never leaves a stray probe container behind.
    """
    parser = argparse.ArgumentParser(description="Profile memory of a k8s Python service with memray.")
    parser.add_argument("target", help="Pod name or workload ref (deploy/x, sts/x)")
    parser.add_argument("-n", "--namespace", default="default")
    parser.add_argument("-c", "--container", help="Target container (default: first)")
    parser.add_argument("-p", "--pid", type=int, help="Skip PID auto-detection")
    parser.add_argument("-d", "--duration", type=int, default=30, metavar="SEC")
    parser.add_argument("--report-type", choices=["flamegraph", "tree"], default="flamegraph")
    parser.add_argument("--trace-path", default=TARGET_TRACE)
    parser.add_argument("--raw", action="store_true", help="Copy raw .bin instead of rendered report")
    parser.add_argument("--output-dir", type=Path, default=Path.cwd())
    args = parser.parse_args()

    if args.duration < 0:
        parser.error("--duration must not be negative")
    # The path is appended to /proc/<pid>/root, so it has to start at the root.
    if not args.trace_path.startswith("/"):
        parser.error("--trace-path must be an absolute path inside the target container")
    return args


def main() -> None:
    """
    Run one profiling session end to end.

    Resolves the target pod and container, injects the probe container,
    starts a memray tracker in the target's Python process, then copies either
    the raw trace or a rendered report into the output directory.

    Every failure is reported through `die`, i.e. an error line on stderr and
    exit status 1.
    """
    args = _parse_args()

    pod = resolve_pod(args.target, args.namespace)
    containers = get_containers(pod, args.namespace)
    if not containers:
        die(f"No containers found in {pod}")
    container = args.container or containers[0]
    if container not in containers:
        die(f"Container {container!r} not in {pod}. Have: {', '.join(containers)}")
    print(f"Target: {pod} / {container}")

    # The timestamp makes both the probe name and the output file name unique per run.
    ts = datetime.now(UTC).strftime("%Y%m%d%H%M%S")
    probe = f"memray-{ts}"

    inject_probe(pod, args.namespace, container, probe)
    wait_for_probe(pod, args.namespace, probe)

    if args.pid:
        pid = args.pid
    else:
        pid = find_python_pid(pod, args.namespace, probe)
    print(f"Python PID: {pid}")

    inject_tracker(pod, args.namespace, probe, pid, args.duration, args.trace_path)

    # Grab output
    args.output_dir.mkdir(parents=True, exist_ok=True)
    # The probe reaches the target's filesystem through /proc/<pid>/root.
    trace_on_target = f"/proc/{pid}/root{args.trace_path}"

    if args.raw:
        out = args.output_dir / f"memray_{pod}_{ts}.bin"
        src = trace_on_target
    else:
        # Copy trace into the probe container, render the report there.
        # cp is run directly (no `sh -c`) so a user-supplied path is never parsed by a shell.
        kubectl(
            "exec",
            pod,
            "-n",
            args.namespace,
            "-c",
            probe,
            "--",
            "cp",
            trace_on_target,
            PROBE_TRACE,
            capture=True,
            check=True,
        )
        if args.report_type == "flamegraph":
            report_cmd = f"memray flamegraph -o {PROBE_REPORT} {PROBE_TRACE}"
        else:
            report_cmd = f"memray tree {PROBE_TRACE} > {PROBE_REPORT}"
        print(f"Generating {args.report_type}...")
        kubectl("exec", pod, "-n", args.namespace, "-c", probe, "--", "sh", "-c", report_cmd, capture=False)

        suffix = ".html" if args.report_type == "flamegraph" else ".txt"
        out = args.output_dir / f"memray_{pod}_{ts}{suffix}"
        src = PROBE_REPORT

    print(f"Copying to {out}...")
    try:
        subprocess.run(  # noqa: S603
            ["kubectl", "cp", "-n", args.namespace, "-c", probe, f"{pod}:{src}", str(out)],
            check=True,
        )
    except subprocess.CalledProcessError as exc:
        # kubectl's own stderr was not captured, so it is already on the terminal.
        die(f"kubectl cp failed: exit code {exc.returncode}")
    print(f"\nDone: {out}")


if __name__ == "__main__":
    main()
6 changes: 6 additions & 0 deletions kubernetes/scripts/memray_profile/_constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Image used for the ephemeral probe container.
PROBE_IMAGE = "ghcr.io/python-discord/memray-probe:latest"
# memray release the probe's startup command installs when `import memray` fails.
MEMRAY_VERSION = "1.19.3"
# Where the trace is copied to inside the probe container before a report is rendered.
PROBE_TRACE = "/tmp/memray_trace.bin"  # noqa: S108
# Where the rendered report is written inside the probe container.
# Used for both report types, despite the .html suffix.
PROBE_REPORT = "/tmp/memray_report.html"  # noqa: S108
# Default for --trace-path: where the tracker writes its trace in the target's filesystem.
TARGET_TRACE = "/tmp/memray_trace.bin"  # noqa: S108
# Line the probe echoes once it is usable; wait_for_probe looks for it in the probe's logs.
READY_MARKER = "MEMRAY_PROBE_READY"
21 changes: 21 additions & 0 deletions kubernetes/scripts/memray_profile/_kubectl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import subprocess
import sys
from typing import NoReturn


def die(msg: str) -> NoReturn:
    """Write ``Error: <msg>`` to stderr and terminate with exit status 1."""
    print(f"Error: {msg}", file=sys.stderr)
    raise SystemExit(1)


def kubectl(*args: str, capture: bool = True, check: bool = True) -> subprocess.CompletedProcess[str]:
    """
    Run ``kubectl`` with *args* and return the completed process.

    Args:
        *args: Arguments passed to kubectl verbatim (no shell is involved).
        capture: Capture stdout/stderr as text instead of inheriting the terminal.
        check: Treat a non-zero exit status as fatal.

    A fatal failure does not raise: it exits through `die`, reporting the last
    line of kubectl's stderr, or the exit code when stderr was not captured.
    A missing kubectl binary is reported the same way.
    """
    try:
        return subprocess.run(  # noqa: S603
            ["kubectl", *args],
            capture_output=capture,
            text=True,
            check=check,
        )
    except FileNotFoundError:
        # Without this the user gets a raw traceback instead of the usual error line.
        die("kubectl not found on PATH")
    except subprocess.CalledProcessError as exc:
        # Only the last stderr line is shown; it normally carries the actual error.
        stderr = (exc.stderr or "").strip().rsplit("\n", 1)[-1] or f"exit code {exc.returncode}"
        die(f"kubectl {' '.join(args[:3])}... failed: {stderr}")
77 changes: 77 additions & 0 deletions kubernetes/scripts/memray_profile/_pod.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
import json

from ._kubectl import die, kubectl

# Accepted `<kind>/<name>` prefixes (looked up lowercased) mapped to the
# resource name handed to `kubectl get`.
_WORKLOAD_KINDS = {
    "deploy": "deployments",
    "deployment": "deployments",
    "sts": "statefulsets",
    "statefulset": "statefulsets",
}

# Shell snippet run inside the probe container to list Python processes.
# For every /proc/<pid>/ it takes the target of the `exe` link, falling back to
# the first field of `cmdline` when the link cannot be read, and prints
# "<pid> <cmdline with NULs turned into spaces>" when that name contains "python".
# NOTE(review): `cut -d ''` presumably relies on GNU cut treating an empty
# delimiter as NUL — confirm if the probe's base image ever changes.
_FIND_PIDS_SH = r"""
for d in /proc/[0-9]*/; do
pid=$(basename "$d")
exe=$(readlink "$d/exe" 2>/dev/null) || true
case "${exe:-$(cut -d '' -f1 < "$d/cmdline" 2>/dev/null)}" in
*python*) printf '%s %s\n' "$pid" "$(tr '\0' ' ' < "$d/cmdline" 2>/dev/null)" ;;
esac
done
"""


def resolve_pod(target: str, namespace: str) -> str:
    """
    Turn *target* into the name of a concrete pod.

    A bare name (no ``/``) is taken to be a pod name and returned unchanged.
    A ``deploy/<name>`` or ``sts/<name>`` reference is resolved to the first
    pod in phase Running that matches the workload's ``matchLabels`` selector.

    Exits through `die` when the kind is unsupported, the workload has no
    ``matchLabels``, or no running pod matches.
    """
    if "/" not in target:
        return target

    kind, name = target.split("/", 1)
    resource = _WORKLOAD_KINDS.get(kind.lower())
    if not resource:
        die(f"Unsupported resource kind {kind!r}. Use a pod name, deploy/, or sts/")

    workload = json.loads(kubectl("get", resource, name, "-n", namespace, "-o", "json").stdout)
    labels = workload.get("spec", {}).get("selector", {}).get("matchLabels") or {}
    if not labels:
        # An empty -l selector would match every pod in the namespace.
        die(f"{target} has no matchLabels selector. Use a pod name instead.")
    selector = ",".join(f"{k}={v}" for k, v in labels.items())

    # `{.items[*]...}` prints nothing for an empty list, whereas `{.items[0]...}`
    # makes kubectl itself fail, which hid the friendlier message below.
    result = kubectl(
        "get",
        "pods",
        "-n",
        namespace,
        "-l",
        selector,
        "--field-selector=status.phase=Running",
        "-o",
        "jsonpath={.items[*].metadata.name}",
    )
    pods = result.stdout.split()
    if not pods:
        die(f"No running pods for {target} in {namespace!r}")
    return pods[0]


def get_containers(pod: str, namespace: str) -> list[str]:
    """Return the names listed under ``.spec.containers`` of *pod*, in kubectl's output order."""
    names_query = "jsonpath={.spec.containers[*].name}"
    listing = kubectl("get", "pod", pod, "-n", namespace, "-o", names_query).stdout
    return listing.strip().split()


def find_python_pid(pod: str, namespace: str, probe: str) -> int:
    """
    Pick the PID of the Python process to profile.

    Runs the process-listing snippet in the *probe* container, prints every
    Python process it reports, and returns the single candidate. PID 1 is only
    a candidate when it is the sole Python process. Exits through `die` when
    nothing is found or when the choice is ambiguous.
    """
    listing = kubectl("exec", pod, "-n", namespace, "-c", probe, "--", "sh", "-c", _FIND_PIDS_SH).stdout

    # Each output line is "<pid> <command line>".
    found = [
        (int(pid_text), command.strip())
        for pid_text, _, command in (row.partition(" ") for row in listing.strip().splitlines())
    ]
    if not found:
        die(f"No Python process found in {pod}")

    for number, command in found:
        print(f" PID {number}: {command}")

    # Prefer non-PID-1 processes (PID 1 is usually tini/dumb-init)
    non_init = [entry for entry in found if entry[0] != 1]
    candidates = non_init if non_init else found

    if len(candidates) > 1:
        die(f"Multiple Python PIDs found: {', '.join(str(p) for p, _ in candidates)}. Use --pid.")

    chosen, _ = candidates[0]
    return chosen
142 changes: 142 additions & 0 deletions kubernetes/scripts/memray_profile/_probe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import json
import time

from ._constants import MEMRAY_VERSION, PROBE_IMAGE, READY_MARKER
from ._kubectl import die, kubectl


def inject_probe(pod: str, namespace: str, target_container: str, probe_name: str) -> None:
    """
    Add the probe to *pod* as an ephemeral container targeting *target_container*.

    The probe runs as root with SYS_PTRACE and SYS_ADMIN and an unconfined
    seccomp profile. Its command makes sure memray is importable, echoes the
    ready marker and then sleeps for an hour.
    """
    # sh evaluates `a || b && c` left to right, so the echo runs whether memray
    # was already importable or had to be installed first.
    startup = " && ".join(
        [
            f"python3 -c 'import memray' 2>/dev/null || pip install -q memray=={MEMRAY_VERSION}",
            f"echo {READY_MARKER}",
            "sleep 3600",
        ]
    )
    security_context = {
        "capabilities": {"add": ["SYS_PTRACE", "SYS_ADMIN"]},
        "seccompProfile": {"type": "Unconfined"},
        "runAsUser": 0,
        "runAsNonRoot": False,
        "allowPrivilegeEscalation": True,
    }
    probe_container = {
        "name": probe_name,
        "image": PROBE_IMAGE,
        "command": ["/bin/sh", "-c", startup],
        "targetContainerName": target_container,
        "securityContext": security_context,
    }
    patch = json.dumps({"spec": {"ephemeralContainers": [probe_container]}})

    patch_args = [
        "patch",
        "pod",
        pod,
        "-n",
        namespace,
        "--subresource=ephemeralcontainers",
        "--type=strategic",
        "-p",
        patch,
    ]
    kubectl(*patch_args, capture=False, check=True)


def wait_for_probe(pod: str, namespace: str, probe_name: str, timeout: int = 120) -> None:
    """
    Block until the probe container is running and has logged the ready marker.

    Polls the pod every 3 seconds. Exits through `die` if the probe terminates
    or if *timeout* seconds pass without the marker showing up.
    """
    print("Waiting for probe...")
    give_up_at = time.monotonic() + timeout

    while time.monotonic() < give_up_at:
        pod_info = json.loads(kubectl("get", "pod", pod, "-n", namespace, "-o", "json").stdout)

        probe_status = None
        for entry in pod_info.get("status", {}).get("ephemeralContainerStatuses", []):
            if entry["name"] == probe_name:
                probe_status = entry
                break

        # No status entry yet behaves like an empty state: keep polling.
        state = probe_status.get("state", {}) if probe_status else {}

        if "terminated" in state:
            die(f"Probe exited early (code {state['terminated'].get('exitCode', '?')})")

        if "running" in state:
            # Running is not enough: the startup command may still be installing memray.
            logs = kubectl("logs", pod, "-n", namespace, "-c", probe_name, check=False)
            if READY_MARKER in logs.stdout:
                print("Probe ready.")
                return

        time.sleep(3)

    die(f"Probe didn't start within {timeout}s")


def _tracker_script(duration: int, trace_path: str) -> str:
    """Return the Python source that starts, and later stops, memray inside the target process."""
    return (
        "import memray as _m, builtins as _b, threading as _t, time as _time\n"
        # repr() keeps the path a valid string literal whatever characters it holds.
        # overwrite=True lets a later run against the same process reuse the path.
        f"_b._memray_tracker = _m.Tracker(destination=_m.FileDestination({trace_path!r}, overwrite=True), "
        "native_traces=True, trace_python_allocators=True)\n"
        "_b._memray_tracker.__enter__()\n"
        "def _stop():\n"
        f"    _time.sleep({duration})\n"
        "    if hasattr(_b, '_memray_tracker'):\n"
        "        _b._memray_tracker.__exit__(None, None, None)\n"
        "        del _b._memray_tracker\n"
        "_t.Thread(target=_stop, daemon=True).start()\n"
    )


def inject_tracker(pod: str, namespace: str, probe: str, pid: int, duration: int, trace_path: str) -> None:
    """
    Write a memray script into the target and use sys.remote_exec to run it.

    Args:
        pod: Pod that holds both the target and the probe container.
        namespace: Namespace of *pod*.
        probe: Name of the probe container the commands are executed in.
        pid: PID of the Python process to attach to; PID 1 is refused.
        duration: Seconds to trace before the tracker stops itself.
        trace_path: Where the target process writes the trace, in its own filesystem.

    Blocks for ``duration + 2`` seconds while the trace is recorded. The
    injected script is removed from the target afterwards, also when a step
    fails. Failures exit through `die`.
    """
    if pid == 1:
        die("Can't profile PID 1 — add tini or dumb-init so Python isn't the init process.")

    inject = "/tmp/_memray_inject.py"  # noqa: S108
    script = _tracker_script(duration, trace_path)

    try:
        # Place it in the target's filesystem via /proc/<pid>/root.
        # The quoted 'EOF' stops the shell from expanding anything in the script.
        kubectl(
            "exec",
            pod,
            "-n",
            namespace,
            "-c",
            probe,
            "--",
            "sh",
            "-c",
            f"cat > /proc/{pid}/root{inject} << 'EOF'\n{script}EOF",
            capture=True,
            check=True,
        )

        # sys.remote_exec needs to read the target's ELF .so files, so we nsenter
        # the mount namespace and run the target's own python (via its PATH).
        print(f"Attaching to PID {pid}...")
        kubectl(
            "exec",
            pod,
            "-n",
            namespace,
            "-c",
            probe,
            "--",
            "sh",
            "-c",
            f"target_path=$(tr '\\0' '\\n' < /proc/{pid}/environ | grep '^PATH=' | head -1 | cut -d= -f2-) && "
            f'nsenter --mount=/proc/{pid}/ns/mnt -- env PATH="$target_path" '
            f"python -c \"import sys; sys.remote_exec({pid}, '{inject}')\"",
            capture=False,
            check=True,
        )

        print(f"Profiling for {duration}s...")
        # Two seconds of slack so the tracker has stopped before the trace is read.
        time.sleep(duration + 2)
    finally:
        # Best effort: never leave the helper script behind in the target's /tmp.
        kubectl(
            "exec",
            pod,
            "-n",
            namespace,
            "-c",
            probe,
            "--",
            "rm",
            "-f",
            f"/proc/{pid}/root{inject}",
            capture=True,
            check=False,
        )
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ ignore = [
"COM812", "COM819", "D206", "E111", "E114", "E117", "E501", "ISC001", "Q000", "Q001", "Q002", "Q003", "W191",
]

# Operational scripts: they print to the terminal (T201), invoke kubectl by
# bare name (S607), skip docstrings on small helpers (D103) and pass many
# positional arguments around (PLR0913).
[tool.ruff.lint.extend-per-file-ignores]
"kubernetes/scripts/**/*" = ["D103", "PLR0913", "S607", "T201"]

[tool.ruff.lint.isort]
order-by-type = false
case-sensitive = true
Expand Down
Loading