braintrustdata · Stephen Belanger (Qard) · Apr 22, 2026 · Apr 24, 2026
diff --git a/.changeset/brave-clouds-trace.md b/.changeset/brave-clouds-trace.md
@@ -0,0 +1,5 @@
+---
+"braintrust": patch
+---
+
+feat: Add OpenAI Agents SDK auto-instrumentation
diff --git a/e2e/config/pr-comment-scenarios.json b/e2e/config/pr-comment-scenarios.json
@@ -9,6 +9,14 @@
       { "variantKey": "openai-v6", "label": "v6" }
     ]
   },
+  {
+    "scenarioDirName": "openai-agents-instrumentation",
+    "label": "OpenAI Agents Instrumentation",
+    "metadataScenario": "openai-agents-instrumentation",
+    "variants": [
+      { "variantKey": "openai-agents-auto-hook", "label": "Auto-hook" }
+    ]
+  },
   {
     "scenarioDirName": "anthropic-instrumentation",
     "label": "Anthropic Instrumentation",

diff --git a/e2e/scenarios/openai-agents-instrumentation/assertions.ts b/e2e/scenarios/openai-agents-instrumentation/assertions.ts
@@ -0,0 +1,113 @@
+import { beforeAll, describe, expect, test } from "vitest";
+import type { CapturedLogEvent } from "../../helpers/mock-braintrust-server";
+import { withScenarioHarness } from "../../helpers/scenario-harness";
+import {
+  findChildSpans,
+  findLatestChildSpan,
+  findLatestSpan,
+} from "../../helpers/trace-selectors";
+import {
+  AGENT_NAME,
+  FINAL_OUTPUT,
+  MODEL_NAME,
+  OPERATION_NAME,
+  ROOT_NAME,
+  SCENARIO_NAME,
+  TOOL_NAME,
+} from "./constants.mjs";
+
+/** Options object accepted by the harness's `runNodeScenarioDir` function. */
+type NodeScenarioDirOptions = {
+  entry: string;
+  env?: Record<string, string>;
+  nodeArgs: string[];
+  scenarioDir: string;
+  timeoutMs: number;
+};
+
+/**
+ * Async callback that receives a harness exposing `runNodeScenarioDir` and
+ * resolves with no value once it is done.
+ */
+type RunOpenAIAgentsScenario = (harness: {
+  runNodeScenarioDir: (options: NodeScenarioDirOptions) => Promise<unknown>;
+}) => Promise<void>;
+
+/**
+ * Gathers the child spans named "Response", followed by the child spans named
+ * "Generation", as reported by `findChildSpans` for the given parent id.
+ *
+ * @param events - Captured log events to search.
+ * @param parentId - Forwarded unchanged to `findChildSpans`; may be undefined.
+ * @returns The matches for both names, concatenated in that order.
+ */
+function findModelSpans(
+  events: CapturedLogEvent[],
+  parentId: string | undefined,
+): CapturedLogEvent[] {
+  const modelSpanNames = ["Response", "Generation"];
+  return modelSpanNames.flatMap((spanName) =>
+    findChildSpans(events, spanName, parentId),
+  );
+}
+
+/**
+ * Registers a vitest suite that runs the scenario once in `beforeAll` and then
+ * checks the spans found in the events captured by the scenario harness.
+ *
+ * @param options.name - Title passed to `describe`.
+ * @param options.runScenario - Callback invoked with the harness supplied by
+ *   `withScenarioHarness`.
+ * @param options.timeoutMs - Timeout used for both the `beforeAll` hook and
+ *   the test.
+ */
+export function defineOpenAIAgentsAutoInstrumentationAssertions(options: {
+  name: string;
+  runScenario: RunOpenAIAgentsScenario;
+  timeoutMs: number;
+}): void {
+  describe(options.name, () => {
+    // Populated by the beforeAll hook once the scenario has finished running.
+    let capturedEvents: CapturedLogEvent[] = [];
+
+    beforeAll(async () => {
+      await withScenarioHarness(async (harness) => {
+        await options.runScenario(harness);
+        capturedEvents = harness.events();
+      });
+    }, options.timeoutMs);
+
+    const assertCapturedSpans = (): void => {
+      // Look up every span first; each child lookup is keyed by the id of the
+      // span found one level above it.
+      const rootSpan = findLatestSpan(capturedEvents, ROOT_NAME);
+      const operationSpan = findLatestSpan(capturedEvents, OPERATION_NAME);
+      const workflowSpan = findLatestChildSpan(
+        capturedEvents,
+        "Agent workflow",
+        operationSpan?.span.id,
+      );
+      const agentSpan = findLatestChildSpan(
+        capturedEvents,
+        AGENT_NAME,
+        workflowSpan?.span.id,
+      );
+      const agentId = agentSpan?.span.id;
+      const llmSpans = findModelSpans(capturedEvents, agentId);
+      const toolSpan = findLatestChildSpan(capturedEvents, TOOL_NAME, agentId);
+
+      // Root span is tagged with the scenario; the operation is its only child
+      // link checked here.
+      expect(rootSpan).toBeDefined();
+      expect(rootSpan?.row.metadata).toMatchObject({
+        scenario: SCENARIO_NAME,
+      });
+      expect(operationSpan).toBeDefined();
+      expect(operationSpan?.span.parentIds).toEqual([rootSpan?.span.id ?? ""]);
+
+      // "Agent workflow" span sits directly under the operation span.
+      expect(workflowSpan).toBeDefined();
+      expect(workflowSpan?.span.type).toBe("task");
+      expect(workflowSpan?.span.parentIds).toEqual([
+        operationSpan?.span.id ?? "",
+      ]);
+
+      // Agent span records its tool list and output type in metadata.
+      expect(agentSpan).toBeDefined();
+      expect(agentSpan?.span.type).toBe("task");
+      expect(agentSpan?.row.metadata).toMatchObject({
+        tools: [TOOL_NAME],
+        output_type: "text",
+      });
+
+      // At least one model span is required, and every one must be complete.
+      expect(llmSpans.length).toBeGreaterThanOrEqual(1);
+      llmSpans.forEach((llmSpan) => {
+        expect(llmSpan.span.type).toBe("llm");
+        expect(String(llmSpan.row.metadata?.model)).toContain(MODEL_NAME);
+        expect(llmSpan.metrics).toMatchObject({
+          completion_tokens: expect.any(Number),
+          prompt_tokens: expect.any(Number),
+          tokens: expect.any(Number),
+        });
+        // Input must be an array holding at least one non-nullish entry.
+        expect(llmSpan.input).toEqual(
+          expect.arrayContaining([expect.anything()]),
+        );
+        expect(llmSpan.output).toBeDefined();
+      });
+
+      // Tool span input is compared as a JSON string, not as an object.
+      expect(toolSpan).toBeDefined();
+      expect(toolSpan?.span.type).toBe("tool");
+      expect(toolSpan?.input).toBe(JSON.stringify({ city: "Vienna" }));
+      expect(toolSpan?.output).toBe(FINAL_OUTPUT);
+    };
+
+    test(
+      "captures OpenAI Agents spans through the auto-hook setup",
+      { timeout: options.timeoutMs },
+      assertCapturedSpans,
+    );
+  });
+}
diff --git a/e2e/scenarios/openai-agents-instrumentation/constants.mjs b/e2e/scenarios/openai-agents-instrumentation/constants.mjs
@@ -0,0 +1,7 @@
+export const ROOT_NAME = "openai-agents-auto-instrumentation-root";
+export const SCENARIO_NAME = "openai-agents-instrumentation";
+export const OPERATION_NAME = "openai-agents-run-operation";
+export const AGENT_NAME = "Weather Agent";
+export const MODEL_NAME = "gpt-4o-mini";
+export const TOOL_NAME = "lookup_weather";
+export const FINAL_OUTPUT = "Sunny in Vienna";
diff --git a/e2e/scenarios/openai-agents-instrumentation/package.json b/e2e/scenarios/openai-agents-instrumentation/package.json
@@ -0,0 +1,18 @@
+{
+  "name": "openai-agents-instrumentation-scenario",
+  "version": "0.0.0",
+  "private": true,
+  "type": "module",
+  "dependencies": {
+    "@openai/agents": "0.0.14",
+    "zod": "3.25.67"
+  },
+  "braintrustScenario": {
+    "canary": {
+      "dependencies": {
+        "@openai/agents": "latest",
+        "zod": "zod@^4.0.0"
+      }
+    }
+  }
+}