Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/brave-clouds-trace.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"braintrust": patch
---

feat: Add OpenAI Agents SDK auto-instrumentation
8 changes: 8 additions & 0 deletions e2e/config/pr-comment-scenarios.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@
{ "variantKey": "openai-v6", "label": "v6" }
]
},
{
"scenarioDirName": "openai-agents-instrumentation",
"label": "OpenAI Agents Instrumentation",
"metadataScenario": "openai-agents-instrumentation",
"variants": [
{ "variantKey": "openai-agents-auto-hook", "label": "Auto-hook" }
]
},
{
"scenarioDirName": "anthropic-instrumentation",
"label": "Anthropic Instrumentation",
Expand Down
113 changes: 113 additions & 0 deletions e2e/scenarios/openai-agents-instrumentation/assertions.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import { beforeAll, describe, expect, test } from "vitest";
import type { CapturedLogEvent } from "../../helpers/mock-braintrust-server";
import { withScenarioHarness } from "../../helpers/scenario-harness";
import {
findChildSpans,
findLatestChildSpan,
findLatestSpan,
} from "../../helpers/trace-selectors";
import {
AGENT_NAME,
FINAL_OUTPUT,
MODEL_NAME,
OPERATION_NAME,
ROOT_NAME,
SCENARIO_NAME,
TOOL_NAME,
} from "./constants.mjs";

/** Options the harness accepts when asked to run a Node scenario directory. */
type NodeScenarioDirOptions = {
  entry: string;
  env?: Record<string, string>;
  nodeArgs: string[];
  scenarioDir: string;
  timeoutMs: number;
};

/** The only part of the scenario harness a scenario runner relies on. */
type ScenarioRunnerHarness = {
  runNodeScenarioDir: (options: NodeScenarioDirOptions) => Promise<unknown>;
};

/**
 * Scenario runner supplied by the caller: it receives the harness and
 * resolves once it has finished with it.
 */
type RunOpenAIAgentsScenario = (
  harness: ScenarioRunnerHarness,
) => Promise<void>;

/**
 * Gathers what `findChildSpans` returns under `parentId` for the span names
 * "Response" and "Generation", in that order, as a single flat array.
 *
 * @param events - Captured log events to search.
 * @param parentId - Id of the parent span; forwarded unchanged to `findChildSpans`.
 * @returns All "Response" matches followed by all "Generation" matches.
 */
function findModelSpans(
  events: CapturedLogEvent[],
  parentId: string | undefined,
): CapturedLogEvent[] {
  const modelSpanNames = ["Response", "Generation"];
  return modelSpanNames.flatMap((spanName) =>
    findChildSpans(events, spanName, parentId),
  );
}

/**
 * Registers a vitest suite that runs the scenario once and then checks the
 * captured span tree: root -> operation -> "Agent workflow" -> agent ->
 * model spans and tool span.
 *
 * @param options.name - Suite title passed to `describe`.
 * @param options.runScenario - Invoked with the scenario harness inside `beforeAll`.
 * @param options.timeoutMs - Timeout used for both the `beforeAll` hook and the test.
 */
export function defineOpenAIAgentsAutoInstrumentationAssertions(options: {
  name: string;
  runScenario: RunOpenAIAgentsScenario;
  timeoutMs: number;
}): void {
  describe(options.name, () => {
    // Populated by the beforeAll hook; the test below only reads from it.
    let events: CapturedLogEvent[] = [];

    // Runs the scenario inside the harness and snapshots the events it captured.
    const collectEvents = async (): Promise<void> => {
      await withScenarioHarness(async (harness) => {
        await options.runScenario(harness);
        events = harness.events();
      });
    };

    // Walks the span tree top-down, then asserts on each level in turn.
    const verifySpanTree = (): void => {
      const rootSpan = findLatestSpan(events, ROOT_NAME);
      const operationSpan = findLatestSpan(events, OPERATION_NAME);
      const operationId = operationSpan?.span.id;
      const workflowSpan = findLatestChildSpan(
        events,
        "Agent workflow",
        operationId,
      );
      const agentSpan = findLatestChildSpan(
        events,
        AGENT_NAME,
        workflowSpan?.span.id,
      );
      const agentId = agentSpan?.span.id;
      const llmSpans = findModelSpans(events, agentId);
      const toolCall = findLatestChildSpan(events, TOOL_NAME, agentId);

      // Root span carries the scenario marker in its metadata.
      expect(rootSpan).toBeDefined();
      expect(rootSpan?.row.metadata).toMatchObject({
        scenario: SCENARIO_NAME,
      });

      // The `?? ""` fallbacks only matter when a span is missing, which the
      // preceding toBeDefined check already reports.
      expect(operationSpan).toBeDefined();
      expect(operationSpan?.span.parentIds).toEqual([rootSpan?.span.id ?? ""]);

      expect(workflowSpan).toBeDefined();
      expect(workflowSpan?.span.type).toBe("task");
      expect(workflowSpan?.span.parentIds).toEqual([operationId ?? ""]);

      expect(agentSpan).toBeDefined();
      expect(agentSpan?.span.type).toBe("task");
      expect(agentSpan?.row.metadata).toMatchObject({
        tools: [TOOL_NAME],
        output_type: "text",
      });

      // At least one model span must exist, and every one must look like an
      // LLM call: model name, token metrics, non-empty input, some output.
      expect(llmSpans.length).toBeGreaterThanOrEqual(1);
      llmSpans.forEach((llmSpan) => {
        expect(llmSpan.span.type).toBe("llm");
        expect(String(llmSpan.row.metadata?.model)).toContain(MODEL_NAME);
        expect(llmSpan.metrics).toMatchObject({
          completion_tokens: expect.any(Number),
          prompt_tokens: expect.any(Number),
          tokens: expect.any(Number),
        });
        expect(llmSpan.input).toEqual(
          expect.arrayContaining([expect.anything()]),
        );
        expect(llmSpan.output).toBeDefined();
      });

      // The tool span's input is compared as a JSON string, not as an object.
      expect(toolCall).toBeDefined();
      expect(toolCall?.span.type).toBe("tool");
      expect(toolCall?.input).toBe(JSON.stringify({ city: "Vienna" }));
      expect(toolCall?.output).toBe(FINAL_OUTPUT);
    };

    beforeAll(collectEvents, options.timeoutMs);

    test(
      "captures OpenAI Agents spans through the auto-hook setup",
      { timeout: options.timeoutMs },
      verifySpanTree,
    );
  });
}
7 changes: 7 additions & 0 deletions e2e/scenarios/openai-agents-instrumentation/constants.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// Shared names and values for the OpenAI Agents instrumentation scenario.
// assertions.ts imports all of these to locate spans and compare values.

// Name of the root span; assertions.ts looks it up with findLatestSpan.
export const ROOT_NAME = "openai-agents-auto-instrumentation-root";
// Expected value of the root span's `metadata.scenario`.
export const SCENARIO_NAME = "openai-agents-instrumentation";
// Name of the span expected to have the root span as its only parent.
export const OPERATION_NAME = "openai-agents-run-operation";
// Name of the agent span, looked up as a child of the "Agent workflow" span.
export const AGENT_NAME = "Weather Agent";
// Substring expected in every model span's `metadata.model`.
export const MODEL_NAME = "gpt-4o-mini";
// Name of the tool span; also expected in the agent span's `metadata.tools`.
export const TOOL_NAME = "lookup_weather";
// Expected `output` of the tool span.
export const FINAL_OUTPUT = "Sunny in Vienna";
18 changes: 18 additions & 0 deletions e2e/scenarios/openai-agents-instrumentation/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"name": "openai-agents-instrumentation-scenario",
"version": "0.0.0",
"private": true,
"type": "module",
"dependencies": {
"@openai/agents": "0.0.14",
"zod": "3.25.67"
},
"braintrustScenario": {
"canary": {
"dependencies": {
"@openai/agents": "latest",
"zod": "zod@^4.0.0"
}
}
}
}
Loading
Loading