Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion sentry_sdk/integrations/pyreqwest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from sentry_sdk import start_span
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk.integrations import DidNotEnable, Integration
from sentry_sdk.scope import should_send_default_pii
from sentry_sdk.traces import StreamedSpan
from sentry_sdk.tracing import BAGGAGE_HEADER_NAME
from sentry_sdk.tracing_utils import (
Expand Down Expand Up @@ -95,7 +96,7 @@ def _sentry_pyreqwest_span(request: "Request") -> "Generator[Any, None, None]":
SPANDATA.HTTP_REQUEST_METHOD: request.method,
},
) as span:
Comment thread
sentry-warden[bot] marked this conversation as resolved.
if parsed_url is not None:
if parsed_url is not None and should_send_default_pii():

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: The non-streamed request path in the pyreqwest integration does not respect the should_send_default_pii() setting, leading to potential PII leakage in URL data.
Severity: HIGH

Suggested Fix

Add the if should_send_default_pii(): check around the lines that set url, HTTP_QUERY, and HTTP_FRAGMENT data in the non-streamed path (lines 131-137), mirroring the implementation in the streamed path.

Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent. Verify if this is a real issue. If it is, propose a fix; if not, explain why it's
not valid.

Location: sentry_sdk/integrations/pyreqwest.py#L99

Potential issue: The pyreqwest integration has two code paths for creating spans: a
streamed path and a non-streamed path. The pull request adds a check for
`should_send_default_pii()` to prevent leaking URL data in the streamed path. However,
the same check is missing from the non-streamed path (lines 131-137). As a result, when
`send_default_pii` is set to `False`, the non-streamed path will still send `url`,
`HTTP_QUERY`, and `HTTP_FRAGMENT` data, which may contain personally identifiable
information (PII), contrary to the user's configuration.

Did we get this right? 👍 / 👎 to inform future reviews.

span.set_attribute(SPANDATA.URL_FULL, parsed_url.url)
span.set_attribute(SPANDATA.URL_QUERY, parsed_url.query)
span.set_attribute(SPANDATA.URL_FRAGMENT, parsed_url.fragment)
Expand Down
45 changes: 39 additions & 6 deletions tests/integrations/pyreqwest/test_pyreqwest.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,17 +59,20 @@ def clear_captured_requests():
PyreqwestMockHandler.captured_requests.clear()


@pytest.mark.parametrize("send_default_pii", [True, False])
@pytest.mark.parametrize("span_streaming", [True, False])
def test_sync_client_spans(
sentry_init,
capture_events,
capture_items,
server_port,
span_streaming,
send_default_pii,
):
sentry_init(
integrations=[PyreqwestIntegration()],
traces_sample_rate=1.0,
send_default_pii=send_default_pii,
_experiments={"trace_lifecycle": "stream" if span_streaming else "static"},
)

Expand All @@ -88,12 +91,21 @@ def test_sync_client_spans(
span = spans[0]
assert span["attributes"]["sentry.op"] == "http.client"
assert span["name"] == f"GET http://localhost:{server_port}/hello"
assert span["attributes"]["url.full"] == f"http://localhost:{server_port}/hello"
assert span["attributes"][SPANDATA.HTTP_REQUEST_METHOD] == "GET"
assert span["attributes"][SPANDATA.HTTP_STATUS_CODE] == 200
assert span["attributes"][SPANDATA.URL_QUERY] == "q=test"
assert span["attributes"][SPANDATA.URL_FRAGMENT] == "frag"
assert span["attributes"]["sentry.origin"] == "auto.http.pyreqwest"

if send_default_pii:
assert (
span["attributes"]["url.full"]
== f"http://localhost:{server_port}/hello"
)
assert span["attributes"][SPANDATA.URL_QUERY] == "q=test"
assert span["attributes"][SPANDATA.URL_FRAGMENT] == "frag"
else:
assert "url.full" not in span["attributes"]
assert SPANDATA.URL_QUERY not in span["attributes"]
assert SPANDATA.URL_FRAGMENT not in span["attributes"]
else:
events = capture_events()

Expand All @@ -116,17 +128,20 @@ def test_sync_client_spans(


@pytest.mark.asyncio
@pytest.mark.parametrize("send_default_pii", [True, False])
@pytest.mark.parametrize("span_streaming", [True, False])
async def test_async_client_spans(
sentry_init,
capture_events,
capture_items,
server_port,
span_streaming,
send_default_pii,
):
sentry_init(
integrations=[PyreqwestIntegration()],
traces_sample_rate=1.0,
send_default_pii=send_default_pii,
_experiments={"trace_lifecycle": "stream" if span_streaming else "static"},
)

Expand All @@ -145,10 +160,14 @@ async def test_async_client_spans(
span = spans[0]
assert span["attributes"]["sentry.op"] == "http.client"
assert span["name"] == f"GET {url}"
assert span["attributes"]["url.full"] == url
assert span["attributes"][SPANDATA.HTTP_REQUEST_METHOD] == "GET"
assert span["attributes"][SPANDATA.HTTP_STATUS_CODE] == 200
assert span["attributes"]["sentry.origin"] == "auto.http.pyreqwest"

if send_default_pii:
assert span["attributes"]["url.full"] == url
else:
assert "url.full" not in span["attributes"]
else:
events = capture_events()

Expand All @@ -168,17 +187,20 @@ async def test_async_client_spans(
assert span["origin"] == "auto.http.pyreqwest"


@pytest.mark.parametrize("send_default_pii", [True, False])
@pytest.mark.parametrize("span_streaming", [True, False])
def test_sync_simple_request_spans(
sentry_init,
capture_events,
capture_items,
server_port,
span_streaming,
send_default_pii,
):
sentry_init(
integrations=[PyreqwestIntegration()],
traces_sample_rate=1.0,
send_default_pii=send_default_pii,
_experiments={"trace_lifecycle": "stream" if span_streaming else "static"},
)

Expand All @@ -196,10 +218,14 @@ def test_sync_simple_request_spans(
span = spans[0]
assert span["attributes"]["sentry.op"] == "http.client"
assert span["name"] == f"GET {url}"
assert span["attributes"]["url.full"] == url
assert span["attributes"][SPANDATA.HTTP_REQUEST_METHOD] == "GET"
assert span["attributes"][SPANDATA.HTTP_STATUS_CODE] == 200
assert span["attributes"]["sentry.origin"] == "auto.http.pyreqwest"

if send_default_pii:
assert span["attributes"]["url.full"] == url
else:
assert "url.full" not in span["attributes"]
else:
events = capture_events()

Expand All @@ -219,17 +245,20 @@ def test_sync_simple_request_spans(


@pytest.mark.asyncio
@pytest.mark.parametrize("send_default_pii", [True, False])
@pytest.mark.parametrize("span_streaming", [True, False])
async def test_async_simple_request_spans(
sentry_init,
capture_events,
capture_items,
server_port,
span_streaming,
send_default_pii,
):
sentry_init(
integrations=[PyreqwestIntegration()],
traces_sample_rate=1.0,
send_default_pii=send_default_pii,
_experiments={"trace_lifecycle": "stream" if span_streaming else "static"},
)

Expand All @@ -247,10 +276,14 @@ async def test_async_simple_request_spans(
span = spans[0]
assert span["attributes"]["sentry.op"] == "http.client"
assert span["name"] == f"GET {url}"
assert span["attributes"]["url.full"] == url
assert span["attributes"][SPANDATA.HTTP_REQUEST_METHOD] == "GET"
assert span["attributes"][SPANDATA.HTTP_STATUS_CODE] == 200
assert span["attributes"]["sentry.origin"] == "auto.http.pyreqwest"

if send_default_pii:
assert span["attributes"]["url.full"] == url
else:
assert "url.full" not in span["attributes"]
else:
events = capture_events()

Expand Down
Loading