getsentry · ericapisani · Apr 8, 2026 · Apr 7, 2026
@@ -0,0 +1,6 @@
+import re
+
+# Anchored match for base64 data URLs, e.g. "data:image/png;base64,iVBORw0K..."; media-type parameters and the URL-safe alphabet ("-", "_") are accepted.
+DATA_URL_BASE64_REGEX = re.compile(
+    r"^data:(?:[a-zA-Z0-9][a-zA-Z0-9.+\-]*/[a-zA-Z0-9][a-zA-Z0-9.+\-]*)(?:;[a-zA-Z0-9\-]+=[^;,]*)*;base64,(?:[A-Za-z0-9+/\-_]+={0,2})$"
+)
@@ -4,6 +4,7 @@
 from typing import TYPE_CHECKING
 
 from sentry_sdk._types import BLOB_DATA_SUBSTITUTE
+from sentry_sdk.ai.consts import DATA_URL_BASE64_REGEX
 
 if TYPE_CHECKING:
     from typing import Any, Callable, Dict, List, Optional, Tuple
@@ -588,6 +589,20 @@
     return 0
 
 
+def _is_image_type_with_blob_content(item: "Dict[str, Any]") -> bool:
+    """
+    Return True when item is an "image_url" content block whose image_url["url"] is a base64 data URL.
+    Malformed blocks (image_url not a dict, url not a string) return False instead of raising, so callers only copy and redact blocks that are safe to rewrite.
+    """
+    if item.get("type") != "image_url":
+        return False
+
+    image_url = item.get("image_url")
+    url = image_url.get("url") if isinstance(image_url, dict) else None
+
+    return isinstance(url, str) and DATA_URL_BASE64_REGEX.match(url) is not None
+
+
 def redact_blob_message_parts(
     messages: "List[Dict[str, Any]]",
 ) -> "List[Dict[str, Any]]":
@@ -640,7 +655,9 @@
         content = message.get("content")
         if isinstance(content, list):
             for item in content:
-                if isinstance(item, dict) and item.get("type") == "blob":
+                if isinstance(item, dict) and (
+                    item.get("type") == "blob" or _is_image_type_with_blob_content(item)
+                ):
                     has_blobs = True
                     break
         if has_blobs:
@@ -661,8 +678,11 @@
         content = message.get("content")
         if isinstance(content, list):
             for item in content:
-                if isinstance(item, dict) and item.get("type") == "blob":
-                    item["content"] = BLOB_DATA_SUBSTITUTE
+                if isinstance(item, dict):
+                    if item.get("type") == "blob":
+                        item["content"] = BLOB_DATA_SUBSTITUTE
+                    elif _is_image_type_with_blob_content(item):
+                        item["image_url"]["url"] = BLOB_DATA_SUBSTITUTE
 
     return messages_copy
 

@@ -1,8 +1 @@
-import re
-
 SPAN_ORIGIN = "auto.ai.pydantic_ai"
-
-# Matches data URLs with base64-encoded content, e.g. "data:image/png;base64,iVBORw0K..."
-DATA_URL_BASE64_REGEX = re.compile(
-    r"^data:(?:[a-zA-Z0-9][a-zA-Z0-9.+\-]*/[a-zA-Z0-9][a-zA-Z0-9.+\-]*)(?:;[a-zA-Z0-9\-]+=[^;,]*)*;base64,(?:[A-Za-z0-9+/\-_]+={0,2})$"
-)
@@ -5,7 +5,7 @@
 from sentry_sdk.ai.utils import get_modality_from_mime_type
 from sentry_sdk.consts import SPANDATA
 
-from ..consts import DATA_URL_BASE64_REGEX
+from sentry_sdk.ai.consts import DATA_URL_BASE64_REGEX
 
 from typing import TYPE_CHECKING
 

@@ -814,6 +814,71 @@ def test_redacts_blobs_in_multiple_messages(self):
         assert result[1]["content"] == "I see the image."  # Unchanged
         assert result[2]["content"][1]["content"] == BLOB_DATA_SUBSTITUTE
 
+    def test_redacts_single_blob_within_image_url_content(self):
+        """Test that base64 image_url content is redacted without mutating the input"""
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "text": "How many ponies do you see in the image?",
+                        "type": "text",
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": "data:image/jpeg;base64,/9j/4AAQSkZJRg=="},
+                    },
+                ],
+            }
+        ]
+
+        # Snapshot the URL string itself: an alias of the dict would mask mutation.
+        original_url = messages[0]["content"][1]["image_url"]["url"]
+        result = redact_blob_message_parts(messages)
+
+        assert messages[0]["content"][1]["image_url"]["url"] == original_url
+        assert (
+            result[0]["content"][0]["text"]
+            == "How many ponies do you see in the image?"
+        )
+        assert result[0]["content"][0]["type"] == "text"
+        assert result[0]["content"][1]["type"] == "image_url"
+        assert result[0]["content"][1]["image_url"]["url"] == BLOB_DATA_SUBSTITUTE
+
+    def test_does_not_redact_image_url_content_with_non_blobs(self):
+        """Test that image_url content holding a regular URL is returned unredacted"""
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "text": "How many ponies do you see in the image?",
+                        "type": "text",
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {"url": "https://example.com/image.jpg"},
+                    },
+                ],
+            }
+        ]
+
+        # Snapshot the URL string itself: an alias of the dict would mask mutation.
+        original_url = messages[0]["content"][1]["image_url"]["url"]
+        result = redact_blob_message_parts(messages)
+
+        assert messages[0]["content"][1]["image_url"]["url"] == original_url
+        assert (
+            result[0]["content"][0]["text"]
+            == "How many ponies do you see in the image?"
+        )
+        assert result[0]["content"][0]["type"] == "text"
+        assert result[0]["content"][1]["type"] == "image_url"
+        assert (
+            result[0]["content"][1]["image_url"]["url"]
+            == "https://example.com/image.jpg"
+        )
+
     def test_no_blobs_returns_original_list(self):
         """Test that messages without blobs are returned as-is (performance optimization)"""
         messages = [