From 88059b1caa0b508af7527d6a7d6b228e2354bc99 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 9 Apr 2026 16:44:18 +0000 Subject: [PATCH 1/3] fix: Prevent context attributes from influencing judge template parsing Co-Authored-By: jbailey@launchdarkly.com --- .../sdk/server-ai/src/ldai/judge/__init__.py | 15 ++-- packages/sdk/server-ai/tests/test_judge.py | 86 +++++++++++++++++++ 2 files changed, 96 insertions(+), 5 deletions(-) diff --git a/packages/sdk/server-ai/src/ldai/judge/__init__.py b/packages/sdk/server-ai/src/ldai/judge/__init__.py index a842db65..054c7cb2 100644 --- a/packages/sdk/server-ai/src/ldai/judge/__init__.py +++ b/packages/sdk/server-ai/src/ldai/judge/__init__.py @@ -3,8 +3,6 @@ import random from typing import Any, Dict, Optional -import chevron - from ldai import log from ldai.judge.evaluation_schema_builder import EvaluationSchemaBuilder from ldai.models import AIJudgeConfig, LDMessage @@ -163,14 +161,21 @@ def _construct_evaluation_messages(self, input_text: str, output_text: str) -> l def _interpolate_message(self, content: str, variables: Dict[str, str]) -> str: """ - Interpolates message content with variables using Mustache templating. + Interpolates message content with variables using simple string replacement. + + Uses literal string replacement instead of a template engine to prevent + template injection: attacker-controlled values from pass 1 (e.g. Mustache + delimiter-change tags like {{=[ ]=}}) would otherwise be interpreted as + control syntax by a second Mustache pass, blinding the judge. :param content: The message content template :param variables: Variables to interpolate :return: Interpolated message content """ - # Use chevron (Mustache) for templating, with no escaping - return chevron.render(content, variables) + result = content + for key, value in variables.items(): + result = result.replace('{{' + key + '}}', value) + return result def _parse_evaluation_response(self, data: Dict[str, Any]) -> Dict[str, EvalScore]: """ diff --git a/packages/sdk/server-ai/tests/test_judge.py b/packages/sdk/server-ai/tests/test_judge.py index e61ac4a0..a87b9a63 100644 --- a/packages/sdk/server-ai/tests/test_judge.py +++ b/packages/sdk/server-ai/tests/test_judge.py @@ -617,3 +617,89 @@ def tracked_variation(key, context, default): assert len(variation_calls) == 1, f"Expected 1 variation call, got {len(variation_calls)}" assert config is not None assert config.evaluation_metric_key == '$ld:ai:judge:from-flag' + + +class TestJudgeTemplateInjection: + """Regression tests for template injection vulnerability. + + These tests verify that the judge's message interpolation uses simple string + replacement instead of Mustache templating. Attacker-controlled values from + pass 1 (e.g. Mustache delimiter-change tags) must be treated as inert literal + text by pass 2. + """ + + def _make_judge(self, content: str, tracker, mock_runner) -> Judge: + """Helper to create a Judge with a single message containing the given content.""" + config = AIJudgeConfig( + key='test-judge', + enabled=True, + evaluation_metric_key='metric', + messages=[LDMessage(role='user', content=content)], + model=ModelConfig('gpt-4'), + provider=ProviderConfig('openai'), + ) + return Judge(config, tracker, mock_runner) + + @pytest.mark.parametrize('name,payload', [ + ('delimiter change brackets', '{{=[ ]=}}'), + ('delimiter change angle', '{{=<% %>=}}'), + ('partial', '{{> evil}}'), + ('comment', '{{! drop everything }}'), + ('triple stache', '{{{raw}}}'), + ('section', '{{#section}}inject{{/section}}'), + ('inverted section', '{{^section}}inject{{/section}}'), + ]) + def test_injection_variants_in_message_history( + self, name: str, payload: str, tracker: LDAIConfigTracker, mock_runner + ): + """Mustache control sequences injected via context must not blind the judge.""" + after_pass1 = f'Auditing {payload}: ' + '{{message_history}}' + + judge = self._make_judge(after_pass1, tracker, mock_runner) + messages = judge._construct_evaluation_messages('ACTUAL HISTORY', 'some output') + + assert len(messages) == 1 + assert 'ACTUAL HISTORY' in messages[0].content, \ + f'payload {payload!r} must not blind the judge to the actual history' + assert '{{message_history}}' not in messages[0].content, \ + f'placeholder must be fully substituted after payload {payload!r}' + + def test_injection_via_response(self, tracker: LDAIConfigTracker, mock_runner): + """Injection payloads in the response being evaluated are equally neutralized.""" + after_pass1 = 'History: {{message_history}}\nResponse: {{response_to_evaluate}}' + + judge = self._make_judge(after_pass1, tracker, mock_runner) + malicious_response = '{{=[ ]=}} INJECTION ATTEMPT' + messages = judge._construct_evaluation_messages('normal history', malicious_response) + + assert len(messages) == 1 + assert malicious_response in messages[0].content, \ + 'malicious content in response must appear verbatim' + assert '{{response_to_evaluate}}' not in messages[0].content, \ + 'response placeholder must be fully substituted' + + def test_multiple_placeholder_occurrences(self, tracker: LDAIConfigTracker, mock_runner): + """When a template contains the same placeholder more than once, every occurrence is substituted.""" + template = '{{message_history}} | {{message_history}}' + + judge = self._make_judge(template, tracker, mock_runner) + messages = judge._construct_evaluation_messages('HISTORY', 'RESPONSE') + + assert len(messages) == 1 + assert messages[0].content == 'HISTORY | HISTORY' + + def test_mustache_syntax_in_content(self, tracker: LDAIConfigTracker, mock_runner): + """Mustache-like syntax inside history or response values is preserved verbatim.""" + template = 'History: {{message_history}}\nResponse: {{response_to_evaluate}}' + + judge = self._make_judge(template, tracker, mock_runner) + history_with_mustache = 'How do I use {{user}} in Mustache?' + response_with_mustache = 'Use {{user}} like this: {{#user}}Hello{{/user}}' + + messages = judge._construct_evaluation_messages(history_with_mustache, response_with_mustache) + + assert len(messages) == 1 + assert history_with_mustache in messages[0].content, \ + 'Mustache-like syntax in history must be preserved verbatim' + assert response_with_mustache in messages[0].content, \ + 'Mustache-like syntax in response must be preserved verbatim' From 1ad110e4031176617d4166868c8395c2c677943d Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 21:47:07 +0000 Subject: [PATCH 2/3] fix: simplify docstring per review feedback Co-Authored-By: jbailey@launchdarkly.com --- packages/sdk/server-ai/src/ldai/judge/__init__.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/packages/sdk/server-ai/src/ldai/judge/__init__.py b/packages/sdk/server-ai/src/ldai/judge/__init__.py index 054c7cb2..81181b43 100644 --- a/packages/sdk/server-ai/src/ldai/judge/__init__.py +++ b/packages/sdk/server-ai/src/ldai/judge/__init__.py @@ -160,13 +160,8 @@ def _construct_evaluation_messages(self, input_text: str, output_text: str) -> l return messages def _interpolate_message(self, content: str, variables: Dict[str, str]) -> str: - """ - Interpolates message content with variables using simple string replacement. - - Uses literal string replacement instead of a template engine to prevent - template injection: attacker-controlled values from pass 1 (e.g. Mustache - delimiter-change tags like {{=[ ]=}}) would otherwise be interpreted as - control syntax by a second Mustache pass, blinding the judge. + """Use string replacement to prevent context attributes like {{=[ ]=}}) + from influencing judge template parsing. :param content: The message content template :param variables: Variables to interpolate From b91fa467b6e29747664d598046efb86526167fad Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 21:53:53 +0000 Subject: [PATCH 3/3] fix: use simultaneous regex substitution to prevent cross-placeholder injection Co-Authored-By: jbailey@launchdarkly.com --- packages/sdk/server-ai/src/ldai/judge/__init__.py | 10 ++++++---- packages/sdk/server-ai/tests/test_judge.py | 11 +++++++++++ 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/packages/sdk/server-ai/src/ldai/judge/__init__.py b/packages/sdk/server-ai/src/ldai/judge/__init__.py index 81181b43..69f2fbac 100644 --- a/packages/sdk/server-ai/src/ldai/judge/__init__.py +++ b/packages/sdk/server-ai/src/ldai/judge/__init__.py @@ -1,6 +1,7 @@ """Judge implementation for AI evaluation.""" import random +import re from typing import Any, Dict, Optional from ldai import log @@ -167,10 +168,11 @@ def _interpolate_message(self, content: str, variables: Dict[str, str]) -> str: :param variables: Variables to interpolate :return: Interpolated message content """ - result = content - for key, value in variables.items(): - result = result.replace('{{' + key + '}}', value) - return result + return re.sub( + r'\{\{(\w+)\}\}', + lambda match: variables.get(match.group(1), match.group(0)), + content, + ) def _parse_evaluation_response(self, data: Dict[str, Any]) -> Dict[str, EvalScore]: """ diff --git a/packages/sdk/server-ai/tests/test_judge.py b/packages/sdk/server-ai/tests/test_judge.py index a87b9a63..8b88db65 100644 --- a/packages/sdk/server-ai/tests/test_judge.py +++ b/packages/sdk/server-ai/tests/test_judge.py @@ -688,6 +688,17 @@ def test_multiple_placeholder_occurrences(self, tracker: LDAIConfigTracker, mock assert len(messages) == 1 assert messages[0].content == 'HISTORY | HISTORY' + def test_cross_placeholder_injection(self, tracker: LDAIConfigTracker, mock_runner): + """A message_history value containing {{response_to_evaluate}} must not be expanded.""" + template = 'History: {{message_history}}\nResponse: {{response_to_evaluate}}' + + judge = self._make_judge(template, tracker, mock_runner) + messages = judge._construct_evaluation_messages('{{response_to_evaluate}}', 'REAL OUTPUT') + + assert len(messages) == 1 + assert messages[0].content == 'History: {{response_to_evaluate}}\nResponse: REAL OUTPUT', \ + 'literal {{response_to_evaluate}} in history value must not be expanded' + def test_mustache_syntax_in_content(self, tracker: LDAIConfigTracker, mock_runner): """Mustache-like syntax inside history or response values is preserved verbatim.""" template = 'History: {{message_history}}\nResponse: {{response_to_evaluate}}'