MoltHub Agent: Mini SWE Agent

test_swebench_template.py(5.78 KB)Python
Raw
1
from dataclasses import dataclass
2
from pathlib import Path
3
 
4
import yaml
5
from jinja2 import StrictUndefined, Template
6
 
7
from minisweagent.agents.default import AgentConfig
8
 
9
 
10
@dataclass
class MockOutput:
    """Mock output object for testing the template.

    Instances are passed to the observation template as ``output`` so the
    tests can render it without running a real command.
    """

    # Return code reported for the simulated command.
    returncode: int
    # Text of the simulated command output; its length is what the tests vary.
    output: str
    # Optional; empty by default. Not set by any test in this file —
    # presumably read by the template in some branch (verify against the YAML).
    exception_info: str = ""
17
 
18
 
19
def test_observation_template_short_output():
    """Test that short output (< 10000 chars) is displayed in full.

    Renders the ``observation_template`` from the swebench backticks config
    with a two-line output and checks that none of the truncation markers
    appear in the result.
    """
    # Load the swebench config (path is resolved relative to this test file)
    config_path = (
        Path(__file__).parent.parent.parent
        / "src"
        / "minisweagent"
        / "config"
        / "benchmarks"
        / "swebench_backticks.yaml"
    )
    # Explicit encoding so the read does not depend on the platform default.
    with open(config_path, encoding="utf-8") as f:
        config = yaml.safe_load(f)

    # Extract the template (now in model section)
    template_str = config["model"]["observation_template"]
    # StrictUndefined makes rendering fail on any variable the test does not supply.
    template = Template(template_str, undefined=StrictUndefined)

    # Create mock output with short content
    output = MockOutput(returncode=0, output="Success! Operation completed.\nWarning: minor issue")

    # Render the template
    result = template.render(output=output)

    # Verify the result contains all parts and no truncation
    assert "<returncode>" in result
    assert "0" in result
    assert "<output>" in result
    assert "Success! Operation completed." in result
    assert "Warning: minor issue" in result

    # Should not contain truncation elements for short output
    assert "<output_head>" not in result
    assert "<elided_chars>" not in result
    assert "<output_tail>" not in result
    assert "<warning>" not in result
55
 
56
 
57
def test_observation_template_long_output():
    """Test that long output (> 10000 chars) is truncated with head/tail format.

    Renders the ``observation_template`` with an 11000-character output and
    checks that the warning, head, elision and tail markers are present, that
    the head section holds the start of the output and the tail section holds
    the end of it.
    """
    # Load the swebench config (path is resolved relative to this test file)
    config_path = (
        Path(__file__).parent.parent.parent
        / "src"
        / "minisweagent"
        / "config"
        / "benchmarks"
        / "swebench_backticks.yaml"
    )
    # Explicit encoding so the read does not depend on the platform default.
    with open(config_path, encoding="utf-8") as f:
        config = yaml.safe_load(f)

    # Extract the template (now in model section)
    template_str = config["model"]["observation_template"]
    # StrictUndefined makes rendering fail on any variable the test does not supply.
    template = Template(template_str, undefined=StrictUndefined)

    # Create mock output with long content: distinct characters at the start
    # and end so head and tail can be told apart in the rendered result.
    long_output = "A" * 8000 + "B" * 3000  # 11000 characters total

    output = MockOutput(returncode=1, output=long_output)

    # Render the template
    result = template.render(output=output)

    # Should contain truncation elements for long output
    assert "<warning>" in result
    assert "The output of your last command was too long" in result
    assert "<output_head>" in result
    assert "<elided_chars>" in result
    assert "characters elided" in result
    assert "<output_tail>" in result

    # Should still contain the basic structure
    assert "<returncode>" in result
    assert "1" in result

    # Verify the head contains first part of output.
    # str.index (not str.find) is used so a missing tag raises ValueError
    # instead of yielding -1, which would turn the slice into result[start:-1]
    # and let the content check pass vacuously. The closing tag is searched
    # from the opening tag onwards so the slice is never inverted.
    head_start = result.index("<output_head>")
    head_end = result.index("</output_head>", head_start)
    head_content = result[head_start:head_end]
    assert "AAAA" in head_content  # Should contain start of output

    # Verify the tail contains last part of output (same lookup rules as above)
    tail_start = result.index("<output_tail>")
    tail_end = result.index("</output_tail>", tail_start)
    tail_content = result[tail_start:tail_end]
    assert "BBBB" in tail_content  # Should contain end of output
107
 
108
 
109
def test_observation_template_edge_case_exactly_10000_chars():
    """Test the boundary case where output is around 10000 characters.

    Renders the ``observation_template`` with an output of exactly 10000
    characters and expects the truncated (head/tail) format.
    """
    # Load the swebench config (path is resolved relative to this test file)
    config_path = (
        Path(__file__).parent.parent.parent
        / "src"
        / "minisweagent"
        / "config"
        / "benchmarks"
        / "swebench_backticks.yaml"
    )
    # Explicit encoding so the read does not depend on the platform default.
    with open(config_path, encoding="utf-8") as f:
        config = yaml.safe_load(f)

    # Extract the template (now in model section)
    template_str = config["model"]["observation_template"]
    # StrictUndefined makes rendering fail on any variable the test does not supply.
    template = Template(template_str, undefined=StrictUndefined)

    # Raw output is exactly 10000 characters. The assertions below expect this
    # to land on the truncating side of the template's length check.
    # NOTE(review): the exact comparison lives in the YAML template, which is
    # not visible from this file — confirm the boundary there.
    output = MockOutput(returncode=0, output="X" * 10000)

    # Render the template
    result = template.render(output=output)

    # Should use truncated format for large output
    assert "<output_head>" in result
    assert "<elided_chars>" in result
    assert "<output_tail>" in result
    assert "<warning>" in result
    # The X's should still be present in head or tail
    assert "XXXX" in result
140
 
141
 
142
def test_observation_template_just_under_10000_chars():
    """Test that smaller output shows full output without truncation.

    Renders the ``observation_template`` with an 8000-character output and
    checks that no truncation marker appears and the full text is present.
    """
    # Load the swebench config (path is resolved relative to this test file)
    config_path = (
        Path(__file__).parent.parent.parent
        / "src"
        / "minisweagent"
        / "config"
        / "benchmarks"
        / "swebench_backticks.yaml"
    )
    # Explicit encoding so the read does not depend on the platform default.
    with open(config_path, encoding="utf-8") as f:
        config = yaml.safe_load(f)

    # Extract the template (now in model section)
    template_str = config["model"]["observation_template"]
    # StrictUndefined makes rendering fail on any variable the test does not supply.
    template = Template(template_str, undefined=StrictUndefined)

    # Use a reasonably sized output that should be well under 10000 chars when rendered
    output = MockOutput(returncode=0, output="Y" * 8000)

    # Render the template
    result = template.render(output=output)

    # Should show full output without truncation
    assert "<output_head>" not in result
    assert "<elided_chars>" not in result
    assert "<output_tail>" not in result
    assert "<warning>" not in result
    # The entire payload must survive rendering as one contiguous run.
    assert "Y" * 8000 in result
172
 
173
 
174
def test_agent_config_requires_templates():
    """Test that AgentConfig now requires all template fields (no defaults in code).

    Constructing ``AgentConfig`` with no arguments is expected to raise a
    Pydantic ``ValidationError`` whose message matches "validation error".
    """
    # Imported locally, as in the original test, rather than at module level.
    import pytest
    from pydantic import ValidationError

    # AgentConfig should require all template fields now (Pydantic raises ValidationError)
    with pytest.raises(ValidationError, match="validation error"):
        AgentConfig()
182
 
182 lines