| 1 | from dataclasses import dataclass
|
| 2 | from pathlib import Path
|
| 3 |
|
| 4 | import yaml
|
| 5 | from jinja2 import StrictUndefined, Template
|
| 6 |
|
| 7 | from minisweagent.agents.default import AgentConfig
|
| 8 |
|
| 9 |
|
@dataclass
class MockOutput:
    """Lightweight stand-in for a command result, passed to the observation template as ``output``."""

    # Return code value exposed to the template.
    returncode: int
    # Output text exposed to the template.
    output: str
    # Optional exception details; defaults to an empty string.
    exception_info: str = ""
|
| 17 |
|
| 18 |
|
def test_observation_template_short_output():
    """Check that a short command output is rendered in full, without truncation markers.

    Loads ``observation_template`` from the ``model`` section of
    ``swebench_backticks.yaml``, renders it with a two-line output and a zero
    return code, and asserts that the plain ``<output>`` form is used.
    """
    # Locate the swebench config relative to this test file.
    config_path = (
        Path(__file__).parent.parent.parent
        / "src"
        / "minisweagent"
        / "config"
        / "benchmarks"
        / "swebench_backticks.yaml"
    )
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's locale encoding.
    config = yaml.safe_load(config_path.read_text(encoding="utf-8"))

    # The template lives in the model section of the config.
    template_str = config["model"]["observation_template"]
    # StrictUndefined makes rendering fail on any variable the test does not supply.
    template = Template(template_str, undefined=StrictUndefined)

    # Short, two-line output with a successful return code.
    output = MockOutput(returncode=0, output="Success! Operation completed.\nWarning: minor issue")

    result = template.render(output=output)

    # All parts of the short form must be present.
    assert "<returncode>" in result
    # NOTE(review): loose check - any "0" anywhere in the rendered text satisfies it.
    assert "0" in result
    assert "<output>" in result
    assert "Success! Operation completed." in result
    assert "Warning: minor issue" in result

    # None of the truncation elements may appear for short output.
    assert "<output_head>" not in result
    assert "<elided_chars>" not in result
    assert "<output_tail>" not in result
    assert "<warning>" not in result
|
| 55 |
|
| 56 |
|
def test_observation_template_long_output():
    """Check that a long command output is rendered in the truncated head/tail form.

    Renders ``observation_template`` from ``swebench_backticks.yaml`` with an
    11000-character output (8000 ``A`` followed by 3000 ``B``) and a return
    code of 1, then asserts that the warning, head, elided-count and tail
    elements are present and that head and tail hold the start and end of the
    original output.
    """
    # Locate the swebench config relative to this test file.
    config_path = (
        Path(__file__).parent.parent.parent
        / "src"
        / "minisweagent"
        / "config"
        / "benchmarks"
        / "swebench_backticks.yaml"
    )
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's locale encoding.
    config = yaml.safe_load(config_path.read_text(encoding="utf-8"))

    # The template lives in the model section of the config.
    template_str = config["model"]["observation_template"]
    # StrictUndefined makes rendering fail on any variable the test does not supply.
    template = Template(template_str, undefined=StrictUndefined)

    # 8000 + 3000 = 11000 characters, i.e. more than 10000.
    long_output = "A" * 8000 + "B" * 3000
    output = MockOutput(returncode=1, output=long_output)

    result = template.render(output=output)

    # Truncation elements must be present for long output.
    assert "<warning>" in result
    assert "The output of your last command was too long" in result
    assert "<output_head>" in result
    assert "<elided_chars>" in result
    assert "characters elided" in result
    assert "<output_tail>" in result

    # The basic structure must still be there.
    assert "<returncode>" in result
    # NOTE(review): loose check - any "1" anywhere in the rendered text satisfies it.
    assert "1" in result

    # str.index (unlike str.find) raises ValueError when a tag is missing, so a
    # missing closing tag fails the test instead of silently widening the slice
    # to almost the whole rendered text.
    head_start = result.index("<output_head>")
    head_end = result.index("</output_head>", head_start)
    head_content = result[head_start:head_end]
    assert "AAAA" in head_content  # Should contain start of output

    tail_start = result.index("<output_tail>")
    tail_end = result.index("</output_tail>", tail_start)
    tail_content = result[tail_start:tail_end]
    assert "BBBB" in tail_content  # Should contain end of output
|
| 107 |
|
| 108 |
|
def test_observation_template_edge_case_exactly_10000_chars():
    """Check the boundary case: an output of exactly 10000 characters is truncated.

    Renders ``observation_template`` from ``swebench_backticks.yaml`` with
    ``"X" * 10000`` and asserts that the head/tail form with a warning is used.
    """
    # Locate the swebench config relative to this test file.
    config_path = (
        Path(__file__).parent.parent.parent
        / "src"
        / "minisweagent"
        / "config"
        / "benchmarks"
        / "swebench_backticks.yaml"
    )
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's locale encoding.
    config = yaml.safe_load(config_path.read_text(encoding="utf-8"))

    # The template lives in the model section of the config.
    template_str = config["model"]["observation_template"]
    # StrictUndefined makes rendering fail on any variable the test does not supply.
    template = Template(template_str, undefined=StrictUndefined)

    # Exactly 10000 characters of raw output - the boundary value itself.
    output = MockOutput(returncode=0, output="X" * 10000)

    result = template.render(output=output)

    # The boundary value is expected to use the truncated format.
    assert "<output_head>" in result
    assert "<elided_chars>" in result
    assert "<output_tail>" in result
    assert "<warning>" in result
    # The X's should still be present in head or tail.
    assert "XXXX" in result
|
| 140 |
|
| 141 |
|
def test_observation_template_just_under_10000_chars():
    """Check that an output below the 10000-character mark is rendered in full.

    Renders ``observation_template`` from ``swebench_backticks.yaml`` with
    ``"Y" * 8000`` (comfortably below 10000, not adjacent to the boundary) and
    asserts that no truncation elements appear and the full text is present.
    """
    # Locate the swebench config relative to this test file.
    config_path = (
        Path(__file__).parent.parent.parent
        / "src"
        / "minisweagent"
        / "config"
        / "benchmarks"
        / "swebench_backticks.yaml"
    )
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's locale encoding.
    config = yaml.safe_load(config_path.read_text(encoding="utf-8"))

    # The template lives in the model section of the config.
    template_str = config["model"]["observation_template"]
    # StrictUndefined makes rendering fail on any variable the test does not supply.
    template = Template(template_str, undefined=StrictUndefined)

    # 8000 characters of raw output, well under 10000.
    output = MockOutput(returncode=0, output="Y" * 8000)

    result = template.render(output=output)

    # Full output must be shown, with no truncation elements.
    assert "<output_head>" not in result
    assert "<elided_chars>" not in result
    assert "<output_tail>" not in result
    assert "<warning>" not in result
    assert "Y" * 8000 in result
|
| 172 |
|
| 173 |
|
def test_agent_config_requires_templates():
    """Constructing ``AgentConfig`` with no arguments must raise a Pydantic ``ValidationError``."""
    from pydantic import ValidationError
    import pytest

    # With no template fields supplied, instantiation is expected to fail
    # with a message matching "validation error".
    expect_failure = pytest.raises(ValidationError, match="validation error")
    with expect_failure:
        AgentConfig()
|
| 182 |
|