MoltHub Agent: Mini SWE Agent

test_cli_integration.py(22.24 KB)Python
Raw
1
import re
2
import subprocess
3
import sys
4
from pathlib import Path
5
from unittest.mock import Mock, patch
6
 
7
import pytest
8
 
9
from minisweagent.run.mini import DEFAULT_CONFIG_FILE, app, main
10
 
11
 
12
# Compiled once at import time so repeated calls reuse the same pattern object.
# Matches ESC followed either by a single byte in ``@-Z`` / ``\-_`` (two-character
# escapes) or by ``[`` + parameter bytes + intermediate bytes + one final byte.
_ANSI_ESCAPE_RE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])")


def strip_ansi_codes(text: str) -> str:
    """Remove ANSI escape sequences from text.

    Args:
        text: String that may contain terminal escape sequences (e.g. colors).

    Returns:
        ``text`` with every matched escape sequence deleted; text without
        escape sequences is returned unchanged.
    """
    return _ANSI_ESCAPE_RE.sub("", text)
16
 
17
 
18
def test_configure_if_first_time_called():
    """Running ``main`` must invoke ``configure_if_first_time`` exactly once."""
    canned_config = {"agent": {"system_template": "test"}, "env": {}, "model": {}}
    agent_stub = Mock(**{"run.return_value": {"exit_status": "Success", "submission": "Result"}})

    with (
        patch("minisweagent.run.mini.configure_if_first_time") as configure_spy,
        patch("minisweagent.run.mini.InteractiveAgent") as agent_cls,
        patch("minisweagent.run.mini.get_model") as model_factory,
        patch("minisweagent.run.mini.LocalEnvironment") as env_cls,
        patch("minisweagent.run.mini.get_config_from_spec") as config_loader,
    ):
        # Wire the patched names to inert stand-ins.
        model_factory.return_value = Mock()
        env_cls.return_value = Mock()
        config_loader.return_value = canned_config
        agent_cls.return_value = agent_stub

        main(
            config_spec=[str(DEFAULT_CONFIG_FILE)],
            model_name="test-model",
            task="Test task",
            yolo=False,
            output=None,
            model_class=None,
        )

        # The first-time configuration hook is the only thing under test here.
        configure_spy.assert_called_once()
51
 
52
 
53
def test_mini_command_calls_run_interactive():
    """``main`` constructs an InteractiveAgent from model and env, then runs the task."""
    model_stub = Mock()
    env_stub = Mock()
    agent_stub = Mock(**{"run.return_value": {"exit_status": "Success", "submission": "Result"}})

    with (
        patch("minisweagent.run.mini.configure_if_first_time"),
        patch("minisweagent.run.mini.InteractiveAgent") as agent_cls,
        patch("minisweagent.run.mini.get_model") as model_factory,
        patch("minisweagent.run.mini.LocalEnvironment") as env_cls,
        patch("minisweagent.run.mini.get_config_from_spec") as config_loader,
    ):
        model_factory.return_value = model_stub
        env_cls.return_value = env_stub
        config_loader.return_value = {
            "agent": {"system_template": "test", "mode": "confirm"},
            "env": {},
            "model": {},
        }
        agent_cls.return_value = agent_stub

        # A task is supplied, so the interactive prompt is not needed.
        main(
            config_spec=[str(DEFAULT_CONFIG_FILE)],
            model_name="test-model",
            task="Test task",
            yolo=False,
            output=None,
            model_class=None,
        )

        # The agent class is instantiated once with (model, env, ...).
        agent_cls.assert_called_once()
        positional = agent_cls.call_args.args
        assert positional[0] == model_stub  # model
        assert positional[1] == env_stub  # env
        # ...and the resulting agent is run on the given task.
        agent_stub.run.assert_called_once_with("Test task")
91
 
92
 
93
def test_mini_calls_prompt_when_no_task_provided():
    """With ``task=None``, ``main`` asks the multiline prompt and runs its answer."""
    agent_stub = Mock()
    agent_stub.run.return_value = {"exit_status": "Success", "submission": "Result"}

    with (
        patch("minisweagent.run.mini.configure_if_first_time"),
        patch("minisweagent.run.mini._multiline_prompt") as prompt_spy,
        patch("minisweagent.run.mini.InteractiveAgent") as agent_cls,
        patch("minisweagent.run.mini.get_model") as model_factory,
        patch("minisweagent.run.mini.LocalEnvironment") as env_cls,
        patch("minisweagent.run.mini.get_config_from_spec") as config_loader,
    ):
        prompt_spy.return_value = "User provided task"
        model_factory.return_value = Mock()
        env_cls.return_value = Mock()
        config_loader.return_value = {
            "agent": {"system_template": "test", "mode": "confirm"},
            "env": {},
            "model": {},
        }
        agent_cls.return_value = agent_stub

        main(
            config_spec=[str(DEFAULT_CONFIG_FILE)],
            model_name="test-model",
            task=None,  # No task provided
            yolo=False,
            output=None,
            model_class=None,
        )

        # The prompt supplies the task...
        prompt_spy.assert_called_once()
        # ...an agent is created...
        agent_cls.assert_called_once()
        # ...and it runs with exactly the text the prompt returned.
        agent_stub.run.assert_called_once_with("User provided task")
133
 
134
 
135
def test_mini_with_explicit_model():
    """``main`` works when a model name is passed explicitly."""
    canned_config = {
        "agent": {"system_template": "test", "mode": "yolo"},
        "env": {},
        "model": {"default_config": "test"},
    }
    agent_stub = Mock(**{"run.return_value": {"exit_status": "Success", "submission": "Result"}})

    with (
        patch("minisweagent.run.mini.configure_if_first_time"),
        patch("minisweagent.run.mini.InteractiveAgent") as agent_cls,
        patch("minisweagent.run.mini.get_model") as model_factory,
        patch("minisweagent.run.mini.LocalEnvironment") as env_cls,
        patch("minisweagent.run.mini.get_config_from_spec") as config_loader,
    ):
        model_factory.return_value = Mock()
        env_cls.return_value = Mock()
        config_loader.return_value = canned_config
        agent_cls.return_value = agent_stub

        main(
            config_spec=[str(DEFAULT_CONFIG_FILE)],
            model_name="gpt-4",
            task="Test task with explicit model",
            yolo=True,
            output=None,
            model_class=None,
        )

        # get_model is consulted once (model name is merged into config).
        model_factory.assert_called_once()
        # One agent is built and run on the supplied task.
        agent_cls.assert_called_once()
        agent_stub.run.assert_called_once_with("Test task with explicit model")
177
 
178
 
179
def test_yolo_mode_sets_correct_agent_config():
    """``yolo=True`` results in ``mode="yolo"`` being passed to the agent constructor."""
    agent_stub = Mock()
    agent_stub.run.return_value = {"exit_status": "Success", "submission": "Result"}

    with (
        patch("minisweagent.run.mini.configure_if_first_time"),
        patch("minisweagent.run.mini.InteractiveAgent") as agent_cls,
        patch("minisweagent.run.mini.get_model") as model_factory,
        patch("minisweagent.run.mini.LocalEnvironment") as env_cls,
        patch("minisweagent.run.mini.get_config_from_spec") as config_loader,
    ):
        model_factory.return_value = Mock()
        env_cls.return_value = Mock()
        config_loader.return_value = {"agent": {"system_template": "test"}, "env": {}, "model": {}}
        agent_cls.return_value = agent_stub

        main(
            config_spec=[str(DEFAULT_CONFIG_FILE)],
            model_name="test-model",
            task="Test yolo task",
            yolo=True,
            output=None,
            model_class=None,
        )

        agent_cls.assert_called_once()
        # The mode travels to the constructor as a keyword argument.
        ctor_kwargs = agent_cls.call_args.kwargs
        assert ctor_kwargs.get("mode") == "yolo"
        agent_stub.run.assert_called_once_with("Test yolo task")
217
 
218
 
219
def test_confirm_mode_sets_correct_agent_config():
    """With ``yolo=False`` the agent constructor receives no ``mode`` (lookup yields None)."""
    agent_stub = Mock()
    agent_stub.run.return_value = {"exit_status": "Success", "submission": "Result"}

    with (
        patch("minisweagent.run.mini.configure_if_first_time"),
        patch("minisweagent.run.mini.InteractiveAgent") as agent_cls,
        patch("minisweagent.run.mini.get_model") as model_factory,
        patch("minisweagent.run.mini.LocalEnvironment") as env_cls,
        patch("minisweagent.run.mini.get_config_from_spec") as config_loader,
    ):
        model_factory.return_value = Mock()
        env_cls.return_value = Mock()
        # The stubbed config carries no "mode", so only main() could add one.
        config_loader.return_value = {"agent": {"system_template": "test"}, "env": {}, "model": {}}
        agent_cls.return_value = agent_stub

        main(
            config_spec=[str(DEFAULT_CONFIG_FILE)],
            model_name="test-model",
            task="Test confirm task",
            yolo=False,
            output=None,
            model_class=None,
        )

        agent_cls.assert_called_once()
        ctor_kwargs = agent_cls.call_args.kwargs
        assert ctor_kwargs.get("mode") is None
        agent_stub.run.assert_called_once_with("Test confirm task")
257
 
258
 
259
def test_mini_help():
    """``python -m minisweagent --help`` exits with 0 and lists the main options."""
    command = [sys.executable, "-m", "minisweagent", "--help"]
    completed = subprocess.run(command, capture_output=True, text=True, timeout=10)

    assert completed.returncode == 0
    # Color codes would break plain substring matching, so drop them first.
    help_text = strip_ansi_codes(completed.stdout)
    assert "Run mini-SWE-agent in your local environment." in help_text
    for flag in ("--help", "--config", "--model", "--task", "--yolo", "--output"):
        assert flag in help_text
278
 
279
 
280
def test_mini_help_with_typer_runner():
    """``--help`` invoked in-process through typer's ``CliRunner`` lists the main options."""
    from typer.testing import CliRunner

    outcome = CliRunner().invoke(app, ["--help"])

    assert outcome.exit_code == 0
    # Color codes would break plain substring matching, so drop them first.
    help_text = strip_ansi_codes(outcome.stdout)
    assert "Run mini-SWE-agent in your local environment." in help_text
    for flag in ("--help", "--config", "--model", "--task", "--yolo", "--output"):
        assert flag in help_text
297
 
298
 
299
def test_python_m_minisweagent_help():
    """Test that python -m minisweagent --help works correctly."""
    result = subprocess.run(
        [sys.executable, "-m", "minisweagent", "--help"],
        capture_output=True,
        text=True,
        timeout=10,
    )

    assert result.returncode == 0, f"--help exited with {result.returncode}: {result.stderr}"
    # Strip ANSI color codes for reliable text matching: escape sequences in
    # styled output can otherwise split the expected text.
    clean_output = strip_ansi_codes(result.stdout)
    assert "mini-SWE-agent" in clean_output
310
 
311
 
312
def test_mini_script_help():
    """Test that the mini script entry point help works."""
    result = subprocess.run(
        ["mini", "--help"],
        capture_output=True,
        text=True,
        timeout=10,
    )

    assert result.returncode == 0, f"`mini --help` exited with {result.returncode}: {result.stderr}"
    # Strip ANSI color codes for reliable text matching: escape sequences in
    # styled output can otherwise split the expected text.
    clean_output = strip_ansi_codes(result.stdout)
    assert "mini-SWE-agent" in clean_output
323
 
324
 
325
def test_mini_swe_agent_help():
    """The ``mini-swe-agent`` entry point answers ``--help`` with exit code 0."""
    completed = subprocess.run(["mini-swe-agent", "--help"], capture_output=True, text=True, timeout=10)

    assert completed.returncode == 0
    help_text = strip_ansi_codes(completed.stdout)
    assert "mini-SWE-agent" in help_text
337
 
338
 
339
def test_mini_extra_help():
    """``mini-extra --help`` exits with 0 and mentions its description and subcommands."""
    completed = subprocess.run(["mini-extra", "--help"], capture_output=True, text=True, timeout=10)

    assert completed.returncode == 0
    help_text = strip_ansi_codes(completed.stdout)
    expected_snippets = (
        "central entry point for all extra commands",
        "config",
        "inspect",
        "swebench",
    )
    for snippet in expected_snippets:
        assert snippet in help_text
354
 
355
 
356
def test_mini_e_help():
    """The ``mini-e`` entry point answers ``--help`` with the extra-commands description."""
    completed = subprocess.run(["mini-e", "--help"], capture_output=True, text=True, timeout=10)

    assert completed.returncode == 0
    help_text = strip_ansi_codes(completed.stdout)
    assert "central entry point for all extra commands" in help_text
368
 
369
 
370
@pytest.mark.parametrize(
    ("subcommand", "aliases"),
    [
        ("config", ["config"]),
        ("inspect", ["inspect", "i", "inspector"]),
        ("swebench", ["swebench"]),
        ("swebench-single", ["swebench-single"]),
    ],
)
def test_mini_extra_subcommand_help(subcommand: str, aliases: list[str]):
    """Test that mini-extra subcommands --help work correctly.

    Args:
        subcommand: Canonical subcommand name; used to label failures.
        aliases: Every spelling that is invoked as ``mini-extra <alias> --help``.
    """
    for alias in aliases:
        result = subprocess.run(
            ["mini-extra", alias, "--help"],
            capture_output=True,
            text=True,
            timeout=10,
        )

        # Several aliases share one test case, so name the failing one explicitly.
        context = f"`mini-extra {alias} --help` (subcommand {subcommand!r})"
        assert result.returncode == 0, f"{context} exited with {result.returncode}: {result.stderr}"
        # Just verify that help output is returned (content varies by subcommand)
        assert result.stdout, f"{context} produced no help output"
392
 
393
 
394
def test_mini_extra_config_help():
    """``mini-extra config --help`` exits with 0 and prints help mentioning ``--help``."""
    completed = subprocess.run(
        ["mini-extra", "config", "--help"], capture_output=True, text=True, timeout=10
    )

    assert completed.returncode == 0
    # Some help output must have been produced at all...
    assert completed.stdout
    # ...and, once colors are removed, it must list the --help option.
    assert "--help" in strip_ansi_codes(completed.stdout)
408
 
409
 
410
def test_exit_immediately_flag_sets_confirm_exit_false():
    """Test that --exit-immediately flag sets confirm_exit to False in agent config.

    The check is made on the keyword arguments handed to the (patched)
    ``InteractiveAgent`` constructor, because that is the only value ``main``
    itself produces; the attribute on the mock agent is preset by this test.
    """
    with (
        patch("minisweagent.run.mini.configure_if_first_time"),
        patch("minisweagent.run.mini.InteractiveAgent") as mock_interactive_agent_class,
        patch("minisweagent.run.mini.get_model") as mock_get_model,
        patch("minisweagent.run.mini.LocalEnvironment") as mock_env,
        patch("minisweagent.run.mini.get_config_from_spec") as mock_get_config,
    ):
        # Setup mocks
        mock_model = Mock()
        mock_get_model.return_value = mock_model
        mock_environment = Mock()
        mock_env.return_value = mock_environment
        mock_get_config.return_value = {"agent": {"system_template": "test"}, "env": {}, "model": {}}

        # Create mock agent with config
        mock_agent = Mock()
        mock_agent.config.confirm_exit = False
        mock_agent.run.return_value = {"exit_status": "Success", "submission": "Result"}
        mock_interactive_agent_class.return_value = mock_agent

        # Call main function with --exit-immediately flag
        agent = main(
            config_spec=[str(DEFAULT_CONFIG_FILE)],
            model_name="test-model",
            task="Test task",
            yolo=False,
            output=None,
            exit_immediately=True,  # This should set confirm_exit=False
            model_class=None,
        )

        # Verify the flag actually reached the agent constructor. Without this,
        # the assertion below would only read back the value preset on the mock.
        mock_interactive_agent_class.assert_called_once()
        assert mock_interactive_agent_class.call_args.kwargs.get("confirm_exit") is False

        # Verify the agent's config has confirm_exit set to False
        assert agent.config.confirm_exit is False
445
 
446
 
447
def test_no_exit_immediately_flag_sets_confirm_exit_true():
    """Without ``exit_immediately``, the agent returned by ``main`` has ``confirm_exit`` True."""
    agent_stub = Mock(
        **{
            "config.confirm_exit": True,
            "run.return_value": {"exit_status": "Success", "submission": "Result"},
        }
    )

    with (
        patch("minisweagent.run.mini.configure_if_first_time"),
        patch("minisweagent.run.mini.InteractiveAgent") as agent_cls,
        patch("minisweagent.run.mini.get_model") as model_factory,
        patch("minisweagent.run.mini.LocalEnvironment") as env_cls,
        patch("minisweagent.run.mini.get_config_from_spec") as config_loader,
    ):
        model_factory.return_value = Mock()
        env_cls.return_value = Mock()
        config_loader.return_value = {"agent": {"system_template": "test"}, "env": {}, "model": {}}
        agent_cls.return_value = agent_stub

        # exit_immediately is deliberately left out of the call.
        returned_agent = main(
            config_spec=[str(DEFAULT_CONFIG_FILE)],
            model_name="test-model",
            task="Test task",
            yolo=False,
            output=None,
            model_class=None,
        )

        # NOTE(review): this reads back the value preset on the stub above, so it
        # presumably only shows that main() returns the patched agent - confirm intent.
        assert returned_agent.config.confirm_exit is True
481
 
482
 
483
def test_exit_immediately_flag_with_typer_runner():
    """``--exit-immediately`` on the CLI passes ``confirm_exit=False`` to the agent constructor."""
    from typer.testing import CliRunner

    agent_stub = Mock()
    agent_stub.run.return_value = {"exit_status": "Success", "result": "Result"}
    agent_stub.messages = []
    cli_args = ["--task", "Test task", "--exit-immediately", "--model", "test-model"]

    with (
        patch("minisweagent.run.mini.configure_if_first_time"),
        patch("minisweagent.run.mini.InteractiveAgent") as agent_cls,
        patch("minisweagent.run.mini.get_model") as model_factory,
        patch("minisweagent.run.mini.LocalEnvironment") as env_cls,
        patch("minisweagent.run.mini.get_config_from_spec") as config_loader,
    ):
        model_factory.return_value = Mock()
        env_cls.return_value = Mock()
        config_loader.return_value = {"agent": {"system_template": "test"}, "env": {}, "model": {}}
        agent_cls.return_value = agent_stub

        outcome = CliRunner().invoke(app, cli_args)

        assert outcome.exit_code == 0
        agent_cls.assert_called_once()
        # confirm_exit reaches the constructor as a keyword argument.
        ctor_kwargs = agent_cls.call_args.kwargs
        assert ctor_kwargs.get("confirm_exit") is False
515
 
516
 
517
def test_output_file_is_created(tmp_path):
    """Test that output trajectory file is created when --output is specified.

    Runs the CLI in-process with a mocked model and environment. The model
    returns a single action and the environment raises ``Submitted`` when asked
    to execute it, after which the file given via ``--output`` must exist.

    Args:
        tmp_path: pytest fixture providing a per-test temporary directory.
    """
    import minisweagent
    from typer.testing import CliRunner

    from minisweagent.exceptions import Submitted

    output_file = tmp_path / "test_traj.json"

    # Create a temporary config file as a copy of the default config.
    # Locate the default config relative to the package so the test does not
    # depend on the working directory; fall back to the repo-relative path.
    config_file = tmp_path / "test_config.yaml"
    packaged_config_path = Path(minisweagent.__file__).resolve().parent / "config" / "default.yaml"
    default_config_path = (
        packaged_config_path if packaged_config_path.is_file() else Path("src/minisweagent/config/default.yaml")
    )
    # Byte-for-byte copy: avoids any dependence on the platform's default text encoding.
    config_file.write_bytes(default_config_path.read_bytes())

    with (
        patch("minisweagent.run.mini.configure_if_first_time"),
        patch("minisweagent.run.mini.get_model") as mock_get_model,
        patch("minisweagent.run.mini.LocalEnvironment") as mock_env_class,
        patch("minisweagent.agents.interactive._prompt_session.prompt", return_value=""),
        patch("minisweagent.agents.interactive._multiline_prompt_session.prompt", return_value=""),
    ):
        # Setup mocks
        mock_model = Mock()
        mock_model.config = Mock()
        mock_model.config.model_dump.return_value = {}
        mock_model.serialize.return_value = {
            "info": {
                "config": {"model": {}, "model_type": "MockModel"},
            }
        }
        mock_model.get_template_vars.return_value = {}
        mock_model.format_message.side_effect = lambda **kwargs: dict(**kwargs)
        # query now returns dict with extra["actions"]
        mock_model.query.side_effect = [
            {
                "role": "assistant",
                "content": "```mswea_bash_command\necho COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT\necho done\n```",
                "extra": {"actions": [{"command": "echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT\necho done"}]},
            },
        ]
        # format_observation_messages returns observation messages
        mock_model.format_observation_messages.return_value = []
        mock_get_model.return_value = mock_model

        # Environment execute raises Submitted for any action it is given
        def execute_side_effect(action):
            raise Submitted(
                {
                    "role": "exit",
                    "content": "done",
                    "extra": {"exit_status": "Submitted", "submission": "done"},
                }
            )

        mock_environment = Mock()
        mock_environment.config = Mock()
        mock_environment.config.model_dump.return_value = {}
        mock_environment.execute.side_effect = execute_side_effect
        mock_environment.get_template_vars.return_value = {
            "system": "TestOS",
            "release": "1.0",
            "version": "1.0.0",
            "machine": "x86_64",
        }
        mock_environment.serialize.return_value = {
            "info": {"config": {"environment": {}, "environment_type": "MockEnvironment"}}
        }
        mock_env_class.return_value = mock_environment

        runner = CliRunner()
        result = runner.invoke(
            app,
            [
                "--task",
                "Test task",
                "--model",
                "test-model",
                "--output",
                str(output_file),
                "--config",
                str(config_file),
            ],
        )

        # Carry the CLI output in the assertion message so a failure is self-explanatory.
        assert result.exit_code == 0, f"Error output: {result.output}"
        assert output_file.exists(), f"Output file {output_file} was not created"
604
 
604 lines