# test_interactive.py — MoltHub Agent: Mini SWE Agent
# Tests for InteractiveAgent: confirmations, mode switching, interrupts, and limits.
import json
from contextlib import contextmanager
from pathlib import Path
from unittest.mock import patch

import pytest
import yaml

from minisweagent.agents.interactive import InteractiveAgent
from minisweagent.environments.local import LocalEnvironment
from minisweagent.models.test_models import (
    DeterministicModel,
    DeterministicResponseAPIToolcallModel,
    DeterministicToolcallModel,
    make_output,
    make_response_api_output,
    make_toolcall_output,
)


@contextmanager
def mock_prompts(side_effect):
    """Patch the single-line and multiline prompt sessions with one shared side effect.

    ``side_effect`` may be a callable (used as-is) or an iterable of canned
    replies that is consumed one item per prompt call.
    """
    if not callable(side_effect):
        replies = iter(side_effect)

        def responder(*_args, **_kwargs):
            return next(replies)
    else:
        responder = side_effect

    with patch("minisweagent.agents.interactive._prompt_session.prompt", side_effect=responder), patch(
        "minisweagent.agents.interactive._multiline_prompt_session.prompt", side_effect=responder
    ):
        yield


# --- Helper functions to abstract message format differences ---


def get_text(msg: dict) -> str:
    """Return the text payload of *msg* for both string- and list-style content."""
    content = msg.get("content")
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # List-style content: the text lives on the first entry, if any.
        return content[0].get("text", "") if content else ""
    # None or any unrecognized content type maps to an empty string.
    return ""


# --- Model factory functions ---


def make_text_model(outputs_spec: list[tuple[str, list[dict]]], **kwargs) -> DeterministicModel:
    """Build a DeterministicModel whose outputs come from (content, actions) tuples."""
    outputs = []
    for content, actions in outputs_spec:
        outputs.append(make_output(content, actions))
    return DeterministicModel(outputs=outputs, **kwargs)


def make_tc_model(outputs_spec: list[tuple[str, list[dict]]], **kwargs) -> DeterministicToolcallModel:
    """Create a DeterministicToolcallModel from a list of (content, actions) tuples.

    Each action gets a deterministic tool-call id of the form ``call_<step>_<index>``
    so tests can correlate tool calls with their recorded actions.
    """
    outputs = []
    for i, (content, actions) in enumerate(outputs_spec):
        tc_actions = []
        tool_calls = []
        for j, action in enumerate(actions):
            tool_call_id = f"call_{i}_{j}"
            tc_actions.append({"command": action["command"], "tool_call_id": tool_call_id})
            tool_calls.append(
                {
                    "id": tool_call_id,
                    "type": "function",
                    # json.dumps escapes quotes and newlines in the command so the
                    # arguments payload stays valid JSON; the previous f-string
                    # interpolation produced invalid JSON for the multi-line
                    # commands used throughout this file.
                    "function": {"name": "bash", "arguments": json.dumps({"command": action["command"]})},
                }
            )
        outputs.append(make_toolcall_output(content, tool_calls, tc_actions))
    return DeterministicToolcallModel(outputs=outputs, **kwargs)


def make_response_api_model(
    outputs_spec: list[tuple[str, list[dict]]], **kwargs
) -> DeterministicResponseAPIToolcallModel:
    """Build a DeterministicResponseAPIToolcallModel from (content, actions) tuples."""
    outputs = []
    for step, (content, actions) in enumerate(outputs_spec):
        # Assign deterministic tool-call ids of the form call_resp_<step>_<index>.
        api_actions = [
            {"command": action["command"], "tool_call_id": f"call_resp_{step}_{idx}"}
            for idx, action in enumerate(actions)
        ]
        outputs.append(make_response_api_output(content, api_actions))
    return DeterministicResponseAPIToolcallModel(outputs=outputs, **kwargs)


def _make_model(outputs: list[tuple[str, list[dict]]], **kwargs) -> DeterministicModel:
    """Create a DeterministicModel from a list of (content, actions) tuples.

    Kept for backward compatibility with tests that don't need parametrization.
    """
    # Thin alias over make_text_model; parametrized tests use the model_factory fixture instead.
    return make_text_model(outputs, **kwargs)


# --- Fixtures ---


@pytest.fixture
def default_config():
    """Load default agent config from config/default.yaml"""
    raw = Path("src/minisweagent/config/default.yaml").read_text()
    return yaml.safe_load(raw)["agent"]


@pytest.fixture
def toolcall_config():
    """Load toolcall agent config from config/mini.yaml"""
    raw = Path("src/minisweagent/config/mini.yaml").read_text()
    return yaml.safe_load(raw)["agent"]


@pytest.fixture(params=["text", "toolcall", "response_api"])
def model_factory(request, default_config, toolcall_config):
    """Parametrized fixture that returns (factory_fn, config) for all three model types."""
    factories = {
        "text": (make_text_model, default_config),
        "toolcall": (make_tc_model, toolcall_config),
        "response_api": (make_response_api_model, toolcall_config),
    }
    return factories[request.param]


def test_successful_completion_with_confirmation(model_factory):
    """Agent submits successfully when the user confirms every action."""
    factory, config = model_factory
    model = factory(
        [("Finishing", [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'completed'"}])]
    )
    # First "" confirms the action with Enter; second "" declines a follow-up task.
    with mock_prompts(["", ""]):
        agent = InteractiveAgent(model=model, env=LocalEnvironment(), **config)
        info = agent.run("Test completion with confirmation")

    assert info["exit_status"] == "Submitted"
    assert info["submission"] == "completed\n"
    assert agent.n_calls == 1


def test_action_rejection_and_recovery(model_factory):
    """Agent recovers and submits after the user rejects its first action."""
    factory, config = model_factory
    replies = [
        "User rejected this action",  # reject the first proposed action
        "",  # confirm the second action
        "",  # no follow-up task once the agent finishes
    ]
    model = factory(
        [
            ("First try", [{"command": "echo 'first attempt'"}]),
            ("Second try", [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'recovered'"}]),
        ]
    )
    with mock_prompts(replies):
        agent = InteractiveAgent(model=model, env=LocalEnvironment(), **config)
        info = agent.run("Test action rejection")

    assert info["exit_status"] == "Submitted"
    assert info["submission"] == "recovered\n"
    assert agent.n_calls == 2
    # The rejection text must have been recorded exactly once in the transcript.
    assert sum("User rejected this action" in get_text(msg) for msg in agent.messages) == 1


def test_yolo_mode_activation(model_factory):
    """Test that entering "/y" switches the agent to yolo mode, disabling confirmations."""
    factory, config = model_factory
    with mock_prompts(
        [
            "/y",  # Enter yolo mode
            "",  # This should be ignored since yolo mode is on
            "",  # No new task when agent wants to finish
        ]
    ):
        agent = InteractiveAgent(
            model=factory(
                [
                    ("Test command", [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'yolo works'"}]),
                ]
            ),
            env=LocalEnvironment(),
            **config,
        )

        info = agent.run("Test yolo mode")
        assert info["exit_status"] == "Submitted"
        assert info["submission"] == "yolo works\n"
        assert agent.config.mode == "yolo"


def test_help_command(model_factory):
    """Test that "/h" prints the help text and the run then continues normally."""
    factory, config = model_factory
    with mock_prompts(
        [
            "/h",  # Show help
            "",  # Confirm action after help
            "",  # No new task when agent wants to finish
        ]
    ):
        with patch("minisweagent.agents.interactive.console.print") as mock_print:
            agent = InteractiveAgent(
                model=factory(
                    [
                        ("Test help", [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'help shown'"}]),
                    ]
                ),
                env=LocalEnvironment(),
                **config,
            )

            info = agent.run("Test help command")
            assert info["exit_status"] == "Submitted"
            assert info["submission"] == "help shown\n"
            # Check that help was printed (the help text mentions the "/y" command)
            help_calls = [call for call in mock_print.call_args_list if "/y" in str(call)]
            assert len(help_calls) > 0


def test_whitelisted_actions_skip_confirmation(model_factory):
    """Whitelisted actions execute without asking the user for confirmation."""
    factory, config = model_factory
    cfg = dict(config, whitelist_actions=[r"echo.*"])
    command = "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'no confirmation needed'"
    # Only one prompt is expected: declining a follow-up task at the end.
    with mock_prompts([""]):
        agent = InteractiveAgent(
            model=factory([("Whitelisted", [{"command": command}])]),
            env=LocalEnvironment(),
            **cfg,
        )
        info = agent.run("Test whitelisted actions")

    assert info["exit_status"] == "Submitted"
    assert info["submission"] == "no confirmation needed\n"


def _test_interruption_helper(
    factory, config, interruption_input, expected_message_fragment, problem_statement="Test interruption"
):
    """Helper function for testing interruption scenarios.

    Raises KeyboardInterrupt on the agent's first model query, answers the
    interrupt prompt with ``interruption_input``, then lets the run complete.
    Returns ``(agent, message)`` where ``message`` is the single conversation
    entry containing ``expected_message_fragment``.
    """
    agent = InteractiveAgent(
        model=factory(
            [
                ("Initial step", [{"command": "echo 'will be interrupted'"}]),
                (
                    "Recovery",
                    [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'recovered from interrupt'"}],
                ),
            ]
        ),
        env=LocalEnvironment(),
        **config,
    )

    # Mock the query to raise KeyboardInterrupt on first call, then work normally
    original_query = agent.query
    call_count = 0

    def mock_query(*args, **kwargs):
        nonlocal call_count
        call_count += 1
        if call_count == 1:
            raise KeyboardInterrupt()
        return original_query(*args, **kwargs)

    # Mock console.input based on the interruption_input parameter
    input_call_count = 0

    def mock_input(prompt, **kwargs):
        nonlocal input_call_count
        input_call_count += 1
        if input_call_count == 1:
            return interruption_input  # For the interruption handling
        return ""  # Confirm all subsequent actions

    with mock_prompts(mock_input):
        with patch.object(agent, "query", side_effect=mock_query):
            info = agent.run(problem_statement)

    assert info["exit_status"] == "Submitted"
    assert info["submission"] == "recovered from interrupt\n"
    # Check that the expected interruption message was added (exactly once)
    interrupt_messages = [msg for msg in agent.messages if expected_message_fragment in get_text(msg)]
    assert len(interrupt_messages) == 1

    return agent, interrupt_messages[0]


def test_interruption_handling_with_message(model_factory):
    """An interruption accompanied by a user message is recorded in the transcript."""
    factory, config = model_factory
    _, interrupt_message = _test_interruption_helper(
        factory, config, interruption_input="User interrupted", expected_message_fragment="Interrupted by user"
    )
    # The user's own text must appear in the recorded interruption message.
    assert "User interrupted" in get_text(interrupt_message)


def test_interruption_handling_empty_message(model_factory):
    """An interruption with no user message falls back to the temporary-interrupt note."""
    factory, config = model_factory
    _test_interruption_helper(
        factory, config, interruption_input="", expected_message_fragment="Temporary interruption caught"
    )


def test_multiple_confirmations_and_commands(model_factory):
    """Test a rejection, a help request, and a yolo-mode switch within one run."""
    factory, config = model_factory
    with mock_prompts(
        [
            "reject first",  # Reject first action
            "/h",  # Show help for second action
            "/y",  # After help, enter yolo mode
            "",  # After yolo mode enabled, confirm (but yolo mode will skip future confirmations)
            "",  # No new task when agent wants to finish
        ]
    ):
        agent = InteractiveAgent(
            model=factory(
                [
                    ("First action", [{"command": "echo 'first'"}]),
                    (
                        "Second action",
                        [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'complex flow completed'"}],
                    ),
                ]
            ),
            env=LocalEnvironment(),
            **config,
        )

        info = agent.run("Test complex interaction flow")
        assert info["exit_status"] == "Submitted"
        assert info["submission"] == "complex flow completed\n"
        assert agent.config.mode == "yolo"  # Should be in yolo mode
        assert agent.n_calls == 2


def test_non_whitelisted_action_requires_confirmation(model_factory):
    """Actions outside the whitelist still go through the confirmation prompt."""
    factory, config = model_factory
    cfg = dict(config, whitelist_actions=[r"ls.*"])  # only `ls` commands are whitelisted
    command = "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'confirmed'"
    with mock_prompts(["", ""]):  # confirm the action, then decline a follow-up task
        agent = InteractiveAgent(
            model=factory([("Non-whitelisted", [{"command": command}])]),
            env=LocalEnvironment(),
            **cfg,
        )
        info = agent.run("Test non-whitelisted action")

    assert info["exit_status"] == "Submitted"
    assert info["submission"] == "confirmed\n"


# --- Comprehensive mode-switching tests ---


def test_human_mode_basic_functionality(model_factory):
    """In human mode the user types shell commands directly; the LM is never queried."""
    factory, config = model_factory
    replies = [
        "echo 'user command'",  # user-entered shell command
        "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'human mode works'",  # final user command
        "",  # decline a follow-up task
    ]
    with mock_prompts(replies):
        agent = InteractiveAgent(
            model=factory([]),  # no LM outputs needed: human mode never calls the model
            env=LocalEnvironment(),
            **dict(config, mode="human"),
        )
        info = agent.run("Test human mode")

    assert info["exit_status"] == "Submitted"
    assert info["submission"] == "human mode works\n"
    assert agent.config.mode == "human"
    assert agent.n_calls == 0  # the LM was never invoked


def test_human_mode_switch_to_yolo(model_factory):
    """Test switching from human mode to yolo mode via "/y"."""
    factory, config = model_factory
    with mock_prompts(
        [
            "/y",  # Switch to yolo mode from human mode
            "",  # Confirm action in yolo mode (though no confirmation needed)
            "",  # No new task when agent wants to finish
        ]
    ):
        agent = InteractiveAgent(
            model=factory(
                [
                    (
                        "LM action",
                        [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'switched to yolo'"}],
                    ),
                ]
            ),
            env=LocalEnvironment(),
            **{
                **config,
                "mode": "human",
            },
        )

        info = agent.run("Test human to yolo switch")
        assert info["exit_status"] == "Submitted"
        assert info["submission"] == "switched to yolo\n"
        assert agent.config.mode == "yolo"
        assert agent.n_calls == 1


def test_human_mode_switch_to_confirm(model_factory):
    """Test switching from human mode to confirm mode via "/c"."""
    factory, config = model_factory
    with mock_prompts(
        [
            "/c",  # Switch to confirm mode from human mode
            "",  # Confirm action in confirm mode
            "",  # No new task when agent wants to finish
        ]
    ):
        agent = InteractiveAgent(
            model=factory(
                [
                    (
                        "LM action",
                        [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'switched to confirm'"}],
                    ),
                ]
            ),
            env=LocalEnvironment(),
            **{
                **config,
                "mode": "human",
            },
        )

        info = agent.run("Test human to confirm switch")
        assert info["exit_status"] == "Submitted"
        assert info["submission"] == "switched to confirm\n"
        assert agent.config.mode == "confirm"
        assert agent.n_calls == 1


def test_confirmation_mode_switch_to_human_with_rejection(model_factory):
    """Test switching from confirm mode to human mode with /u command (rejects the pending action)."""
    factory, config = model_factory
    with mock_prompts(
        [
            "/u",  # Switch to human mode and reject action
            "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'human command after rejection'",  # Human command
            "",  # No new task when agent wants to finish
        ]
    ):
        agent = InteractiveAgent(
            model=factory(
                [
                    ("LM action", [{"command": "echo 'first action'"}]),
                    ("Recovery action", [{"command": "echo 'recovery'"}]),
                ]
            ),
            env=LocalEnvironment(),
            **{
                **config,
                "mode": "confirm",
            },
        )

        info = agent.run("Test confirm to human switch")
        assert info["exit_status"] == "Submitted"
        assert info["submission"] == "human command after rejection\n"
        assert agent.config.mode == "human"
        # Should have rejection message recorded exactly once
        rejection_messages = [msg for msg in agent.messages if "Switching to human mode" in get_text(msg)]
        assert len(rejection_messages) == 1


def test_confirmation_mode_switch_to_yolo_and_continue(model_factory):
    """Entering "/y" at a confirmation both switches to yolo mode and runs the pending action."""
    factory, config = model_factory
    command = "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'switched and continued'"
    # "/y" switches to yolo and confirms the pending action; "" declines a follow-up task.
    with mock_prompts(["/y", ""]):
        agent = InteractiveAgent(
            model=factory([("LM action", [{"command": command}])]),
            env=LocalEnvironment(),
            **dict(config, mode="confirm"),
        )
        info = agent.run("Test confirm to yolo switch")

    assert info["exit_status"] == "Submitted"
    assert info["submission"] == "switched and continued\n"
    assert agent.config.mode == "yolo"


def test_mode_switch_during_keyboard_interrupt(model_factory):
    """Test switching to yolo mode while handling a keyboard interrupt."""
    factory, config = model_factory
    agent = InteractiveAgent(
        model=factory(
            [
                ("Initial step", [{"command": "echo 'will be interrupted'"}]),
                (
                    "Recovery",
                    [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'recovered after mode switch'"}],
                ),
            ]
        ),
        env=LocalEnvironment(),
        **{
            **config,
            "mode": "confirm",
        },
    )

    # Mock the query to raise KeyboardInterrupt on first call
    original_query = agent.query
    call_count = 0

    def mock_query(*args, **kwargs):
        nonlocal call_count
        call_count += 1
        if call_count == 1:
            raise KeyboardInterrupt()
        return original_query(*args, **kwargs)

    with mock_prompts(
        [
            "/y",  # Switch to yolo mode during interrupt
            "",  # Confirm subsequent actions (though yolo mode won't ask)
        ]
    ):
        with patch.object(agent, "query", side_effect=mock_query):
            info = agent.run("Test interrupt mode switch")

    assert info["exit_status"] == "Submitted"
    assert info["submission"] == "recovered after mode switch\n"
    assert agent.config.mode == "yolo"
    # Should have the interruption message recorded exactly once
    interrupt_messages = [msg for msg in agent.messages if "Temporary interruption caught" in get_text(msg)]
    assert len(interrupt_messages) == 1


def test_already_in_mode_behavior(model_factory):
    """Test behavior when trying to switch to the mode the agent is already in."""
    factory, config = model_factory
    with mock_prompts(
        [
            "/c",  # Try to switch to confirm mode when already in confirm mode
            "",  # Confirm action after the "already in mode" recursive prompt
            "",  # No new task when agent wants to finish
        ]
    ):
        agent = InteractiveAgent(
            model=factory(
                [
                    (
                        "Test action",
                        [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'already in mode'"}],
                    ),
                ]
            ),
            env=LocalEnvironment(),
            **{
                **config,
                "mode": "confirm",
            },
        )

        info = agent.run("Test already in mode")
        assert info["exit_status"] == "Submitted"
        assert info["submission"] == "already in mode\n"
        assert agent.config.mode == "confirm"


def test_all_mode_transitions_yolo_to_others(model_factory):
    """Test transitions from yolo mode to confirm mode (via an interrupt)."""
    factory, config = model_factory
    with mock_prompts(
        [
            "/c",  # Switch from yolo to confirm
            "",  # Confirm action in confirm mode
            "",  # No new task when agent wants to finish
        ]
    ):
        agent = InteractiveAgent(
            model=factory(
                [
                    ("First action", [{"command": "echo 'yolo action'"}]),
                    (
                        "Second action",
                        [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'confirm action'"}],
                    ),
                ]
            ),
            env=LocalEnvironment(),
            **{
                **config,
                "mode": "yolo",
            },
        )

        # Trigger first action in yolo mode (should execute without confirmation)
        # Then interrupt to switch mode
        original_query = agent.query
        call_count = 0

        def mock_query(*args, **kwargs):
            nonlocal call_count
            call_count += 1
            if call_count == 2:  # Interrupt on second query
                raise KeyboardInterrupt()
            return original_query(*args, **kwargs)

        with patch.object(agent, "query", side_effect=mock_query):
            info = agent.run("Test yolo to confirm transition")

        assert info["exit_status"] == "Submitted"
        assert info["submission"] == "confirm action\n"
        assert agent.config.mode == "confirm"


def test_all_mode_transitions_confirm_to_human(model_factory):
    """"/u" at a confirmation rejects the pending action and hands control to the human."""
    factory, config = model_factory
    replies = [
        "/u",  # reject the LM action and switch from confirm to human mode
        "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'human command'",  # typed by the user
        "",  # decline a follow-up task
    ]
    with mock_prompts(replies):
        agent = InteractiveAgent(
            model=factory([("LM action", [{"command": "echo 'rejected action'"}])]),
            env=LocalEnvironment(),
            **dict(config, mode="confirm"),
        )
        info = agent.run("Test confirm to human transition")

    assert info["exit_status"] == "Submitted"
    assert info["submission"] == "human command\n"
    assert agent.config.mode == "human"


def test_help_command_from_different_contexts(model_factory):
    """Test help command works from different contexts (confirmation, interrupt, human mode)."""
    factory, config = model_factory
    # Test help during confirmation
    with mock_prompts(
        [
            "/h",  # Show help during confirmation
            "",  # Confirm after help
            "",  # No new task when agent wants to finish
        ]
    ):
        with patch("minisweagent.agents.interactive.console.print") as mock_print:
            agent = InteractiveAgent(
                model=factory(
                    [
                        (
                            "Test action",
                            [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'help works'"}],
                        ),
                    ]
                ),
                env=LocalEnvironment(),
                **{
                    **config,
                    "mode": "confirm",
                },
            )

            info = agent.run("Test help from confirmation")
            assert info["exit_status"] == "Submitted"
            assert info["submission"] == "help works\n"
            # Verify help was shown (help text includes the current mode)
            help_calls = [call for call in mock_print.call_args_list if "Current mode: " in str(call)]
            assert len(help_calls) > 0


def test_help_command_from_human_mode(model_factory):
    """Test help command works from human mode."""
    factory, config = model_factory
    with mock_prompts(
        [
            "/h",  # Show help in human mode
            "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'help in human mode'",  # User command after help
            "",  # No new task when agent wants to finish
        ]
    ):
        with patch("minisweagent.agents.interactive.console.print") as mock_print:
            agent = InteractiveAgent(
                model=factory([]),  # LM shouldn't be called in human mode
                env=LocalEnvironment(),
                **{
                    **config,
                    "mode": "human",
                },
            )

            info = agent.run("Test help from human mode")
            assert info["exit_status"] == "Submitted"
            assert info["submission"] == "help in human mode\n"
            # Verify help was shown (help text includes the current mode)
            help_calls = [call for call in mock_print.call_args_list if "Current mode: " in str(call)]
            assert len(help_calls) > 0


def test_complex_mode_switching_sequence(model_factory):
    """Test a complex sequence of mode switches across different contexts.

    Covers confirm->yolo at a confirmation prompt, yolo->human during an
    interrupt, and human->confirm from the human-mode prompt.
    """
    factory, config = model_factory
    agent = InteractiveAgent(
        model=factory(
            [
                ("Action 1", [{"command": "echo 'action1'"}]),
                ("Action 2", [{"command": "echo 'action2'"}]),
                ("Action 3", [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'final action'"}]),
            ]
        ),
        env=LocalEnvironment(),
        **{
            **config,
            "mode": "confirm",
        },
    )

    # Mock interruption on second query
    original_query = agent.query
    call_count = 0

    def mock_query(*args, **kwargs):
        nonlocal call_count
        call_count += 1
        if call_count == 2:
            raise KeyboardInterrupt()
        return original_query(*args, **kwargs)

    with mock_prompts(
        [
            "/y",  # Confirm->Yolo during first action confirmation
            "/u",  # Yolo->Human during interrupt
            "/c",  # Human->Confirm in human mode
            "",  # Confirm final action
            "",  # No new task when agent wants to finish
            "",  # Extra empty input for any additional prompts
            "",  # Extra empty input for any additional prompts
        ]
    ):
        with patch.object(agent, "query", side_effect=mock_query):
            info = agent.run("Test complex mode switching")

    assert info["exit_status"] == "Submitted"
    assert info["submission"] == "final action\n"
    assert agent.config.mode == "confirm"  # Should end in confirm mode
809
 
810
 
811
def test_limits_exceeded_with_user_continuation(model_factory):
    """Test that when limits are exceeded, user can provide new limits and execution continues."""
    factory, config = model_factory
    # Create agent with very low limits that will be exceeded
    agent = InteractiveAgent(
        model=factory(
            [
                ("Step 1", [{"command": "echo 'first step'"}]),
                ("Step 2", [{"command": "echo 'second step'"}]),
                (
                    "Final step",
                    [
                        {
                            "command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'completed after limit increase'"
                        }
                    ],
                ),
            ],
            cost_per_call=0.6,  # Will exceed cost_limit=0.5 on first call
        ),
        env=LocalEnvironment(),
        **{
            **config,
            "step_limit": 10,  # High enough to not interfere initially
            "cost_limit": 0.5,  # Will be exceeded with first model call (cost=0.6),
            "mode": "yolo",  # Use yolo mode to avoid confirmation prompts,
        },
    )

    # Mock input() to provide new limits when prompted
    with patch("builtins.input", side_effect=["10", "5.0"]):  # New step_limit=10, cost_limit=5.0
        with mock_prompts([""]):  # No new task
            with patch("minisweagent.agents.interactive.console.print"):  # Suppress console output
                info = agent.run("Test limits exceeded with continuation")

    assert info["exit_status"] == "Submitted"
    assert info["submission"] == "completed after limit increase\n"
    assert agent.n_calls == 3  # Should complete all 3 steps
    assert agent.config.step_limit == 10  # Should have updated step limit
    assert agent.config.cost_limit == 5.0  # Should have updated cost limit
 
852
 
853
def test_limits_exceeded_multiple_times_with_continuation(model_factory):
    """Test that limits can be exceeded and updated multiple times."""
    factory, config = model_factory
    agent = InteractiveAgent(
        model=factory(
            [
                ("Step 1", [{"command": "echo 'step1'"}]),
                ("Step 2", [{"command": "echo 'step2'"}]),
                ("Step 3", [{"command": "echo 'step3'"}]),
                ("Step 4", [{"command": "echo 'step4'"}]),
                (
                    "Final",
                    [
                        {
                            "command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'completed after multiple increases'"
                        }
                    ],
                ),
            ],
            cost_per_call=1.0,  # Standard cost per call
        ),
        env=LocalEnvironment(),
        **{
            **config,
            "step_limit": 1,  # Will be exceeded after first step
            "cost_limit": 100.0,  # High enough to not interfere,
            "mode": "yolo",
        },
    )

    # Mock input() to provide new limits multiple times
    # First limit increase: step_limit=2, then step_limit=10 when exceeded again
    with patch("builtins.input", side_effect=["2", "100.0", "10", "100.0"]):
        with mock_prompts([""]):  # No new task
            with patch("minisweagent.agents.interactive.console.print"):
                info = agent.run("Test multiple limit increases")

    assert info["exit_status"] == "Submitted"
    assert info["submission"] == "completed after multiple increases\n"
    assert agent.n_calls == 5  # Should complete all 5 steps
    assert agent.config.step_limit == 10  # Should have final updated step limit
 
895
 
896
def test_continue_after_completion_with_new_task(model_factory):
    """Test that user can provide a new task when agent wants to finish."""
    factory, config = model_factory
    with mock_prompts(
        [
            "",  # Confirm first action
            "Create a new file",  # Provide new task when agent wants to finish
            "",  # Confirm second action for new task
            "",  # Don't provide another task after second completion (finish)
        ]
    ):
        agent = InteractiveAgent(
            model=factory(
                [
                    (
                        "First task",
                        [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'first task completed'"}],
                    ),
                    (
                        "Second task",
                        [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'new task completed'"}],
                    ),
                ]
            ),
            env=LocalEnvironment(),
            **config,
        )

        info = agent.run("Complete the initial task")
        assert info["exit_status"] == "Submitted"
        assert info["submission"] == "new task completed\n"
        assert agent.n_calls == 2
        # Should have the new task message in conversation
        new_task_messages = [
            msg for msg in agent.messages if "The user added a new task: Create a new file" in get_text(msg)
        ]
        assert len(new_task_messages) == 1
 
934
 
935
def test_continue_after_completion_without_new_task(model_factory):
    """Test that agent finishes normally when user doesn't provide a new task."""
    factory, config = model_factory
    with mock_prompts(
        [
            "",  # Confirm first action
            "",  # Don't provide new task when agent wants to finish (empty input)
        ]
    ):
        agent = InteractiveAgent(
            model=factory(
                [
                    (
                        "Task completion",
                        [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'original task completed'"}],
                    ),
                ]
            ),
            env=LocalEnvironment(),
            **config,
        )

        info = agent.run("Complete the task")
        assert info["exit_status"] == "Submitted"
        assert info["submission"] == "original task completed\n"
        assert agent.n_calls == 1
        # Should not have any new task messages
        new_task_messages = [msg for msg in agent.messages if "The user added a new task" in get_text(msg)]
        assert len(new_task_messages) == 0
 
965
 
966
def test_continue_after_completion_multiple_cycles(model_factory):
    """Test multiple continuation cycles with new tasks."""
    factory, config = model_factory
    with mock_prompts(
        [
            "",  # Confirm first action
            "Second task",  # Provide first new task
            "",  # Confirm second action
            "Third task",  # Provide second new task
            "",  # Confirm third action
            "",  # Don't provide another task (finish)
        ]
    ):
        agent = InteractiveAgent(
            model=factory(
                [
                    ("First", [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'first completed'"}]),
                    ("Second", [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'second completed'"}]),
                    ("Third", [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'third completed'"}]),
                ]
            ),
            env=LocalEnvironment(),
            **config,
        )

        info = agent.run("Initial task")
        assert info["exit_status"] == "Submitted"
        assert info["submission"] == "third completed\n"
        assert agent.n_calls == 3
        # Should have both new task messages
        new_task_messages = [msg for msg in agent.messages if "The user added a new task" in get_text(msg)]
        assert len(new_task_messages) == 2
        assert "Second task" in get_text(new_task_messages[0])
        assert "Third task" in get_text(new_task_messages[1])
 
1001
 
1002
def test_continue_after_completion_in_yolo_mode(model_factory):
    """Test continuation when starting in yolo mode (no confirmations needed)."""
    factory, config = model_factory
    with mock_prompts(
        [
            "Create a second task",  # Provide new task when agent wants to finish
            "",  # Don't provide another task after second completion (finish)
        ]
    ):
        agent = InteractiveAgent(
            model=factory(
                [
                    ("First", [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'first completed'"}]),
                    (
                        "Second",
                        [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'second task completed'"}],
                    ),
                ]
            ),
            env=LocalEnvironment(),
            **{
                **config,
                "mode": "yolo",  # Start in yolo mode
            },
        )

        info = agent.run("Initial task")
        assert info["exit_status"] == "Submitted"
        assert info["submission"] == "second task completed\n"
        assert agent.config.mode == "yolo"
        assert agent.n_calls == 2
        # Should have the new task message
        new_task_messages = [msg for msg in agent.messages if "Create a second task" in get_text(msg)]
        assert len(new_task_messages) == 1
 
1037
 
1038
def test_confirm_exit_enabled_asks_for_confirmation(model_factory):
    """Test that when confirm_exit=True, agent asks for confirmation before finishing."""
    factory, config = model_factory
    with mock_prompts(["", ""]):  # Confirm action, then no new task (empty string to exit)
        agent = InteractiveAgent(
            model=factory(
                [
                    ("Finishing", [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'completed'"}]),
                ]
            ),
            env=LocalEnvironment(),
            **{
                **config,
                "confirm_exit": True,  # Should ask for confirmation
            },
        )

        info = agent.run("Test confirm exit enabled")
        assert info["exit_status"] == "Submitted"
        assert info["submission"] == "completed\n"
        assert agent.n_calls == 1
 
1060
 
1061
def test_confirm_exit_disabled_exits_immediately(model_factory):
    """Test that when confirm_exit=False, agent exits immediately without asking."""
    factory, config = model_factory
    with mock_prompts([""]):  # Only confirm action, no exit confirmation needed
        agent = InteractiveAgent(
            model=factory(
                [
                    ("Finishing", [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'completed'"}]),
                ]
            ),
            env=LocalEnvironment(),
            **{
                **config,
                "confirm_exit": False,  # Should NOT ask for confirmation
            },
        )

        info = agent.run("Test confirm exit disabled")
        assert info["exit_status"] == "Submitted"
        assert info["submission"] == "completed\n"
        assert agent.n_calls == 1
 
1083
 
1084
def test_confirm_exit_with_new_task_continues_execution(model_factory):
    """Test that when user provides new task at exit confirmation, agent continues."""
    factory, config = model_factory
    with mock_prompts(
        [
            "",  # Confirm first action
            "Please do one more thing",  # Provide new task instead of exiting
            "",  # Confirm second action
            "",  # No new task on second exit confirmation
        ]
    ):
        agent = InteractiveAgent(
            model=factory(
                [
                    ("First task", [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'first done'"}]),
                    (
                        "Additional task",
                        [{"command": "echo 'COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT'\necho 'additional done'"}],
                    ),
                ]
            ),
            env=LocalEnvironment(),
            **{
                **config,
                "confirm_exit": True,
            },
        )

        info = agent.run("Test exit with new task")
        assert info["exit_status"] == "Submitted"
        assert info["submission"] == "additional done\n"
        assert agent.n_calls == 2
        # Check that the new task was added to the conversation
        new_task_messages = [msg for msg in agent.messages if "Please do one more thing" in get_text(msg)]
        assert len(new_task_messages) == 1
 
1120
 
1121
def test_confirm_exit_config_field_defaults(model_factory):
    """Test that confirm_exit field has correct default value."""
    factory, config = model_factory
    agent = InteractiveAgent(
        model=factory([]),
        env=LocalEnvironment(),
        **config,
    )
    # Default should be True
    assert agent.config.confirm_exit is True
 
1132
 
1133
def test_confirm_exit_config_field_can_be_set(model_factory):
    """Test that confirm_exit field can be explicitly set."""
    factory, config = model_factory
    agent_with_confirm = InteractiveAgent(
        model=factory([]),
        env=LocalEnvironment(),
        **{
            **config,
            "confirm_exit": True,
        },
    )
    assert agent_with_confirm.config.confirm_exit is True

    agent_without_confirm = InteractiveAgent(
        model=factory([]),
        env=LocalEnvironment(),
        **{
            **config,
            "confirm_exit": False,
        },
    )
    assert agent_without_confirm.config.confirm_exit is False
 
1155 lines