# MoltHub Agent: Mini SWE Agent
#
# test_portkey_response_model.py (12.86 KB), Python
# Raw
1
import os
2
from unittest.mock import MagicMock, Mock, patch
3
 
4
import pytest
5
 
6
from minisweagent.models import GLOBAL_MODEL_STATS
7
from minisweagent.models.portkey_response_model import PortkeyResponseAPIModel
8
from minisweagent.models.utils.actions_toolcall_response import BASH_TOOL_RESPONSE_API
9
 
10
 
11
def test_response_api_model_basic_query():
    """Check a single-turn query against a mocked Portkey client.

    The mocked ``responses.create`` returns one ``bash`` function call. The
    test asserts that ``query`` exposes it under ``result["extra"]["actions"]``
    and that ``responses.create`` was called exactly once with the model name,
    the given messages as ``input`` and the bash tool definition as ``tools``.
    """
    client = MagicMock()
    portkey_cls = MagicMock(return_value=client)

    portkey_patch = patch("minisweagent.models.portkey_response_model.Portkey", portkey_cls)
    env_patch = patch.dict(os.environ, {"PORTKEY_API_KEY": "test-key"})
    cost_patch = patch(
        "minisweagent.models.portkey_response_model.litellm.cost_calculator.completion_cost",
        return_value=0.01,
    )

    with portkey_patch, env_patch, cost_patch:
        tool_call = {
            "type": "function_call",
            "call_id": "call_abc",
            "name": "bash",
            "arguments": '{"command": "echo test"}',
        }
        response = Mock(id="resp_123", output=[tool_call])
        # The dumped form shares the very same output list as the attribute.
        response.model_dump.return_value = {"id": "resp_123", "output": response.output}
        client.responses.create.return_value = response

        conversation = [{"role": "user", "content": "test"}]
        result = PortkeyResponseAPIModel(model_name="gpt-5-mini").query(conversation)

        assert result["extra"]["actions"] == [{"command": "echo test", "tool_call_id": "call_abc"}]
        client.responses.create.assert_called_once_with(
            model="gpt-5-mini",
            input=conversation,
            tools=[BASH_TOOL_RESPONSE_API],
        )
41
 
42
 
43
def test_response_api_model_stateless_flattens_response():
    """Check how a stored response object in the history is sent to the API.

    The history passed to ``query`` mixes plain input items with a response
    object (``"object": "response"``) that also carries an ``extra`` key. The
    test asserts that the ``input`` handed to ``responses.create`` contains
    that response's ``output`` items inline, without the wrapper or ``extra``.
    """

    def user_message(text):
        # Always returns a fresh dict so history and expectation never share objects.
        return {"type": "message", "role": "user", "content": [{"type": "input_text", "text": text}]}

    def bash_call(call_id, arguments):
        return {"type": "function_call", "call_id": call_id, "name": "bash", "arguments": arguments}

    def first_call_output():
        return {"type": "function_call_output", "call_id": "call_1", "output": "first"}

    client = MagicMock()
    portkey_cls = MagicMock(return_value=client)

    portkey_patch = patch("minisweagent.models.portkey_response_model.Portkey", portkey_cls)
    env_patch = patch.dict(os.environ, {"PORTKEY_API_KEY": "test-key"})
    cost_patch = patch(
        "minisweagent.models.portkey_response_model.litellm.cost_calculator.completion_cost",
        return_value=0.01,
    )

    with portkey_patch, env_patch, cost_patch:
        response = Mock(id="resp_456", output=[bash_call("call_2", '{"command": "echo second"}')])
        response.model_dump.return_value = {"id": "resp_456", "output": response.output}
        client.responses.create.return_value = response

        history = [
            user_message("first"),
            {
                "object": "response",
                "output": [bash_call("call_1", '{"command": "echo first"}')],
                "extra": {"actions": [{"command": "echo first", "tool_call_id": "call_1"}]},
            },
            first_call_output(),
            user_message("second"),
        ]
        result = PortkeyResponseAPIModel(model_name="gpt-5-mini").query(history)

        assert result["extra"]["actions"] == [{"command": "echo second", "tool_call_id": "call_2"}]

        # The response wrapper must be replaced by its output items and "extra" dropped.
        flattened = [
            user_message("first"),
            bash_call("call_1", '{"command": "echo first"}'),
            first_call_output(),
            user_message("second"),
        ]
        assert client.responses.create.call_args.kwargs["input"] == flattened
91
 
92
 
93
def test_response_api_model_multiple_tool_calls():
    """Check that every function call in one response becomes an action.

    The mocked response carries two ``bash`` function calls. The test asserts
    that ``result["extra"]["actions"]`` lists both commands in the same order,
    each paired with its ``call_id`` under the ``tool_call_id`` key.
    """
    client = MagicMock()
    portkey_cls = MagicMock(return_value=client)

    portkey_patch = patch("minisweagent.models.portkey_response_model.Portkey", portkey_cls)
    env_patch = patch.dict(os.environ, {"PORTKEY_API_KEY": "test-key"})
    cost_patch = patch(
        "minisweagent.models.portkey_response_model.litellm.cost_calculator.completion_cost",
        return_value=0.01,
    )

    with portkey_patch, env_patch, cost_patch:
        tool_calls = [
            {"type": "function_call", "call_id": "call_1", "name": "bash", "arguments": '{"command": "echo first"}'},
            {"type": "function_call", "call_id": "call_2", "name": "bash", "arguments": '{"command": "echo second"}'},
        ]
        response = Mock(id="resp_789", output=tool_calls)
        response.model_dump.return_value = {"id": "resp_789", "output": response.output}
        client.responses.create.return_value = response

        llm = PortkeyResponseAPIModel(model_name="gpt-5-mini")
        result = llm.query([{"role": "user", "content": "test"}])

        expected_actions = [
            {"command": "echo first", "tool_call_id": "call_1"},
            {"command": "echo second", "tool_call_id": "call_2"},
        ]
        assert result["extra"]["actions"] == expected_actions
124
 
125
 
126
def test_response_api_model_cost_tracking():
    """Check that a query reports its cost and adds it to the global stats.

    ``completion_cost`` is patched to return ``0.05``. The test asserts that
    this value appears as ``result["extra"]["cost"]`` and that
    ``GLOBAL_MODEL_STATS.cost`` grew by the same amount during the query.

    The global total is a running float sum that earlier tests may already
    have contributed to, so the delta is compared with ``pytest.approx``
    rather than exact equality. This keeps the assertion independent of how
    the implementation accumulates the value.
    """
    mock_client = MagicMock()
    mock_portkey_class = MagicMock(return_value=mock_client)

    with (
        patch("minisweagent.models.portkey_response_model.Portkey", mock_portkey_class),
        patch.dict(os.environ, {"PORTKEY_API_KEY": "test-key"}),
        patch("minisweagent.models.portkey_response_model.litellm.cost_calculator.completion_cost", return_value=0.05),
    ):
        mock_response = Mock()
        mock_response.id = "resp_cost"
        mock_response.output = [
            {"type": "function_call", "call_id": "call_cost", "name": "bash", "arguments": '{"command": "echo cost"}'}
        ]
        mock_response.model_dump.return_value = {"id": "resp_cost", "output": mock_response.output}
        mock_client.responses.create.return_value = mock_response

        # Snapshot the shared counter before the query so only the delta is checked.
        initial_global_cost = GLOBAL_MODEL_STATS.cost
        model = PortkeyResponseAPIModel(model_name="gpt-5-mini")

        messages = [{"role": "user", "content": "test"}]
        result = model.query(messages)

        # The per-call cost is the patched value passed through, so exact equality is fine.
        assert result["extra"]["cost"] == 0.05
        # The accumulated total is float arithmetic: compare with a tolerance.
        assert GLOBAL_MODEL_STATS.cost == pytest.approx(initial_global_cost + 0.05)
153
 
154
 
155
def test_response_api_model_zero_cost_assertion():
    """Check that a computed cost of zero makes ``query`` fail.

    ``completion_cost`` is patched to return ``0.0``. The test asserts that
    ``query`` raises ``RuntimeError`` whose message matches
    ``"Error calculating cost"``.
    """
    client = MagicMock()
    portkey_cls = MagicMock(return_value=client)

    portkey_patch = patch("minisweagent.models.portkey_response_model.Portkey", portkey_cls)
    env_patch = patch.dict(os.environ, {"PORTKEY_API_KEY": "test-key"})
    cost_patch = patch(
        "minisweagent.models.portkey_response_model.litellm.cost_calculator.completion_cost",
        return_value=0.0,
    )

    with portkey_patch, env_patch, cost_patch:
        tool_call = {
            "type": "function_call",
            "call_id": "call_zero",
            "name": "bash",
            "arguments": '{"command": "echo test"}',
        }
        response = Mock(id="resp_zero", output=[tool_call])
        response.model_dump.return_value = {"id": "resp_zero", "output": response.output}
        client.responses.create.return_value = response

        # Build the model outside the raises-block so only query() may raise.
        llm = PortkeyResponseAPIModel(model_name="gpt-5-mini")
        conversation = [{"role": "user", "content": "test"}]

        with pytest.raises(RuntimeError, match="Error calculating cost"):
            llm.query(conversation)
179
 
180
 
181
def test_response_api_model_with_model_kwargs():
    """Check that ``model_kwargs`` reach the ``responses.create`` call.

    The model is built with ``temperature`` and ``max_tokens`` in
    ``model_kwargs``. The test asserts that both appear, with the same values,
    among the keyword arguments of the recorded ``responses.create`` call.
    """
    client = MagicMock()
    portkey_cls = MagicMock(return_value=client)

    portkey_patch = patch("minisweagent.models.portkey_response_model.Portkey", portkey_cls)
    env_patch = patch.dict(os.environ, {"PORTKEY_API_KEY": "test-key"})
    cost_patch = patch(
        "minisweagent.models.portkey_response_model.litellm.cost_calculator.completion_cost",
        return_value=0.01,
    )

    with portkey_patch, env_patch, cost_patch:
        tool_call = {
            "type": "function_call",
            "call_id": "call_kw",
            "name": "bash",
            "arguments": '{"command": "echo kwargs"}',
        }
        response = Mock(id="resp_kwargs", output=[tool_call])
        response.model_dump.return_value = {"id": "resp_kwargs", "output": response.output}
        client.responses.create.return_value = response

        extra_kwargs = {"temperature": 0.7, "max_tokens": 100}
        llm = PortkeyResponseAPIModel(model_name="gpt-5-mini", model_kwargs=extra_kwargs)
        llm.query([{"role": "user", "content": "test"}])

        sent = client.responses.create.call_args.kwargs
        assert sent["temperature"] == 0.7
        assert sent["max_tokens"] == 100
207
 
208
 
209
def test_response_api_model_retry_on_rate_limit():
    """Check that ``query`` recovers when the first API call raises.

    The first ``responses.create`` call raises a generic
    ``Exception("Rate limit exceeded")`` and the second returns a valid
    response. The test asserts that ``query`` returns the action from that
    second response and that ``responses.create`` was called exactly twice.

    The environment sets ``MSWEA_MODEL_RETRY_STOP_AFTER_ATTEMPT`` to ``"2"``;
    presumably this caps the retry loop at two attempts (confirm against the
    model's retry configuration).
    """
    client = MagicMock()
    portkey_cls = MagicMock(return_value=client)

    portkey_patch = patch("minisweagent.models.portkey_response_model.Portkey", portkey_cls)
    env_patch = patch.dict(
        os.environ,
        {"PORTKEY_API_KEY": "test-key", "MSWEA_MODEL_RETRY_STOP_AFTER_ATTEMPT": "2"},
    )
    cost_patch = patch(
        "minisweagent.models.portkey_response_model.litellm.cost_calculator.completion_cost",
        return_value=0.01,
    )

    with portkey_patch, env_patch, cost_patch:
        tool_call = {
            "type": "function_call",
            "call_id": "call_retry",
            "name": "bash",
            "arguments": '{"command": "echo Success after retry"}',
        }
        response = Mock(id="resp_retry", output=[tool_call])
        response.model_dump.return_value = {"id": "resp_retry", "output": response.output}

        # An iterable side_effect yields one outcome per call; exception
        # instances are raised instead of returned, so call 1 fails and call 2 succeeds.
        client.responses.create.side_effect = [Exception("Rate limit exceeded"), response]

        llm = PortkeyResponseAPIModel(model_name="gpt-5-mini")
        result = llm.query([{"role": "user", "content": "test"}])

        assert result["extra"]["actions"] == [{"command": "echo Success after retry", "tool_call_id": "call_retry"}]
        # The mock counts the failing call as well as the successful one.
        assert client.responses.create.call_count == 2
251
 
252
 
253
def test_response_api_model_no_retry_on_type_error():
    """Check that a ``TypeError`` from the API call is not retried.

    ``responses.create`` is set to raise ``TypeError("Invalid type")``. The
    test asserts that ``query`` propagates that error and that
    ``responses.create`` was invoked exactly once.
    """
    client = MagicMock()
    portkey_cls = MagicMock(return_value=client)

    portkey_patch = patch("minisweagent.models.portkey_response_model.Portkey", portkey_cls)
    env_patch = patch.dict(os.environ, {"PORTKEY_API_KEY": "test-key"})

    with portkey_patch, env_patch:
        client.responses.create.side_effect = TypeError("Invalid type")

        llm = PortkeyResponseAPIModel(model_name="gpt-5-mini")
        conversation = [{"role": "user", "content": "test"}]

        with pytest.raises(TypeError, match="Invalid type"):
            llm.query(conversation)

        # A single recorded call means no retry was attempted.
        client.responses.create.assert_called_once()
273
 
274
 
275
def test_response_api_model_serialize():
    """Check the shape of ``serialize()`` for a freshly built model.

    The test asserts that ``info.config.model.model_name`` equals the name the
    model was constructed with and that ``info.config.model_type`` contains
    the class name ``PortkeyResponseAPIModel``.
    """
    client = MagicMock()
    portkey_cls = MagicMock(return_value=client)

    portkey_patch = patch("minisweagent.models.portkey_response_model.Portkey", portkey_cls)
    env_patch = patch.dict(os.environ, {"PORTKEY_API_KEY": "test-key"})

    with portkey_patch, env_patch:
        llm = PortkeyResponseAPIModel(model_name="gpt-5-mini")
        config = llm.serialize()["info"]["config"]

        assert config["model"]["model_name"] == "gpt-5-mini"
        assert "PortkeyResponseAPIModel" in config["model_type"]
290
 
291
 
292
def test_response_api_model_get_template_vars():
    """Check that ``get_template_vars()`` exposes the configured model name.

    The test asserts that the returned mapping has a ``model_name`` entry
    equal to the name the model was constructed with.
    """
    client = MagicMock()
    portkey_cls = MagicMock(return_value=client)

    portkey_patch = patch("minisweagent.models.portkey_response_model.Portkey", portkey_cls)
    env_patch = patch.dict(os.environ, {"PORTKEY_API_KEY": "test-key"})

    with portkey_patch, env_patch:
        llm = PortkeyResponseAPIModel(model_name="gpt-5-mini")
        variables = llm.get_template_vars()

        assert variables["model_name"] == "gpt-5-mini"
306
 
306 lines