| 1 | import pytest
|
| 2 | import yaml
|
| 3 |
|
| 4 | from minisweagent.run.benchmarks.utils.batch_progress import RunBatchProgressManager, _shorten_str
|
| 5 |
|
| 6 |
|
@pytest.fixture
def manager():
    """Provide a fresh RunBatchProgressManager tracking five instances."""
    mgr = RunBatchProgressManager(num_instances=5)
    return mgr
|
| 11 |
|
| 12 |
|
@pytest.fixture
def manager_with_yaml(tmp_path):
    """Provide a manager wired to a yaml report file, together with that path."""
    report_file = tmp_path / "report.yaml"
    mgr = RunBatchProgressManager(num_instances=3, yaml_report_path=report_file)
    return mgr, report_file
|
| 18 |
|
| 19 |
|
@pytest.mark.parametrize(
    ("text", "max_len", "shorten_left", "expected"),
    [
        # NOTE(review): the expected strings below look whitespace-padded to
        # max_len (e.g. "hello" padded to width 10), but trailing spaces may
        # have been collapsed by copy/extraction — confirm the exact padding
        # widths against the real _shorten_str behavior.
        ("hello", 10, False, "hello "),
        ("hello world", 8, False, "hello..."),
        ("hello world", 8, True, "...world"),
        ("hello", 5, False, "hello"),
        ("hi", 5, False, "hi "),
    ],
)
def test_shorten_str(text, max_len, shorten_left, expected):
    """_shorten_str fits text into max_len, eliding on the left or right side."""
    assert _shorten_str(text, max_len, shorten_left) == expected
|
| 32 |
|
| 33 |
|
def test_manager_initialization(manager):
    """A brand-new manager reports zero completions and no exit statuses."""
    assert manager.n_completed == 0
    assert not manager._instances_by_exit_status
|
| 37 |
|
| 38 |
|
def test_manager_with_yaml_path(manager_with_yaml):
    """The yaml report path given at construction is stored on the manager."""
    mgr, expected_path = manager_with_yaml
    assert mgr._yaml_report_path == expected_path
|
| 42 |
|
| 43 |
|
def test_instance_lifecycle(manager):
    """Starting an instance registers a spinner; ending it records the status."""
    instance_id = "task_1"

    manager.on_instance_start(instance_id)
    assert manager.n_completed == 0
    assert instance_id in manager._spinner_tasks

    manager.on_instance_end(instance_id, "success")
    assert manager._instances_by_exit_status["success"] == [instance_id]
    assert manager.n_completed == 1
|
| 52 |
|
| 53 |
|
@pytest.mark.parametrize(
    "statuses",
    [
        ["success", "failed", "success", "timeout"],
        ["error", "error", "error"],
        ["success"] * 5,
    ],
)
def test_multiple_instances(manager, statuses):
    """Each finished instance is grouped under its exit status with correct counts."""
    for idx, status in enumerate(statuses, 1):
        name = f"task_{idx}"
        manager.on_instance_start(name)
        manager.on_instance_end(name, status)

    assert manager.n_completed == len(statuses)
    # Per-status tallies must match how often each status was reported.
    tally = {status: statuses.count(status) for status in set(statuses)}
    for status, count in tally.items():
        assert len(manager._instances_by_exit_status[status]) == count
|
| 72 |
|
| 73 |
|
def test_uncaught_exception(manager):
    """An uncaught exception counts as completion under an 'Uncaught <Type>' status."""
    manager.on_instance_start("task_1")
    manager.on_uncaught_exception("task_1", ValueError("test error"))

    assert "Uncaught ValueError" in manager._instances_by_exit_status
    assert manager.n_completed == 1
|
| 80 |
|
| 81 |
|
def test_update_instance_status(manager):
    """Smoke test: updating the status of a running instance must not raise."""
    instance_id = "task_1"
    manager.on_instance_start(instance_id)
    manager.update_instance_status(instance_id, "Processing files...")
|
| 85 |
|
| 86 |
|
def test_yaml_report_generation(manager_with_yaml):
    """Finishing instances writes their exit statuses into the yaml report file."""
    mgr, report_path = manager_with_yaml

    for name, status in (("task_1", "success"), ("task_2", "failed")):
        mgr.on_instance_start(name)
        mgr.on_instance_end(name, status)

    assert report_path.exists()
    report = yaml.safe_load(report_path.read_text())
    by_status = report["instances_by_exit_status"]
    assert by_status["success"] == ["task_1"]
    assert by_status["failed"] == ["task_2"]
|
| 99 |
|
| 100 |
|
def test_get_overview_data(manager):
    """The overview dict exposes instances grouped by their exit status."""
    manager.on_instance_start("task_1")
    manager.on_instance_end("task_1", "success")

    expected = {"instances_by_exit_status": {"success": ["task_1"]}}
    assert manager._get_overview_data() == expected
|
| 107 |
|
| 108 |
|
def test_print_report(manager, capsys):
    """print_report emits per-status counts and the ids of finished instances."""
    for name, status in (("task_1", "success"), ("task_2", "failed")):
        manager.on_instance_start(name)
        manager.on_instance_end(name, status)

    manager.print_report()

    out = capsys.readouterr().out
    for fragment in ("success: 1", "failed: 1", "task_1", "task_2"):
        assert fragment in out
|
| 123 |
|
| 124 |
|
def test_concurrent_operations(manager):
    """Test handling multiple operations without corruption.

    Runs ten instances through start → status-update → end and checks that the
    completion count and per-status grouping stay consistent.
    """
    # Only three distinct statuses are cycled. The original `* 4` duplication
    # of this list was dead data: indexing used `i % 3`, so elements past the
    # first three were never read.
    statuses = ["success", "failed", "timeout"]

    for i in range(10):
        instance_id = f"task_{i}"
        manager.on_instance_start(instance_id)
        manager.update_instance_status(instance_id, f"step {i}")
        manager.on_instance_end(instance_id, statuses[i % len(statuses)])

    assert manager.n_completed == 10
    # Every completed instance must appear in exactly one status bucket.
    assert sum(len(instances) for instances in manager._instances_by_exit_status.values()) == 10
|
| 137 |
|