| 1 | """Utilities for handling multimodal content in OpenAI-style messages."""
|
| 2 |
|
| 3 | import copy
|
| 4 | import re
|
| 5 | from typing import Any
|
| 6 |
|
# Default pattern for locating inline multimodal tags inside message text.
# The pieces below are adjacent raw-string literals, so they are joined into a
# single pattern string at compile time.
DEFAULT_MULTIMODAL_REGEX = (
    r"(?s)"  # inline DOTALL flag: "." also matches newlines
    r"<MSWEA_MULTIMODAL_CONTENT>"
    r"<CONTENT_TYPE>(.+?)</CONTENT_TYPE>"  # group 1: content type
    r"(.+?)"  # group 2: payload (non-greedy, stops at the first closing tag)
    r"</MSWEA_MULTIMODAL_CONTENT>"
)
|
| 10 |
|
| 11 |
|
| 12 | def _expand_content_string(*, content: str, pattern: str) -> list[dict]:
|
| 13 | """Expand a content string, replacing multimodal tags with structured content."""
|
| 14 | matches = list(re.finditer(pattern, content))
|
| 15 | if not matches:
|
| 16 | return [{"type": "text", "text": content}]
|
| 17 | result = []
|
| 18 | last_end = 0
|
| 19 | for match in matches:
|
| 20 | text_before = content[last_end : match.start()]
|
| 21 | if text_before:
|
| 22 | result.append({"type": "text", "text": text_before})
|
| 23 | content_type = match.group(1).strip()
|
| 24 | extracted = match.group(2).strip()
|
| 25 | if content_type == "image_url":
|
| 26 | result.append({"type": "image_url", "image_url": {"url": extracted}})
|
| 27 | last_end = match.end()
|
| 28 | text_after = content[last_end:]
|
| 29 | if text_after:
|
| 30 | result.append({"type": "text", "text": text_after})
|
| 31 | return result
|
| 32 |
|
| 33 |
|
def expand_multimodal_content(content: Any, *, pattern: str) -> Any:
    """Recursively expand multimodal tags found in message content.

    Args:
        content: A string, a list, a dict, ``None``, or any other value.
        pattern: Regular expression used to locate multimodal tags. When it
            is empty, expansion is disabled.

    Returns:
        * empty ``pattern``: ``content`` itself, unchanged and not copied;
        * ``None``: ``None`` (a null message content stays null instead of
          turning into the string ``"None"``);
        * ``str``: the list of content parts built by
          ``_expand_content_string``;
        * ``list``: a new list with every item expanded recursively;
        * ``dict``: a copy whose ``"content"`` value, if the key is present,
          is expanded recursively; other keys are left alone;
        * anything else: ``str(content)``.

    Note: when a pattern is given, the input is deep-copied first, so the
    caller's object is never modified.
    """
    if not pattern:
        return content
    # Copy exactly once here; the recursive walk then works on the private
    # copy instead of re-copying every nested level.
    return _expand_copied_content(copy.deepcopy(content), pattern=pattern)


def _expand_copied_content(content: Any, *, pattern: str) -> Any:
    """Expand tags inside ``content``, which must already be a private copy."""
    if content is None:
        # Stringifying None would yield the literal text "None".
        return None
    if isinstance(content, str):
        return _expand_content_string(content=content, pattern=pattern)
    if isinstance(content, list):
        return [_expand_copied_content(item, pattern=pattern) for item in content]
    if isinstance(content, dict):
        if "content" in content:
            content["content"] = _expand_copied_content(content["content"], pattern=pattern)
        return content
    return str(content)
|
| 51 |
|