github-actions[bot]
GitHub deploy: 4d024c91d61f15a8b39171610ab1406915ef598d
d6703a1
import re
# ANSI escape sequence pattern, one alternative per sequence family:
#   1. CSI: ESC [ + parameter bytes (0x30-0x3F, i.e. digits : ; < = > ?)
#      + intermediate bytes (0x20-0x2F) + one final byte (0x40-0x7E).
#      Covers colors, cursor movement, erase, and private-mode sequences
#      such as ESC[?25l (hide cursor) or ESC[?2004h (bracketed paste).
#   2. Character-set designation: ESC ( A / ESC ( B.
#   3. DCS / SOS / PM / APC strings: ESC P|X|^|_ ... terminated by ESC \.
#   4. OSC strings: ESC ] ... terminated by BEL or ESC \.
# String sequences (3, 4) are matched non-greedily and never across a newline,
# so an unterminated sequence cannot swallow the following lines.
ANSI_ESCAPE_PATTERN = re.compile(
    r"\x1b\[[0-?]*[ -/]*[@-~]"
    r"|\x1b\([AB]"
    r"|\x1b[PX^_].*?\x1b\\"
    r"|\x1b\].*?(?:\x07|\x1b\\)"
)
def strip_ansi_codes(text: str) -> str:
    r"""
    Strip ANSI escape codes from text.

    ANSI escape codes can be introduced by LLMs that include terminal
    color codes in their output. These codes cause syntax errors when
    the code is sent to Jupyter for execution.

    Common ANSI codes include:
    - Color codes: \x1b[31m (red), \x1b[32m (green), etc.
    - Reset codes: \x1b[0m, \x1b[39m
    - Cursor movement: \x1b[1A, \x1b[2J, etc.

    Args:
        text: Text that may contain ANSI escape sequences.

    Returns:
        The text with every match of ANSI_ESCAPE_PATTERN removed.
    """
    # NOTE: the docstring is a raw string on purpose. In a regular string the
    # example codes above would become real ESC characters, and displaying
    # the docstring (e.g. via help()) would send them to the terminal.
    return ANSI_ESCAPE_PATTERN.sub("", text)
def strip_markdown_code_fences(code: str) -> str:
    """
    Strip markdown code fences if present.

    This is a defensive cleanup: if the code doesn't contain fences, only
    leading/trailing whitespace is trimmed.

    Handles opening fences like:
    - ```python
    - ```py
    - ```
    - ``` python   (whitespace before the language tag)
    - ```{python}  (and other tags with non-word characters, e.g. c++)

    Args:
        code: Source text that may be wrapped in a fenced code block.

    Returns:
        The text without a leading opening fence and a trailing closing
        fence, with surrounding whitespace stripped.
    """
    code = code.strip()
    # Remove the opening fence. The first alternative consumes a full
    # info-string line (optional spaces, a language tag that may contain
    # . + # { } -, optional spaces) but only when it is terminated by a
    # newline, so code never gets mistaken for a tag. The second alternative
    # is the fallback for a fence glued to code on the same line
    # (e.g. "```python print(1)```"), where only word characters are dropped.
    code = re.sub(r"^```(?:[^\S\n]*[\w.+#{}-]*[^\S\n]*\n|\w*)", "", code)
    # Remove the closing fence together with the newline that precedes it.
    code = re.sub(r"\n?```\s*$", "", code)
    return code.strip()
def sanitize_code(code: str) -> str:
    """
    Prepare code for execution by running every cleanup pass over it.

    Call this before handing code to an interpreter such as Jupyter
    or Pyodide.

    Passes, in order:
    1. strip_ansi_codes - drops ANSI escape codes (from LLM output)
    2. strip_markdown_code_fences - drops markdown fences (if the model
       included them)

    Returns:
        The cleaned code string.
    """
    without_ansi = strip_ansi_codes(code)
    return strip_markdown_code_fences(without_ansi)