Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions interpreter/core/computer/terminal/languages/sandbox/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""
Sandbox-backed language implementations for OpenSandbox execution.
"""

from .sandbox_language import SandboxLanguage


class SandboxPython(SandboxLanguage):
    """Python handler whose code is executed through the sandbox."""

    name = "Python"
    aliases = ["py"]
    file_extension = "py"
    # Language identifier passed to the sandbox manager on execution.
    sandbox_lang = "python"


class SandboxShell(SandboxLanguage):
    """Shell handler whose code is executed through the sandbox."""

    name = "Shell"
    aliases = ["bash", "sh", "zsh"]
    file_extension = "sh"
    # Language identifier passed to the sandbox manager on execution.
    sandbox_lang = "bash"


class SandboxJavaScript(SandboxLanguage):
    """JavaScript handler whose code is executed through the sandbox."""

    name = "JavaScript"
    aliases = ["js"]
    file_extension = "js"
    # Language identifier passed to the sandbox manager on execution.
    sandbox_lang = "javascript"
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""
SandboxLanguage - Base class for sandbox-backed language execution.

Extends BaseLanguage and delegates all execution to a shared SandboxManager.
"""

from ...base_language import BaseLanguage


class SandboxLanguage(BaseLanguage):
    """
    Language handler that forwards every operation to a sandbox manager.

    Concrete subclasses provide `name`, `aliases`, `file_extension`, and
    `sandbox_lang`; this base class only wires calls through to the manager
    object it was constructed with.
    """

    # Subclasses replace this with "python", "bash", or "javascript".
    sandbox_lang = None
    # Marker checked by the Terminal instantiation logic.
    _is_sandbox_language = True

    def __init__(self, sandbox_manager):
        """Remember the manager that will perform the actual execution."""
        self.sandbox_manager = sandbox_manager

    def run(self, code):
        """Yield whatever the manager's `execute()` yields for `code`."""
        stream = self.sandbox_manager.execute(self.sandbox_lang, code)
        yield from stream

    def stop(self):
        """Ask the manager to stop the current execution."""
        manager = self.sandbox_manager
        manager.stop()

    def terminate(self):
        """
        Intentionally do nothing.

        A single language handler must not kill the whole sandbox;
        SandboxManager.terminate() is called by Terminal.terminate().
        """
        return None
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
"""
SandboxManager - Manages OpenSandbox lifecycle and code execution.

Owns a single sandbox instance and CodeInterpreterSync, providing
streaming code execution that yields LMC-format chunks.
"""

import os
import queue
import threading
import traceback

# Sandbox runtime paired with every language name or alias that runs on it.
# Declaration order matters: it determines the key order of LANGUAGE_MAP.
_RUNTIME_ALIASES = (
    ("python", ("python", "py")),
    ("bash", ("shell", "bash", "sh", "zsh")),
    ("javascript", ("javascript", "js")),
)

# Flat lookup: user-facing language name or alias -> sandbox runtime name.
LANGUAGE_MAP = {
    alias: runtime
    for runtime, aliases in _RUNTIME_ALIASES
    for alias in aliases
}

# Default sandbox image for the code interpreter.
DEFAULT_IMAGE = "opensandbox/code-interpreter:latest"


class SandboxManager:
    """
    Own one OpenSandbox sandbox plus its code interpreter and run code in it.

    The sandbox is created lazily on the first `execute()` call. Output is
    streamed back as LMC-format dicts of the form
    `{"type": "console", "format": "output", "content": <str>}`.
    """

    # Seconds to block on the output queue before re-checking for completion.
    _POLL_INTERVAL_SECONDS = 0.3
    # Upper bound on how long execute() waits for the worker thread to return
    # once completion has been signalled, so a stuck call cannot hang it.
    _WORKER_JOIN_TIMEOUT_SECONDS = 2.0

    def __init__(
        self,
        api_key=None,
        domain=None,
        image=None,
        timeout_minutes=10,
    ):
        """
        Store connection settings; no sandbox is created yet.

        Args:
            api_key: API key; falls back to the OPEN_SANDBOX_API_KEY env var.
            domain: Service domain; falls back to OPEN_SANDBOX_DOMAIN.
            image: Sandbox image; falls back to OPEN_SANDBOX_IMAGE, then to
                DEFAULT_IMAGE.
            timeout_minutes: Passed to sandbox creation as a `timedelta`
                of this many minutes.
        """
        self.api_key = api_key or os.environ.get("OPEN_SANDBOX_API_KEY")
        self.domain = domain or os.environ.get("OPEN_SANDBOX_DOMAIN")
        self.image = image or os.environ.get("OPEN_SANDBOX_IMAGE", DEFAULT_IMAGE)
        self.timeout_minutes = timeout_minutes

        self._sandbox = None
        self._code_interpreter = None
        self._contexts = {}  # sandbox language name -> execution context
        self._current_execution_id = None
        self._lock = threading.Lock()  # serializes lazy sandbox creation

    @staticmethod
    def _console_chunk(content):
        """Wrap `content` in the LMC console-output dict that callers expect."""
        return {
            "type": "console",
            "format": "output",
            "content": content,
        }

    @staticmethod
    def _release_sandbox(sandbox):
        """Kill, then close, `sandbox`; a failure in either step is ignored."""
        for method_name in ("kill", "close"):
            try:
                getattr(sandbox, method_name)()
            except Exception:
                # Deliberate best effort: teardown must never raise, and the
                # second step should still run if the first one fails.
                pass

    @staticmethod
    def _drain(pending):
        """Yield every item currently queued in `pending` without blocking."""
        while True:
            try:
                item = pending.get_nowait()
            except queue.Empty:
                return
            yield item

    def _ensure_sandbox(self):
        """
        Lazily create the sandbox and code interpreter on first use.

        Raises:
            ImportError: The OpenSandbox packages cannot be imported.
            ValueError: No API key or no domain is configured.
        """
        if self._sandbox is not None:
            return

        with self._lock:
            # Re-check under the lock: another thread may have finished
            # creating the sandbox while this one was waiting.
            if self._sandbox is not None:
                return

            try:
                from opensandbox.sync.sandbox import SandboxSync
                from opensandbox.config import ConnectionConfig
                from code_interpreter.sync.code_interpreter import CodeInterpreterSync
            except ImportError as err:
                raise ImportError(
                    "OpenSandbox packages are required for sandbox mode. "
                    "Install with: pip install opensandbox opensandbox-code-interpreter"
                ) from err

            if not self.api_key:
                raise ValueError(
                    "OpenSandbox API key is required. "
                    "Set via --sandbox_api_key or OPEN_SANDBOX_API_KEY env var."
                )
            if not self.domain:
                raise ValueError(
                    "OpenSandbox domain is required. "
                    "Set via --sandbox_domain or OPEN_SANDBOX_DOMAIN env var."
                )

            from datetime import timedelta

            config = ConnectionConfig(
                api_key=self.api_key,
                domain=self.domain,
            )

            sandbox = SandboxSync.create(
                self.image,
                connection_config=config,
                timeout=timedelta(minutes=self.timeout_minutes),
            )
            try:
                interpreter = CodeInterpreterSync.create(sandbox=sandbox)
            except Exception:
                # Without an interpreter the sandbox is unusable. Release it
                # and leave the manager uninitialized so a later call can
                # retry instead of finding a half-built state.
                self._release_sandbox(sandbox)
                raise

            # Publish the interpreter first: `_sandbox` is the "ready" flag
            # tested by the unlocked check at the top of this method.
            self._code_interpreter = interpreter
            self._sandbox = sandbox

    def _get_context(self, language):
        """
        Get or create the execution context for `language`.

        Must be called after `_ensure_sandbox()` has succeeded.

        Returns:
            A `(context, sandbox_lang)` tuple, where `sandbox_lang` is the
            LANGUAGE_MAP value for `language`.

        Raises:
            ValueError: `language` is not a key of LANGUAGE_MAP
                (matched case-insensitively) or is not a string.
        """
        sandbox_lang = (
            LANGUAGE_MAP.get(language.lower())
            if isinstance(language, str)
            else None
        )
        if sandbox_lang is None:
            raise ValueError(
                f"Language '{language}' is not supported in sandbox mode. "
                f"Supported: {list(LANGUAGE_MAP.keys())}"
            )

        if sandbox_lang not in self._contexts:
            self._contexts[sandbox_lang] = (
                self._code_interpreter.codes.create_context(sandbox_lang)
            )
        return self._contexts[sandbox_lang], sandbox_lang

    def execute(self, language, code):
        """
        Execute code in the sandbox. Generator yielding LMC-format dicts.

        Setup failures (sandbox creation, unsupported language) are not
        raised; they are yielded as a single console chunk containing the
        formatted traceback. Stdout, stderr, and execution errors are yielded
        as they arrive, followed by the text of any result values.
        """
        try:
            self._ensure_sandbox()
            context, _ = self._get_context(language)
        except Exception:
            yield self._console_chunk(traceback.format_exc())
            return

        from opensandbox.models.execd_sync import ExecutionHandlersSync

        chunks = queue.Queue()
        finished = threading.Event()
        outcome = [None]  # mutable cell so the worker can hand back its result

        def emit(text):
            chunks.put(self._console_chunk(text))

        def on_stdout(msg):
            emit(msg.text)

        def on_stderr(msg):
            emit(msg.text)

        def on_error(err):
            tb = "\n".join(err.traceback) if err.traceback else ""
            content = f"{err.name}: {err.value}"
            if tb:
                content = f"{tb}\n{content}"
            emit(content)

        def on_execution_complete(complete):
            finished.set()

        handlers = ExecutionHandlersSync(
            on_stdout=on_stdout,
            on_stderr=on_stderr,
            on_error=on_error,
            on_execution_complete=on_execution_complete,
        )

        def run_in_thread():
            try:
                result = self._code_interpreter.codes.run(
                    code,
                    context=context,
                    handlers=handlers,
                )
                outcome[0] = result
                # NOTE(review): the id is recorded only after run() returns.
                # If run() blocks until the execution finishes, stop() can
                # never interrupt an in-flight execution. Confirm whether the
                # SDK exposes the id earlier.
                if result and result.id:
                    self._current_execution_id = result.id
            except Exception:
                emit(traceback.format_exc())
            finally:
                finished.set()

        worker = threading.Thread(target=run_in_thread, daemon=True)
        worker.start()

        try:
            # Yield output as it arrives, polling so completion is noticed
            # even when no further output is produced.
            while True:
                try:
                    chunk = chunks.get(timeout=self._POLL_INTERVAL_SECONDS)
                except queue.Empty:
                    if finished.is_set():
                        break
                    continue
                yield chunk

            # The completion callback can fire before run() has returned.
            # Give the worker a bounded moment to store its result and queue
            # any last chunks, so neither is dropped.
            worker.join(timeout=self._WORKER_JOIN_TIMEOUT_SECONDS)
            yield from self._drain(chunks)

            # If execution produced results (e.g. expression values), yield them.
            result = outcome[0]
            if result and result.result:
                for item in result.result:
                    if item.text:
                        yield self._console_chunk(item.text)
        finally:
            # Also runs when the consumer closes the generator early, so a
            # stale id is never left behind for stop().
            self._current_execution_id = None

    def stop(self):
        """Interrupt the currently recorded execution, if there is one."""
        exec_id = self._current_execution_id
        if exec_id and self._code_interpreter:
            try:
                self._code_interpreter.codes.interrupt(exec_id)
            except Exception:
                # Best effort: a failed interrupt must not crash the caller.
                pass

    def terminate(self):
        """Kill and close the sandbox (best effort) and reset cached state."""
        if self._sandbox:
            self._release_sandbox(self._sandbox)
        self._sandbox = None
        self._code_interpreter = None
        self._contexts = {}
        self._current_execution_id = None
59 changes: 43 additions & 16 deletions interpreter/core/computer/terminal/terminal.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,42 @@
class Terminal:
def __init__(self, computer):
self.computer = computer
self.languages = [
Ruby,
Python,
Shell,
JavaScript,
HTML,
AppleScript,
R,
PowerShell,
React,
Java,
]
self._sandbox_manager = None

if getattr(computer.interpreter, "sandbox_mode", False):
from .languages.sandbox import SandboxPython, SandboxShell, SandboxJavaScript
from .languages.sandbox.sandbox_manager import SandboxManager

self._sandbox_manager = SandboxManager(
api_key=getattr(computer.interpreter, "sandbox_api_key", None),
domain=getattr(computer.interpreter, "sandbox_domain", None),
)
self.languages = [
Ruby,
SandboxPython,
SandboxShell,
SandboxJavaScript,
HTML,
AppleScript,
R,
PowerShell,
React,
Java,
]
else:
self.languages = [
Ruby,
Python,
Shell,
JavaScript,
HTML,
AppleScript,
R,
PowerShell,
React,
Java,
]

self._active_languages = {}

def sudo_install(self, package):
Expand Down Expand Up @@ -88,7 +112,7 @@ def run(self, language, code, stream=False, display=False):
else:
return [{"type": "console", "format": "output", "content": f"Failed to install package {package}."}]

if language == "python":
if language == "python" and not self._sandbox_manager:
if (
self.computer.import_computer_api
and not self.computer._has_imported_computer_api
Expand Down Expand Up @@ -155,10 +179,10 @@ def run(self, language, code, stream=False, display=False):

def _streaming_run(self, language, code, display=False):
if language not in self._active_languages:
# Get the language. Pass in self.computer *if it takes a single argument*
# but pass in nothing if not. This makes custom languages easier to add / understand.
lang_class = self.get_language(language)
if lang_class.__init__.__code__.co_argcount > 1:
if hasattr(lang_class, '_is_sandbox_language') and lang_class._is_sandbox_language:
self._active_languages[language] = lang_class(self._sandbox_manager)
elif lang_class.__init__.__code__.co_argcount > 1:
self._active_languages[language] = lang_class(self.computer)
else:
self._active_languages[language] = lang_class()
Expand Down Expand Up @@ -205,3 +229,6 @@ def terminate(self):
): # Not sure why this is None sometimes. We should look into this
language.terminate()
del self._active_languages[language_name]

if self._sandbox_manager:
self._sandbox_manager.terminate()
Loading