From 0d3e072360e4012ca5b3eaf29c8eb0f928074ed0 Mon Sep 17 00:00:00 2001 From: Haofeng Liang Date: Sun, 19 Apr 2026 00:11:57 +0800 Subject: [PATCH 1/3] feat(mcp): expose OM1 as an MCP server MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add src/expose/ — a stdio MCP server that exposes OM1 tools to external MCP clients (Claude Desktop, Cursor, Windsurf). Tools: - om1_move: 8 actions (forward/backward/turn_left/turn_right/spin/sit/stand/idle) - om1_speak: text (1-500 chars) - om1_face: 5 emotions (joy/smile/ponder/alert/sad) Tool inputs are validated via JSON Schema (enums for om1_move and om1_face, length bounds for om1_speak), giving the LLM a precise tool contract instead of free-form strings. Architecture: - server.py: MCP wiring and pure handle_tool_call() dispatcher - websim_adapter.py: WebSim interop with fail-fast port conflict check - tools.py: Enum definitions and tool schemas - config.py: Env-var config (OM1_WEBSIM_HOST/PORT, OM1_LOG_LEVEL) Known limitation: WebSim.py hardcodes port 8000 in _run_server (L509). OM1_WEBSIM_PORT is plumbed correctly on our side but won't take effect until WebSim reads its SimulatorConfig. Follow-up PR candidate. 
--- src/expose/__init__.py | 0 src/expose/__main__.py | 6 ++ src/expose/config.py | 24 ++++++++ src/expose/server.py | 111 +++++++++++++++++++++++++++++++++++ src/expose/tools.py | 79 +++++++++++++++++++++++++ src/expose/websim_adapter.py | 49 ++++++++++++++++ 6 files changed, 269 insertions(+) create mode 100644 src/expose/__init__.py create mode 100644 src/expose/__main__.py create mode 100644 src/expose/config.py create mode 100644 src/expose/server.py create mode 100644 src/expose/tools.py create mode 100644 src/expose/websim_adapter.py diff --git a/src/expose/__init__.py b/src/expose/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/expose/__main__.py b/src/expose/__main__.py new file mode 100644 index 000000000..2bcc48bdd --- /dev/null +++ b/src/expose/__main__.py @@ -0,0 +1,6 @@ +"""Entry point for `python -m expose`.""" + +from expose.server import main + +if __name__ == "__main__": + main() diff --git a/src/expose/config.py b/src/expose/config.py new file mode 100644 index 000000000..d8a0e5f91 --- /dev/null +++ b/src/expose/config.py @@ -0,0 +1,24 @@ +"""Runtime configuration for the OM1 MCP server.""" + +from __future__ import annotations + +import os +from dataclasses import dataclass + + +@dataclass(frozen=True) +class ServerConfig: + """Immutable runtime configuration for the OM1 MCP server.""" + + websim_host: str = "127.0.0.1" + websim_port: int = 8000 + log_level: str = "WARNING" + + @classmethod + def from_env(cls) -> "ServerConfig": + """Build a ServerConfig from OM1_WEBSIM_HOST / OM1_WEBSIM_PORT / OM1_LOG_LEVEL env vars.""" + return cls( + websim_host=os.getenv("OM1_WEBSIM_HOST", "127.0.0.1"), + websim_port=int(os.getenv("OM1_WEBSIM_PORT", "8000")), + log_level=os.getenv("OM1_LOG_LEVEL", "WARNING"), + ) diff --git a/src/expose/server.py b/src/expose/server.py new file mode 100644 index 000000000..f686ee68a --- /dev/null +++ b/src/expose/server.py @@ -0,0 +1,111 @@ +"""OM1 MCP server: expose OM1 tools to external MCP 
clients.""" + +from __future__ import annotations + +import asyncio +import logging +from importlib.metadata import PackageNotFoundError, version +from typing import Any + +import mcp.server.stdio +import mcp.types as types +from mcp.server import NotificationOptions, Server +from mcp.server.models import InitializationOptions + +from expose.config import ServerConfig +from expose.tools import Emotion, MoveAction, build_tool_definitions +from expose.websim_adapter import WebSimAdapter + +logger = logging.getLogger(__name__) + + +def _get_version() -> str: + try: + return version("om1") + except PackageNotFoundError: + return "0.0.0+unknown" + + +def _err(msg: str) -> list[types.TextContent]: + return [types.TextContent(type="text", text=f"Error: {msg}")] + + +def _ok(msg: str) -> list[types.TextContent]: + return [types.TextContent(type="text", text=msg)] + + +async def handle_tool_call( + name: str, arguments: dict[str, Any] | None, adapter: WebSimAdapter +) -> list[types.TextContent]: + """Dispatch an MCP tool call to the adapter and return the tool result as TextContent. + + Invalid inputs return an error TextContent instead of raising, so the MCP + client observes a structured failure rather than a transport-level crash. 
+ """ + args = arguments or {} + try: + if name == "om1_move": + action = MoveAction(args["action"]) + adapter.move(action.value) + return _ok(f"Executed move: {action.value}") + + if name == "om1_speak": + text = args.get("text", "").strip() + if not text: + raise ValueError("text must be non-empty") + adapter.speak(text) + return _ok(f"Spoke: {text}") + + if name == "om1_face": + emotion = Emotion(args["emotion"]) + adapter.face(emotion.value) + return _ok(f"Changed emotion to: {emotion.value}") + + return _err(f"Unknown tool: {name}") + except (KeyError, ValueError) as e: + return _err(str(e)) + + +def build_server(adapter: WebSimAdapter) -> Server: + """Wire an mcp.server.Server with list_tools/call_tool handlers backed by ``adapter``.""" + server = Server("om1_mcp_server") + + @server.list_tools() + async def _list_tools() -> list[types.Tool]: + return build_tool_definitions() + + @server.call_tool() + async def _call_tool(name: str, arguments: dict[str, Any] | None) -> list[types.TextContent]: + return await handle_tool_call(name, arguments, adapter) + + return server + + +async def run(config: ServerConfig) -> None: + """Initialise logging/WebSim and serve the MCP protocol over stdio until stdin closes.""" + logging.basicConfig(level=config.log_level.upper()) + adapter = WebSimAdapter.create(config.websim_host, config.websim_port) + server = build_server(adapter) + + async with mcp.server.stdio.stdio_server() as (read, write): + await server.run( + read, + write, + InitializationOptions( + server_name="om1_mcp_server", + server_version=_get_version(), + capabilities=server.get_capabilities( + notification_options=NotificationOptions(), + experimental_capabilities={}, + ), + ), + ) + + +def main() -> None: + """CLI entry point (``om1-mcp-server``): read env config and run the server.""" + asyncio.run(run(ServerConfig.from_env())) + + +if __name__ == "__main__": + main() diff --git a/src/expose/tools.py b/src/expose/tools.py new file mode 100644 index 
000000000..e83778a7d --- /dev/null +++ b/src/expose/tools.py @@ -0,0 +1,79 @@ +"""MCP tool definitions exposed by the OM1 server.""" + +from __future__ import annotations + +from enum import Enum + +import mcp.types as types + + +class MoveAction(str, Enum): + """Supported values for the ``om1_move`` tool's ``action`` parameter.""" + + FORWARD = "forward" + BACKWARD = "backward" + TURN_LEFT = "turn_left" + TURN_RIGHT = "turn_right" + SPIN = "spin" + SIT = "sit" + STAND = "stand" + IDLE = "idle" + + +class Emotion(str, Enum): + """Supported values for the ``om1_face`` tool's ``emotion`` parameter.""" + + JOY = "joy" + SMILE = "smile" + PONDER = "ponder" + ALERT = "alert" + SAD = "sad" + + +def build_tool_definitions() -> list[types.Tool]: + """Return the list of MCP Tool schemas exposed by the OM1 server.""" + return [ + types.Tool( + name="om1_move", + description="Move the OM1 agent.", + inputSchema={ + "type": "object", + "properties": { + "action": { + "type": "string", + "enum": [a.value for a in MoveAction], + } + }, + "required": ["action"], + }, + ), + types.Tool( + name="om1_speak", + description="Make the OM1 agent speak text aloud.", + inputSchema={ + "type": "object", + "properties": { + "text": { + "type": "string", + "minLength": 1, + "maxLength": 500, + } + }, + "required": ["text"], + }, + ), + types.Tool( + name="om1_face", + description="Change the OM1 agent's facial emotion.", + inputSchema={ + "type": "object", + "properties": { + "emotion": { + "type": "string", + "enum": [e.value for e in Emotion], + } + }, + "required": ["emotion"], + }, + ), + ] diff --git a/src/expose/websim_adapter.py b/src/expose/websim_adapter.py new file mode 100644 index 000000000..711d9c085 --- /dev/null +++ b/src/expose/websim_adapter.py @@ -0,0 +1,49 @@ +"""Adapter that wraps WebSim behind a minimal, testable interface.""" + +from __future__ import annotations + +import socket +from contextlib import closing +from typing import Any + +from llm.output_model import 
Action + + +class WebSimAdapter: + """Thin wrapper exposing only the move/speak/face operations the MCP server needs.""" + + def __init__(self, websim: Any): + """Wrap an already-constructed WebSim-like object (used directly in tests with mocks).""" + self._websim = websim + + @classmethod + def create(cls, host: str, port: int) -> "WebSimAdapter": + """Factory that also starts a real WebSim on (host, port).""" + cls.ensure_port_free(host, port) + # Imported here so unit tests for the adapter don't need WebSim. + from simulators.base import SimulatorConfig + from simulators.plugins.WebSim import WebSim + + return cls(WebSim(SimulatorConfig(host=host, port=port))) + + @staticmethod + def ensure_port_free(host: str, port: int) -> None: + """Raise RuntimeError if ``(host, port)`` is already accepting connections.""" + with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: + if s.connect_ex((host, port)) == 0: + raise RuntimeError( + f"Port {port} on {host} is already in use. " + f"Set OM1_WEBSIM_PORT to a free port or kill the conflict." 
+ ) + + def move(self, action: str) -> None: + """Dispatch a ``move`` Action to the underlying WebSim.""" + self._websim.sim([Action(type="move", value=action)]) + + def speak(self, text: str) -> None: + """Dispatch a ``speak`` Action to the underlying WebSim.""" + self._websim.sim([Action(type="speak", value=text)]) + + def face(self, emotion: str) -> None: + """Dispatch an ``emotion`` Action to the underlying WebSim.""" + self._websim.sim([Action(type="emotion", value=emotion)]) From 2050c31cf2f53f48ec310a012008b7a83e1d417d Mon Sep 17 00:00:00 2001 From: Haofeng Liang Date: Sun, 19 Apr 2026 00:12:22 +0800 Subject: [PATCH 2/3] test(mcp): add unit tests for expose module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 24 tests covering: - tools: schema structure, enum constraints on om1_move/om1_face, om1_speak text length bounds - config: env-var loading and defaults, ValueError on invalid port - websim_adapter: move/speak/face dispatch via MagicMock, real socket check for ensure_port_free (pass + raise paths) - server: handle_tool_call 8 paths — 3 happy paths, enum rejection, empty-text rejection, unknown tool, KeyError fallback All tests use real code (no mocking of our own modules); only the WebSim boundary is mocked via adapter injection. 
--- tests/expose/__init__.py | 0 tests/expose/test_config.py | 36 +++++++++++++++ tests/expose/test_server.py | 69 +++++++++++++++++++++++++++++ tests/expose/test_tools.py | 47 ++++++++++++++++++++ tests/expose/test_websim_adapter.py | 56 +++++++++++++++++++++++ 5 files changed, 208 insertions(+) create mode 100644 tests/expose/__init__.py create mode 100644 tests/expose/test_config.py create mode 100644 tests/expose/test_server.py create mode 100644 tests/expose/test_tools.py create mode 100644 tests/expose/test_websim_adapter.py diff --git a/tests/expose/__init__.py b/tests/expose/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/expose/test_config.py b/tests/expose/test_config.py new file mode 100644 index 000000000..dc69152aa --- /dev/null +++ b/tests/expose/test_config.py @@ -0,0 +1,36 @@ +"""Tests for expose.config — server configuration from env vars.""" + +import pytest + +from expose.config import ServerConfig + + +class TestDefaults: + def test_defaults_when_no_env_vars(self, monkeypatch): + for var in ("OM1_WEBSIM_HOST", "OM1_WEBSIM_PORT", "OM1_LOG_LEVEL"): + monkeypatch.delenv(var, raising=False) + cfg = ServerConfig.from_env() + assert cfg.websim_host == "127.0.0.1" + assert cfg.websim_port == 8000 + assert cfg.log_level == "WARNING" + + +class TestEnvOverrides: + def test_host_override(self, monkeypatch): + monkeypatch.setenv("OM1_WEBSIM_HOST", "0.0.0.0") + assert ServerConfig.from_env().websim_host == "0.0.0.0" + + def test_port_override_parses_int(self, monkeypatch): + monkeypatch.setenv("OM1_WEBSIM_PORT", "9000") + assert ServerConfig.from_env().websim_port == 9000 + + def test_log_level_override(self, monkeypatch): + monkeypatch.setenv("OM1_LOG_LEVEL", "DEBUG") + assert ServerConfig.from_env().log_level == "DEBUG" + + +class TestValidation: + def test_invalid_port_raises(self, monkeypatch): + monkeypatch.setenv("OM1_WEBSIM_PORT", "not-a-number") + with pytest.raises(ValueError): + ServerConfig.from_env() diff --git 
a/tests/expose/test_server.py b/tests/expose/test_server.py new file mode 100644 index 000000000..77c9e7dbd --- /dev/null +++ b/tests/expose/test_server.py @@ -0,0 +1,69 @@ +"""Tests for expose.server — tool dispatch and MCP wiring.""" + +from unittest.mock import MagicMock + +import pytest + +from expose.server import handle_tool_call + + +@pytest.fixture +def adapter(): + return MagicMock() + + +class TestMove: + @pytest.mark.asyncio + async def test_valid_move_calls_adapter(self, adapter): + result = await handle_tool_call("om1_move", {"action": "forward"}, adapter) + adapter.move.assert_called_once_with("forward") + assert len(result) == 1 + assert "forward" in result[0].text + + @pytest.mark.asyncio + async def test_invalid_move_returns_error_not_raise(self, adapter): + result = await handle_tool_call("om1_move", {"action": "moonwalk"}, adapter) + adapter.move.assert_not_called() + assert result[0].text.lower().startswith("error") + + @pytest.mark.asyncio + async def test_missing_action_arg_returns_error(self, adapter): + result = await handle_tool_call("om1_move", {}, adapter) + adapter.move.assert_not_called() + assert "error" in result[0].text.lower() + + +class TestSpeak: + @pytest.mark.asyncio + async def test_valid_speak_calls_adapter(self, adapter): + result = await handle_tool_call("om1_speak", {"text": "hi"}, adapter) + adapter.speak.assert_called_once_with("hi") + assert "hi" in result[0].text + + @pytest.mark.asyncio + async def test_empty_text_returns_error(self, adapter): + result = await handle_tool_call("om1_speak", {"text": " "}, adapter) + adapter.speak.assert_not_called() + assert "error" in result[0].text.lower() + + +class TestFace: + @pytest.mark.asyncio + async def test_valid_emotion(self, adapter): + result = await handle_tool_call("om1_face", {"emotion": "joy"}, adapter) + adapter.face.assert_called_once_with("joy") + assert "joy" in result[0].text + + @pytest.mark.asyncio + async def test_invalid_emotion_returns_error(self, adapter): + 
result = await handle_tool_call("om1_face", {"emotion": "angry"}, adapter) + adapter.face.assert_not_called() + assert "error" in result[0].text.lower() + + +class TestUnknownTool: + @pytest.mark.asyncio + async def test_unknown_tool_returns_error(self, adapter): + result = await handle_tool_call("om1_teleport", {}, adapter) + assert "error" in result[0].text.lower() + assert "om1_teleport" in result[0].text diff --git a/tests/expose/test_tools.py b/tests/expose/test_tools.py new file mode 100644 index 000000000..f4388f8d2 --- /dev/null +++ b/tests/expose/test_tools.py @@ -0,0 +1,47 @@ +"""Tests for expose.tools — MCP tool schema definitions.""" + +from expose.tools import Emotion, MoveAction, build_tool_definitions + + +def _tool(name: str): + return next(t for t in build_tool_definitions() if t.name == name) + + +class TestBuildToolDefinitions: + def test_returns_three_tools_with_expected_names(self): + tools = build_tool_definitions() + names = {t.name for t in tools} + assert names == {"om1_move", "om1_speak", "om1_face"} + + +class TestMoveSchema: + def test_move_action_param_is_enum_matching_move_action_enum(self): + schema = _tool("om1_move").inputSchema + action = schema["properties"]["action"] + assert set(action["enum"]) == {a.value for a in MoveAction} + assert schema["required"] == ["action"] + + def test_move_action_enum_contains_core_values(self): + values = {a.value for a in MoveAction} + assert {"forward", "backward", "turn_left", "turn_right", "spin"} <= values + + +class TestFaceSchema: + def test_face_emotion_param_is_enum_matching_emotion_enum(self): + schema = _tool("om1_face").inputSchema + emotion = schema["properties"]["emotion"] + assert set(emotion["enum"]) == {e.value for e in Emotion} + assert schema["required"] == ["emotion"] + + def test_emotion_enum_has_five_values(self): + assert {e.value for e in Emotion} == {"joy", "smile", "ponder", "alert", "sad"} + + +class TestSpeakSchema: + def test_speak_text_has_length_constraints(self): + 
schema = _tool("om1_speak").inputSchema + text = schema["properties"]["text"] + assert text["type"] == "string" + assert text["minLength"] == 1 + assert text["maxLength"] == 500 + assert schema["required"] == ["text"] diff --git a/tests/expose/test_websim_adapter.py b/tests/expose/test_websim_adapter.py new file mode 100644 index 000000000..9323cd66c --- /dev/null +++ b/tests/expose/test_websim_adapter.py @@ -0,0 +1,56 @@ +"""Tests for expose.websim_adapter — WebSim wrapper.""" + +import socket +from unittest.mock import MagicMock + +import pytest + +from expose.websim_adapter import WebSimAdapter + + +@pytest.fixture +def fake_websim(): + return MagicMock() + + +class TestToolCalls: + def test_move_sends_move_action_to_websim(self, fake_websim): + adapter = WebSimAdapter(fake_websim) + adapter.move("forward") + args, _ = fake_websim.sim.call_args + actions = args[0] + assert len(actions) == 1 + assert actions[0].type == "move" + assert actions[0].value == "forward" + + def test_speak_sends_speak_action(self, fake_websim): + adapter = WebSimAdapter(fake_websim) + adapter.speak("hello") + actions = fake_websim.sim.call_args[0][0] + assert actions[0].type == "speak" + assert actions[0].value == "hello" + + def test_face_sends_emotion_action(self, fake_websim): + adapter = WebSimAdapter(fake_websim) + adapter.face("joy") + actions = fake_websim.sim.call_args[0][0] + assert actions[0].type == "emotion" + assert actions[0].value == "joy" + + +class TestEnsurePortFree: + def test_passes_when_port_unused(self): + # Port 0 tells OS to pick a free port; close it and check a likely-free port + WebSimAdapter.ensure_port_free("127.0.0.1", 59999) + + def test_raises_when_port_occupied(self): + # Hold a port, then assert the check raises + s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + s.bind(("127.0.0.1", 0)) + s.listen(1) + held_port = s.getsockname()[1] + try: + with pytest.raises(RuntimeError, match="already in use"): + WebSimAdapter.ensure_port_free("127.0.0.1", 
held_port) + finally: + s.close() From 997cac120260bdef33bab08e8d5e837661ffe365 Mon Sep 17 00:00:00 2001 From: Haofeng Liang Date: Sun, 19 Apr 2026 00:12:30 +0800 Subject: [PATCH 3/3] feat(mcp): add om1-mcp-server CLI entry point Declare project.scripts so users can launch the server with a single command after `uv pip install -e .`: om1-mcp-server This simplifies Claude Desktop / Cursor configuration from multi-line (with explicit python path, PYTHONPATH, cwd, and -m module) to a single 'command' field pointing at the generated binary. --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 89672e412..1f8759682 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,9 @@ dds = [ ] macos = ["osascript"] +[project.scripts] +om1-mcp-server = "expose.server:main" + [dependency-groups] dev = [ "black==26.3.1",