OpenHands · simonrosenberg · Mar 16, 2026 · Mar 16, 2026 · Mar 16, 2026 · Mar 16, 2026
diff --git a/.github/workflows/run-eval.yml b/.github/workflows/run-eval.yml
@@ -97,6 +97,18 @@ on:
                     - gemini
                     - gpt5
                     - planning
+            agent_type:  # forwarded to the eval dispatch payload as inputs.agent_type
+                description: >-
+                    Agent type: 'default' for standard Agent,
+                    'acp-claude' for ACPAgent with Claude Code,
+                    'acp-codex' for ACPAgent with Codex.
+                required: false
+                default: default  # same fallback the dispatch step uses for AGENT_TYPE
+                type: choice
+                options:
+                    - default
+                    - acp-claude
+                    - acp-codex
 
 
 env:
@@ -319,6 +331,7 @@ jobs:
                   ENABLE_CONVERSATION_EVENT_LOGGING: ${{ github.event.inputs.enable_conversation_event_logging || false }}
                   MAX_RETRIES: ${{ github.event.inputs.max_retries || '3' }}
                   TOOL_PRESET: ${{ github.event.inputs.tool_preset || 'default' }}
+                  AGENT_TYPE: ${{ github.event.inputs.agent_type || 'default' }}
                   TRIGGERED_BY: ${{ github.actor }}
               run: |
                   echo "Dispatching evaluation workflow with SDK commit: $SDK_SHA (benchmark: $BENCHMARK, eval branch: $EVAL_BRANCH, benchmarks branch: $BENCHMARKS_BRANCH, tool preset: $TOOL_PRESET)"
@@ -337,8 +350,9 @@ jobs:
                     --argjson enable_conversation_event_logging "$ENABLE_CONVERSATION_EVENT_LOGGING" \
                     --arg max_retries "$MAX_RETRIES" \
                     --arg tool_preset "$TOOL_PRESET" \
+                    --arg agent_type "$AGENT_TYPE" \
                     --arg triggered_by "$TRIGGERED_BY" \
-                    '{ref: $ref, inputs: {sdk_commit: $sdk, eval_limit: $eval_limit, models_json: ($models | tostring), trigger_reason: $reason, pr_number: $pr, benchmarks_branch: $benchmarks, benchmark: $benchmark, instance_ids: $instance_ids, num_infer_workers: $num_infer_workers, num_eval_workers: $num_eval_workers, enable_conversation_event_logging: $enable_conversation_event_logging, max_retries: $max_retries, tool_preset: $tool_preset, triggered_by: $triggered_by}}')
+                    '{ref: $ref, inputs: {sdk_commit: $sdk, eval_limit: $eval_limit, models_json: ($models | tostring), trigger_reason: $reason, pr_number: $pr, benchmarks_branch: $benchmarks, benchmark: $benchmark, instance_ids: $instance_ids, num_infer_workers: $num_infer_workers, num_eval_workers: $num_eval_workers, enable_conversation_event_logging: $enable_conversation_event_logging, max_retries: $max_retries, tool_preset: $tool_preset, agent_type: $agent_type, triggered_by: $triggered_by}}')
                   RESPONSE=$(curl -sS -o /tmp/dispatch.out -w "%{http_code}" -X POST \
                     -H "Authorization: token $PAT_TOKEN" \
                     -H "Accept: application/vnd.github+json" \

diff --git a/examples/01_standalone_sdk/40_acp_agent_example.py b/examples/01_standalone_sdk/40_acp_agent_example.py
@@ -7,7 +7,7 @@
 
 Prerequisites:
     - Node.js / npx available
-    - Claude Code CLI authenticated (or CLAUDE_API_KEY set)
+    - ANTHROPIC_BASE_URL and ANTHROPIC_API_KEY set (can point to LiteLLM proxy)
 
 Usage:
     uv run python examples/01_standalone_sdk/40_acp_agent_example.py
@@ -38,6 +38,9 @@
         "Based on what you just saw, which agent class is the newest addition?"
     )
     print(f"ask_agent response: {response}")
+    # Report cost (ACP server reports usage via session_update notifications)
+    cost = agent.llm.metrics.accumulated_cost
+    print(f"EXAMPLE_COST: {cost:.4f}")
 finally:
     # Clean up the ACP server subprocess
     agent.close()

diff --git a/examples/02_remote_agent_server/09_acp_agent_with_remote_runtime.py b/examples/02_remote_agent_server/09_acp_agent_with_remote_runtime.py
@@ -0,0 +1,87 @@
+"""Example: ACPAgent with Remote Runtime via API.
+
+This example demonstrates running an ACPAgent (Claude Code via ACP protocol)
+in a remote sandboxed environment via Runtime API. It follows the same pattern
+as 04_convo_with_api_sandboxed_server.py but uses ACPAgent instead of the
+default LLM-based Agent.
+
+Usage:
+  uv run examples/02_remote_agent_server/09_acp_agent_with_remote_runtime.py
+
+Requirements:
+  - LLM_BASE_URL: LiteLLM proxy URL (routes Claude Code requests)
+  - LLM_API_KEY: LiteLLM virtual API key
+  - RUNTIME_API_KEY: API key for runtime API access (RUNTIME_API_URL is optional)
+"""
+
+import os
+import time
+
+from openhands.sdk import (
+    Conversation,
+    RemoteConversation,
+    get_logger,
+)
+from openhands.sdk.agent import ACPAgent
+from openhands.workspace import APIRemoteWorkspace
+
+
+logger = get_logger(__name__)
+
+
+# ACP agents (Claude Code) reach the model through a LiteLLM proxy (both required)
+llm_base_url = os.environ.get("LLM_BASE_URL")
+llm_api_key = os.environ.get("LLM_API_KEY")
+assert llm_base_url and llm_api_key, "LLM_BASE_URL and LLM_API_KEY required"
+
+# Publish them under the ANTHROPIC_* names so Claude Code routes through LiteLLM;
+# these are the two variables listed in `forward_env` for the workspace below.
+os.environ.update(ANTHROPIC_BASE_URL=llm_base_url, ANTHROPIC_API_KEY=llm_api_key)
+
+runtime_api_key = os.environ.get("RUNTIME_API_KEY")
+assert runtime_api_key, "RUNTIME_API_KEY required"
+
+# Pin the server image to the CI commit (GITHUB_SHA, first 7 chars) when available;
+# otherwise fall back to the image tagged from main.
+server_image_sha = os.environ.get("GITHUB_SHA") or "main"
+server_image = f"ghcr.io/openhands/agent-server:{server_image_sha[:7]}-python-amd64"
+logger.info(f"Using server image: {server_image}")
+
+with APIRemoteWorkspace(
+    runtime_api_url=os.getenv("RUNTIME_API_URL", "https://runtime.eval.all-hands.dev"),
+    runtime_api_key=runtime_api_key,
+    server_image=server_image,
+    image_pull_policy="Always",
+    target_type="binary",  # CI builds binary target images
+    forward_env=["ANTHROPIC_BASE_URL", "ANTHROPIC_API_KEY"],
+) as workspace:
+    agent = ACPAgent(acp_command=["claude-agent-acp"])  # Pre-installed in Docker image
+
+    # Monotonic timestamps: the quiet-period wait must not jump with the wall clock.
+    received_events: list = []
+    last_event_time = {"ts": time.monotonic()}
+
+    def event_callback(event) -> None:
+        received_events.append(event)
+        last_event_time["ts"] = time.monotonic()
+
+    conversation = Conversation(
+        agent=agent, workspace=workspace, callbacks=[event_callback]
+    )
+    assert isinstance(conversation, RemoteConversation)
+
+    try:
+        conversation.send_message(
+            "List the files in /workspace and describe what you see."
+        )
+        conversation.run()
+
+        # Wait until no event has arrived for 2 seconds before reading the metrics.
+        while time.monotonic() - last_event_time["ts"] < 2.0:
+            time.sleep(0.1)
+
+        # Report cost
+        cost = conversation.conversation_stats.get_combined_metrics().accumulated_cost
+        print(f"EXAMPLE_COST: {cost:.4f}")
+    finally:
+        conversation.close()
diff --git a/openhands-agent-server/openhands/agent_server/api.py b/openhands-agent-server/openhands/agent_server/api.py
@@ -15,6 +15,7 @@
     get_default_config,
 )
 from openhands.agent_server.conversation_router import conversation_router
+from openhands.agent_server.conversation_router_acp import conversation_router_acp
 from openhands.agent_server.conversation_service import (
     get_default_conversation_service,
 )
@@ -199,6 +200,7 @@ def _add_api_routes(app: FastAPI, config: Config) -> None:
     api_router = APIRouter(prefix="/api", dependencies=dependencies)
     api_router.include_router(event_router)
     api_router.include_router(conversation_router)
+    api_router.include_router(conversation_router_acp)
     api_router.include_router(tool_router)
     api_router.include_router(bash_router)
     api_router.include_router(git_router)

diff --git a/openhands-agent-server/openhands/agent_server/conversation_router_acp.py b/openhands-agent-server/openhands/agent_server/conversation_router_acp.py
@@ -0,0 +1,121 @@
+"""ACP-capable conversation routes for the schema-sensitive endpoints."""
+
+from typing import Annotated
+from uuid import UUID
+
+from fastapi import APIRouter, Body, Depends, HTTPException, Query, Response, status
+from pydantic import SecretStr
+
+from openhands.agent_server.conversation_service import ConversationService
+from openhands.agent_server.dependencies import get_conversation_service
+from openhands.agent_server.models import (
+    ACPConversationInfo,
+    ACPConversationPage,
+    ConversationSortOrder,
+    SendMessageRequest,
+    StartACPConversationRequest,
+)
+from openhands.sdk import LLM, Agent, TextContent
+from openhands.sdk.agent.acp_agent import ACPAgent
+from openhands.sdk.conversation.state import ConversationExecutionStatus
+from openhands.sdk.workspace import LocalWorkspace
+from openhands.tools.preset.default import get_default_tools
+
+
+conversation_router_acp = APIRouter(
+    prefix="/acp/conversations",  # included under the "/api" router in api.py
+    tags=["ACP Conversations"],
+)
+
+START_ACP_CONVERSATION_EXAMPLES = [  # request-body examples used by the POST route
+    StartACPConversationRequest(  # standard Agent backed by an LLM
+        agent=Agent(
+            llm=LLM(
+                usage_id="your-llm-service",
+                model="your-model-provider/your-model-name",
+                api_key=SecretStr("your-api-key-here"),
+            ),
+            tools=get_default_tools(enable_browser=True),
+        ),
+        workspace=LocalWorkspace(working_dir="workspace/project"),
+        initial_message=SendMessageRequest(
+            role="user", content=[TextContent(text="Flip a coin!")]
+        ),
+    ).model_dump(exclude_defaults=True, mode="json"),
+    StartACPConversationRequest(  # ACPAgent launched through npx
+        agent=ACPAgent(acp_command=["npx", "-y", "claude-agent-acp"]),
+        workspace=LocalWorkspace(working_dir="workspace/project"),
+        initial_message=SendMessageRequest(
+            role="user",
+            content=[TextContent(text="Inspect the repository and summarize it.")],
+        ),
+    ).model_dump(exclude_defaults=True, mode="json"),
+]
+
+
+@conversation_router_acp.get("/search")
+async def search_acp_conversations(
+    page_id: Annotated[
+        str | None,
+        Query(title="Optional next_page_id from the previously returned page"),
+    ] = None,
+    limit: Annotated[
+        int,
+        Query(title="The max number of results in the page", gt=0, le=100),
+    ] = 100,
+    status: Annotated[
+        ConversationExecutionStatus | None,
+        Query(title="Optional filter by conversation execution status"),
+    ] = None,
+    sort_order: Annotated[
+        ConversationSortOrder,
+        Query(title="Sort order for conversations"),
+    ] = ConversationSortOrder.CREATED_AT_DESC,
+    conversation_service: ConversationService = Depends(get_conversation_service),
+) -> ACPConversationPage:
+    """Search conversations using the ACP-capable contract."""
+    # `limit` is range-checked by the Query constraints above (0 < limit <= 100),
+    # so an out-of-range value is rejected with a 422 before reaching this body.
+    return await conversation_service.search_acp_conversations(
+        page_id, limit, status, sort_order
+    )
+
+
+@conversation_router_acp.get(
+    "/{conversation_id}",
+    responses={404: {"description": "Item not found"}},
+)
+async def get_acp_conversation(
+    conversation_id: UUID,
+    conversation_service: ConversationService = Depends(get_conversation_service),
+) -> ACPConversationInfo:
+    """Return one conversation (ACP-capable contract); 404 if the service has none."""
+    found = await conversation_service.get_acp_conversation(conversation_id)
+    if found is not None:
+        return found
+    raise HTTPException(status.HTTP_404_NOT_FOUND)
+
+
+@conversation_router_acp.get("")
+async def batch_get_acp_conversations(
+    ids: Annotated[list[UUID], Query()],
+    conversation_service: ConversationService = Depends(get_conversation_service),
+) -> list[ACPConversationInfo | None]:
+    """Batch get conversations (fewer than 100 ids) via the ACP-capable contract."""
+    assert len(ids) < 100  # NOTE(review): stripped under -O; prefer an explicit 4xx
+    return await conversation_service.batch_get_acp_conversations(ids)
+
+
+@conversation_router_acp.post("")
+async def start_acp_conversation(
+    request: Annotated[
+        StartACPConversationRequest,
+        Body(examples=START_ACP_CONVERSATION_EXAMPLES),
+    ],
+    response: Response,
+    conversation_service: ConversationService = Depends(get_conversation_service),
+) -> ACPConversationInfo:
+    """Start an ACP conversation: 201 if the service reports it new, else 200."""
+    conversation, is_new = await conversation_service.start_acp_conversation(request)
+    response.status_code = status.HTTP_200_OK if not is_new else status.HTTP_201_CREATED
+    return conversation