Skip to content
16 changes: 15 additions & 1 deletion .github/workflows/run-eval.yml
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,18 @@ on:
- gemini
- gpt5
- planning
agent_type:
description: >-
Agent type: 'default' for standard Agent,
'acp-claude' for ACPAgent with Claude Code,
'acp-codex' for ACPAgent with Codex.
required: false
default: default
type: choice
options:
- default
- acp-claude
- acp-codex


env:
Expand Down Expand Up @@ -319,6 +331,7 @@ jobs:
ENABLE_CONVERSATION_EVENT_LOGGING: ${{ github.event.inputs.enable_conversation_event_logging || false }}
MAX_RETRIES: ${{ github.event.inputs.max_retries || '3' }}
TOOL_PRESET: ${{ github.event.inputs.tool_preset || 'default' }}
AGENT_TYPE: ${{ github.event.inputs.agent_type || 'default' }}
TRIGGERED_BY: ${{ github.actor }}
run: |
echo "Dispatching evaluation workflow with SDK commit: $SDK_SHA (benchmark: $BENCHMARK, eval branch: $EVAL_BRANCH, benchmarks branch: $BENCHMARKS_BRANCH, tool preset: $TOOL_PRESET)"
Expand All @@ -337,8 +350,9 @@ jobs:
--argjson enable_conversation_event_logging "$ENABLE_CONVERSATION_EVENT_LOGGING" \
--arg max_retries "$MAX_RETRIES" \
--arg tool_preset "$TOOL_PRESET" \
--arg agent_type "$AGENT_TYPE" \
--arg triggered_by "$TRIGGERED_BY" \
'{ref: $ref, inputs: {sdk_commit: $sdk, eval_limit: $eval_limit, models_json: ($models | tostring), trigger_reason: $reason, pr_number: $pr, benchmarks_branch: $benchmarks, benchmark: $benchmark, instance_ids: $instance_ids, num_infer_workers: $num_infer_workers, num_eval_workers: $num_eval_workers, enable_conversation_event_logging: $enable_conversation_event_logging, max_retries: $max_retries, tool_preset: $tool_preset, triggered_by: $triggered_by}}')
'{ref: $ref, inputs: {sdk_commit: $sdk, eval_limit: $eval_limit, models_json: ($models | tostring), trigger_reason: $reason, pr_number: $pr, benchmarks_branch: $benchmarks, benchmark: $benchmark, instance_ids: $instance_ids, num_infer_workers: $num_infer_workers, num_eval_workers: $num_eval_workers, enable_conversation_event_logging: $enable_conversation_event_logging, max_retries: $max_retries, tool_preset: $tool_preset, agent_type: $agent_type, triggered_by: $triggered_by}}')
RESPONSE=$(curl -sS -o /tmp/dispatch.out -w "%{http_code}" -X POST \
-H "Authorization: token $PAT_TOKEN" \
-H "Accept: application/vnd.github+json" \
Expand Down
5 changes: 4 additions & 1 deletion examples/01_standalone_sdk/40_acp_agent_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

Prerequisites:
- Node.js / npx available
- Claude Code CLI authenticated (or CLAUDE_API_KEY set)
- ANTHROPIC_BASE_URL and ANTHROPIC_API_KEY set (can point to LiteLLM proxy)

Usage:
uv run python examples/01_standalone_sdk/40_acp_agent_example.py
Expand Down Expand Up @@ -38,6 +38,9 @@
"Based on what you just saw, which agent class is the newest addition?"
)
print(f"ask_agent response: {response}")
# Report cost (ACP server reports usage via session_update notifications)
cost = agent.llm.metrics.accumulated_cost
print(f"EXAMPLE_COST: {cost:.4f}")
finally:
# Clean up the ACP server subprocess
agent.close()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
"""Example: ACPAgent with Remote Runtime via API.

This example demonstrates running an ACPAgent (Claude Code via ACP protocol)
in a remote sandboxed environment via Runtime API. It follows the same pattern
as 04_convo_with_api_sandboxed_server.py but uses ACPAgent instead of the
default LLM-based Agent.

Usage:
uv run examples/02_remote_agent_server/09_acp_agent_with_remote_runtime.py

Requirements:
- LLM_BASE_URL: LiteLLM proxy URL (routes Claude Code requests)
- LLM_API_KEY: LiteLLM virtual API key
- RUNTIME_API_KEY: API key for runtime API access
"""

import os
import time

from openhands.sdk import (
Conversation,
RemoteConversation,
get_logger,
)
from openhands.sdk.agent import ACPAgent
from openhands.workspace import APIRemoteWorkspace


logger = get_logger(__name__)


# ACP agents (Claude Code) route through LiteLLM proxy.
# Validate with explicit raises rather than `assert`: assertions are removed
# under `python -O`, which would let a missing variable surface later as an
# opaque TypeError from `os.environ[...] = None`.
llm_base_url = os.getenv("LLM_BASE_URL")
llm_api_key = os.getenv("LLM_API_KEY")
if not (llm_base_url and llm_api_key):
    raise RuntimeError("LLM_BASE_URL and LLM_API_KEY required")

# Set ANTHROPIC_* vars so Claude Code routes through LiteLLM
os.environ["ANTHROPIC_BASE_URL"] = llm_base_url
os.environ["ANTHROPIC_API_KEY"] = llm_api_key

runtime_api_key = os.getenv("RUNTIME_API_KEY")
if not runtime_api_key:
    raise RuntimeError("RUNTIME_API_KEY required")

# If GITHUB_SHA is set (e.g. running in CI of a PR), use that to ensure consistency
# Otherwise, use the latest image from main
server_image_sha = os.getenv("GITHUB_SHA") or "main"
# The tag uses at most the first 7 characters of the SHA ("main" is kept whole).
server_image = f"ghcr.io/openhands/agent-server:{server_image_sha[:7]}-python-amd64"
logger.info(f"Using server image: {server_image}")

with APIRemoteWorkspace(
    runtime_api_url=os.getenv("RUNTIME_API_URL", "https://runtime.eval.all-hands.dev"),
    runtime_api_key=runtime_api_key,
    server_image=server_image,
    image_pull_policy="Always",
    target_type="binary",  # CI builds binary target images
    forward_env=["ANTHROPIC_BASE_URL", "ANTHROPIC_API_KEY"],
) as workspace:
    # The ACP command is pre-installed in the Docker image.
    agent = ACPAgent(acp_command=["claude-agent-acp"])

    # Every event is collected, and the arrival time of the most recent one is
    # kept in a mutable cell so the callback can update it.
    received_events: list = []
    last_event_time = {"ts": time.time()}

    def event_callback(event) -> None:
        """Record *event* and refresh the last-seen timestamp."""
        received_events.append(event)
        last_event_time["ts"] = time.time()

    conversation = Conversation(
        agent=agent,
        workspace=workspace,
        callbacks=[event_callback],
    )
    assert isinstance(conversation, RemoteConversation)

    try:
        conversation.send_message(
            "List the files in /workspace and describe what you see."
        )
        conversation.run()

        # Keep polling until no event has arrived for two full seconds.
        while True:
            idle_for = time.time() - last_event_time["ts"]
            if not idle_for < 2.0:
                break
            time.sleep(0.1)

        # Report cost
        combined_metrics = conversation.conversation_stats.get_combined_metrics()
        cost = combined_metrics.accumulated_cost
        print(f"EXAMPLE_COST: {cost:.4f}")
    finally:
        conversation.close()
2 changes: 2 additions & 0 deletions openhands-agent-server/openhands/agent_server/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
get_default_config,
)
from openhands.agent_server.conversation_router import conversation_router
from openhands.agent_server.conversation_router_acp import conversation_router_acp
from openhands.agent_server.conversation_service import (
get_default_conversation_service,
)
Expand Down Expand Up @@ -199,6 +200,7 @@ def _add_api_routes(app: FastAPI, config: Config) -> None:
api_router = APIRouter(prefix="/api", dependencies=dependencies)
api_router.include_router(event_router)
api_router.include_router(conversation_router)
api_router.include_router(conversation_router_acp)
api_router.include_router(tool_router)
api_router.include_router(bash_router)
api_router.include_router(git_router)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
"""ACP-capable conversation routes for the schema-sensitive endpoints."""

from typing import Annotated
from uuid import UUID

from fastapi import APIRouter, Body, Depends, HTTPException, Query, Response, status
from pydantic import SecretStr

from openhands.agent_server.conversation_service import ConversationService
from openhands.agent_server.dependencies import get_conversation_service
from openhands.agent_server.models import (
ACPConversationInfo,
ACPConversationPage,
ConversationSortOrder,
SendMessageRequest,
StartACPConversationRequest,
)
from openhands.sdk import LLM, Agent, TextContent
from openhands.sdk.agent.acp_agent import ACPAgent
from openhands.sdk.conversation.state import ConversationExecutionStatus
from openhands.sdk.workspace import LocalWorkspace
from openhands.tools.preset.default import get_default_tools


# Router for the ACP-capable conversation endpoints defined in this module.
# All routes registered on it are served under the "/acp/conversations" prefix
# and grouped under the "ACP Conversations" tag.
conversation_router_acp = APIRouter(
    prefix="/acp/conversations",
    tags=["ACP Conversations"],
)

# Example request bodies for starting a conversation. Each entry is a
# StartACPConversationRequest serialized to JSON-compatible data with default
# values omitted (exclude_defaults=True, mode="json").
START_ACP_CONVERSATION_EXAMPLES = [
    # Example 1: a standard Agent configured with an LLM and the default tools.
    StartACPConversationRequest(
        agent=Agent(
            llm=LLM(
                usage_id="your-llm-service",
                model="your-model-provider/your-model-name",
                api_key=SecretStr("your-api-key-here"),
            ),
            tools=get_default_tools(enable_browser=True),
        ),
        workspace=LocalWorkspace(working_dir="workspace/project"),
        initial_message=SendMessageRequest(
            role="user", content=[TextContent(text="Flip a coin!")]
        ),
    ).model_dump(exclude_defaults=True, mode="json"),
    # Example 2: an ACPAgent whose ACP command is launched through npx.
    StartACPConversationRequest(
        agent=ACPAgent(acp_command=["npx", "-y", "claude-agent-acp"]),
        workspace=LocalWorkspace(working_dir="workspace/project"),
        initial_message=SendMessageRequest(
            role="user",
            content=[TextContent(text="Inspect the repository and summarize it.")],
        ),
    ).model_dump(exclude_defaults=True, mode="json"),
]


@conversation_router_acp.get("/search")
async def search_acp_conversations(
    page_id: Annotated[
        str | None,
        Query(title="Optional next_page_id from the previously returned page"),
    ] = None,
    limit: Annotated[
        int,
        # The upper-bound keyword is `le`; `lte` is not a validation constraint
        # and would leave the maximum unenforced at the HTTP layer.
        Query(title="The max number of results in the page", gt=0, le=100),
    ] = 100,
    status: Annotated[
        ConversationExecutionStatus | None,
        Query(title="Optional filter by conversation execution status"),
    ] = None,
    sort_order: Annotated[
        ConversationSortOrder,
        Query(title="Sort order for conversations"),
    ] = ConversationSortOrder.CREATED_AT_DESC,
    conversation_service: ConversationService = Depends(get_conversation_service),
) -> ACPConversationPage:
    """Search conversations using the ACP-capable contract.

    Args:
        page_id: Optional ``next_page_id`` taken from a previously returned page.
        limit: Maximum number of results in the page; must satisfy
            ``0 < limit <= 100``. Out-of-range values are rejected by request
            validation before this handler runs.
        status: Optional filter by conversation execution status. Note that
            this parameter shadows the ``fastapi.status`` module inside the
            function body.
        sort_order: Sort order for the returned conversations.
        conversation_service: Injected conversation service.

    Returns:
        The page produced by ``conversation_service.search_acp_conversations``.
    """
    # Invariants already guaranteed by the Query constraints for HTTP requests;
    # kept as a safety net for direct in-process callers.
    assert limit > 0
    assert limit <= 100
    return await conversation_service.search_acp_conversations(
        page_id, limit, status, sort_order
    )


@conversation_router_acp.get(
    "/{conversation_id}",
    responses={404: {"description": "Item not found"}},
)
async def get_acp_conversation(
    conversation_id: UUID,
    conversation_service: ConversationService = Depends(get_conversation_service),
) -> ACPConversationInfo:
    """Fetch a single conversation through the ACP-capable contract.

    Args:
        conversation_id: Identifier of the conversation to look up.
        conversation_service: Injected conversation service.

    Returns:
        The conversation info returned by the service.

    Raises:
        HTTPException: With status 404 when the service returns ``None``.
    """
    found = await conversation_service.get_acp_conversation(conversation_id)
    if found is not None:
        return found
    raise HTTPException(status.HTTP_404_NOT_FOUND)


@conversation_router_acp.get("")
async def batch_get_acp_conversations(
    ids: Annotated[list[UUID], Query()],
    conversation_service: ConversationService = Depends(get_conversation_service),
) -> list[ACPConversationInfo | None]:
    """Batch get conversations using the ACP-capable contract.

    Args:
        ids: Conversation identifiers to fetch; fewer than 100 may be
            requested in a single call.
        conversation_service: Injected conversation service.

    Returns:
        The list returned by ``conversation_service.batch_get_acp_conversations``;
        per the return annotation, entries may be ``None``.

    Raises:
        HTTPException: With status 400 when 100 or more ids are supplied.
    """
    # Client input is validated explicitly: an `assert` would be stripped under
    # `python -O` and would otherwise surface as a 500 instead of a client error.
    if len(ids) >= 100:
        raise HTTPException(
            status.HTTP_400_BAD_REQUEST,
            detail="Fewer than 100 conversation ids may be requested at once",
        )
    return await conversation_service.batch_get_acp_conversations(ids)


@conversation_router_acp.post("")
async def start_acp_conversation(
    request: Annotated[
        StartACPConversationRequest,
        Body(examples=START_ACP_CONVERSATION_EXAMPLES),
    ],
    response: Response,
    conversation_service: ConversationService = Depends(get_conversation_service),
) -> ACPConversationInfo:
    """Start a conversation through the ACP-capable contract.

    Args:
        request: The start-conversation payload.
        response: Outgoing response object whose status code is set here.
        conversation_service: Injected conversation service.

    Returns:
        The conversation info returned by the service. The response status is
        201 when the service reports the conversation as new, otherwise 200.
    """
    info, created = await conversation_service.start_acp_conversation(request)
    if created:
        response.status_code = status.HTTP_201_CREATED
    else:
        response.status_code = status.HTTP_200_OK
    return info
Loading
Loading