Skip to content
Open
157 changes: 157 additions & 0 deletions sdk/python/examples/responses.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
"""End-to-end example for the OpenAI Responses API client.

Run with::

python examples/responses.py

Requires a loaded model and a started web service.
"""

from __future__ import annotations

import json

from foundry_local_sdk import (
Configuration,
FoundryLocalManager,
FunctionToolDefinition,
InputImageContent,
InputTextContent,
MessageItem,
)

# Alias of the model to run; must exist in the local catalog (see setup()).
MODEL_ALIAS = "phi-4-mini"


def setup():
    """Initialize the SDK, ensure the model is cached and loaded, and start the service.

    Downloads the model if it is not cached, loads it, starts the local
    web service, and builds a Responses client bound to the model id.

    Returns:
        Tuple of ``(manager, model, client)``.

    Raises:
        RuntimeError: If ``MODEL_ALIAS`` is not present in the catalog.
    """
    config = Configuration(app_name="ResponsesExample")
    FoundryLocalManager.initialize(config)
    mgr = FoundryLocalManager.instance

    mgr.download_and_register_eps()

    model = mgr.catalog.get_model(MODEL_ALIAS)
    if model is None:
        raise RuntimeError(f"Model '{MODEL_ALIAS}' not found in catalog")
    if not model.is_cached:
        print(f"Downloading {MODEL_ALIAS}...")
        # flush=True so each \r-overwritten progress tick is actually visible.
        model.download(progress_callback=lambda p: print(f" {p:.1f}%", end="\r", flush=True))
        print()
    # flush=True: without it the partial line may be buffered and the user sees
    # nothing while the (blocking) load call runs.
    print(f"Loading {model.alias}...", end="", flush=True)
    model.load()
    print("loaded!")
    mgr.start_web_service()

    client = mgr.create_responses_client(model.id)
    return mgr, model, client


def basic_create(client):
    """Demonstrate a one-shot, non-streaming Responses create call."""
    print("\n=== 1. Basic create ===")
    result = client.create("What is 2 + 2? Answer in one word.")
    print(f"status={result.status} text={result.output_text!r}")


def streaming(client):
    """Stream a response token-by-token, then report completion stats."""
    print("\n=== 2. Streaming ===")
    print("assistant: ", end="", flush=True)
    for ev in client.create_streaming("Count from 1 to 5, separated by spaces."):
        kind = ev.type
        if kind == "response.output_text.delta":
            print(ev.delta, end="", flush=True)
        elif kind == "response.completed":
            # Usage may be absent depending on the server; degrade gracefully.
            resp = getattr(ev, "response", None)
            usage = None if resp is None else getattr(resp, "usage", None)
            total = None if usage is None else getattr(usage, "total_tokens", None)
            suffix = f", {total} tokens" if total is not None else ""
            print(f"\n(completed{suffix})")


def multi_turn(client):
    """Chain two requests using previous_response_id for server-side history."""
    print("\n=== 3. Multi-turn ===")
    initial = client.create("My favorite color is green. Remember that.", store=True)
    print(f"first id={initial.id!r}")
    followup = client.create(
        "What is my favorite color?",
        previous_response_id=initial.id,
    )
    print(f"second: {followup.output_text!r}")


def tool_calling(client):
    """Demonstrate the function-calling round trip.

    Request -> model emits function_call -> we execute it locally -> send the
    result back keyed by call_id -> model produces the final answer.
    """
    print("\n=== 4. Tool calling ===")
    tools = [
        FunctionToolDefinition(
            name="multiply_numbers",
            description="Multiply two integers together.",
            parameters={
                "type": "object",
                "properties": {
                    "a": {"type": "integer"},
                    "b": {"type": "integer"},
                },
                "required": ["a", "b"],
            },
        )
    ]
    resp = client.create("What is 7 times 6?", tools=tools)

    # Pick the first function_call item from the output, if the model made one.
    call = next(
        (it for it in resp.output if getattr(it, "type", None) == "function_call"),
        None,
    )
    if call is None:
        print(f"no tool call — got text: {resp.output_text!r}")
        return

    print(f"call {call.name}({call.arguments})")
    parsed = json.loads(call.arguments)
    product = parsed["a"] * parsed["b"]
    followup = client.create(
        [
            MessageItem(role="user", content="What is 7 times 6?"),
            call,
            # The function_call_output is sent back keyed by call_id
            {"type": "function_call_output", "call_id": call.call_id, "output": str(product)},
        ],
        tools=tools,
    )
    print(f"final: {followup.output_text!r}")


def vision(client):
    """Send a multimodal (text + image) message; skip gracefully if unsupported."""
    print("\n=== 5. Vision ===")
    # Requires a vision-capable model. Replace with a real PNG to see real output.
    # This is a minimal 1x1 transparent PNG, hex-encoded.
    tiny_png = bytes.fromhex(
        "89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4"
        "890000000d49444154789c6300010000000500010d0a2db40000000049454e44"
        "ae426082"
    )
    message = MessageItem(
        role="user",
        content=[
            InputTextContent(text="Describe this image in one sentence."),
            InputImageContent.from_bytes(tiny_png, "image/png"),
        ],
    )
    try:
        result = client.create([message])
        print(f"vision response: {result.output_text!r}")
    except Exception as exc:
        print(f"(skipped — model may not support vision: {exc})")


def main():
    """Run every example against a freshly loaded model, then clean up.

    Cleanup is nested so the model is unloaded even when stopping the web
    service raises — the original flat ``finally`` would skip ``unload()``
    if ``stop_web_service()`` failed.
    """
    mgr, model, client = setup()
    try:
        basic_create(client)
        streaming(client)
        multi_turn(client)
        tool_calling(client)
        vision(client)
    finally:
        try:
            mgr.stop_web_service()
        finally:
            model.unload()


# Script entry point: run all examples when executed directly.
if __name__ == "__main__":
    main()
11 changes: 5 additions & 6 deletions sdk/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
pydantic>=2.0.0
requests>=2.32.4
openai>=2.24.0
# Standard native binary packages from the ORT-Nightly PyPI feed.
foundry-local-core==1.0.0rc1
onnxruntime-core==1.24.4; sys_platform != "linux"
onnxruntime-gpu==1.24.4; sys_platform == "linux"
onnxruntime-genai-core==0.13.1; sys_platform != "linux"
onnxruntime-genai-cuda==0.13.1; sys_platform == "linux"
foundry-local-core==1.0.0
onnxruntime-gpu==1.24.4; platform_system == "Linux"
onnxruntime-core==1.24.4; platform_system != "Linux"
onnxruntime-genai-cuda==0.13.1; platform_system == "Linux"
onnxruntime-genai-core==0.13.1; platform_system != "Linux"
62 changes: 61 additions & 1 deletion sdk/python/src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,34 @@

from .configuration import Configuration
from .foundry_local_manager import FoundryLocalManager
from .openai.responses_client import ResponsesAPIError, ResponsesClient, ResponsesClientSettings
from .openai.responses_types import (
ContentPart,
DeleteResponseResult,
FunctionCallItem,
FunctionCallOutputItem,
FunctionToolDefinition,
InputFileContent,
InputImageContent,
InputItemsListResponse,
InputTextContent,
ItemReference,
ListResponsesResult,
MessageItem,
OutputTextContent,
ReasoningConfig,
ReasoningItem,
RefusalContent,
ResponseError,
ResponseInputItem,
ResponseObject,
ResponseOutputItem,
ResponseUsage,
StreamingEvent,
TextConfig,
TextFormat,
parse_streaming_event,
)
from .version import __version__

_logger = logging.getLogger(__name__)
Expand All @@ -20,4 +48,36 @@
_logger.addHandler(_sc)
_logger.propagate = False

__all__ = ["Configuration", "FoundryLocalManager", "__version__"]
__all__ = [
"Configuration",
"ContentPart",
"DeleteResponseResult",
"FoundryLocalManager",
"FunctionCallItem",
"FunctionCallOutputItem",
"FunctionToolDefinition",
"InputFileContent",
"InputImageContent",
"InputItemsListResponse",
"InputTextContent",
"ItemReference",
"ListResponsesResult",
"MessageItem",
"OutputTextContent",
"ReasoningConfig",
"ReasoningItem",
"RefusalContent",
"ResponseError",
"ResponseInputItem",
"ResponseObject",
"ResponseOutputItem",
"ResponseUsage",
"ResponsesAPIError",
"ResponsesClient",
"ResponsesClientSettings",
"StreamingEvent",
"TextConfig",
"TextFormat",
"__version__",
"parse_streaming_event",
]
5 changes: 5 additions & 0 deletions sdk/python/src/detail/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from ..openai.chat_client import ChatClient
from ..openai.audio_client import AudioClient
from ..openai.embedding_client import EmbeddingClient
from ..openai.responses_client import ResponsesClient
from .model_variant import ModelVariant
from ..exception import FoundryLocalException
from .core_interop import CoreInterop
Expand Down Expand Up @@ -146,3 +147,7 @@ def get_audio_client(self) -> AudioClient:
def get_embedding_client(self) -> EmbeddingClient:
"""Get an embedding client for the currently selected variant."""
return self._selected_variant.get_embedding_client()

def create_responses_client(self, base_url: str) -> "ResponsesClient":
    """Create a Responses API client for the currently selected variant.

    :param base_url: Base URL of the running Foundry Local web service
        (e.g. ``manager.urls[0]``).
    :return: A ``ResponsesClient`` bound to the selected variant's model id.
    """
    # Delegate to the selected variant, which knows its own model id.
    return self._selected_variant.create_responses_client(base_url)
9 changes: 9 additions & 0 deletions sdk/python/src/detail/model_variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from ..openai.audio_client import AudioClient
from ..openai.chat_client import ChatClient
from ..openai.embedding_client import EmbeddingClient
from ..openai.responses_client import ResponsesClient

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -175,3 +176,11 @@ def get_audio_client(self) -> AudioClient:
def get_embedding_client(self) -> EmbeddingClient:
"""Create an OpenAI-compatible ``EmbeddingClient`` for this variant."""
return EmbeddingClient(self.id, self._core_interop)

def create_responses_client(self, base_url: str) -> ResponsesClient:
    """Create a Responses API client for this variant.

    :param base_url: Base URL of the running Foundry Local web service
        (e.g. ``manager.urls[0]``).
    :return: A ``ResponsesClient`` bound to this variant's model id.
    """
    # The client talks HTTP directly; it only needs the service URL and model id.
    return ResponsesClient(base_url, self.id)
23 changes: 23 additions & 0 deletions sdk/python/src/foundry_local_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from .detail.core_interop import CoreInterop, InteropRequest
from .detail.model_load_manager import ModelLoadManager
from .exception import FoundryLocalException
from .openai.responses_client import ResponsesClient

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -194,3 +195,25 @@ def stop_web_service(self):
raise FoundryLocalException(f"Error stopping web service: {response.error}")

self.urls = None

def create_responses_client(self, model_id: Optional[str] = None) -> ResponsesClient:
    """Create a :class:`ResponsesClient` bound to the running web service.

    The Responses API is HTTP-only; :meth:`start_web_service` must have been
    called before this so that a service URL is available.

    Args:
        model_id: Optional default model ID baked into the client. May also
            be supplied per-call via ``options['model']``.

    Returns:
        A new :class:`ResponsesClient`.

    Raises:
        FoundryLocalException: If the web service has not been started.
    """
    service_urls = self.urls
    if not service_urls:
        raise FoundryLocalException(
            "Web service is not running. Call start_web_service() first."
        )
    # Bind the client to the first advertised service URL.
    return ResponsesClient(service_urls[0], model_id)
16 changes: 16 additions & 0 deletions sdk/python/src/imodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from .openai.chat_client import ChatClient
from .openai.audio_client import AudioClient
from .openai.embedding_client import EmbeddingClient
from .openai.responses_client import ResponsesClient
from .detail.model_data_types import ModelInfo

class IModel(ABC):
Expand Down Expand Up @@ -136,6 +137,21 @@ def get_embedding_client(self) -> 'EmbeddingClient':
"""
pass

@abstractmethod
def create_responses_client(self, base_url: str) -> 'ResponsesClient':
    """
    Create an OpenAI Responses API client bound to the running web service.

    Unlike the other clients, the Responses API is HTTP-only and requires
    the Foundry Local web service to be started. Pass the base URL
    returned by :attr:`FoundryLocalManager.urls` (e.g. ``manager.urls[0]``),
    or use :meth:`FoundryLocalManager.create_responses_client` directly.

    :param base_url: Base URL of the running Foundry Local web service.
    :return: ResponsesClient instance bound to this variant's model id.
    """
    pass

@property
@abstractmethod
def variants(self) -> List['IModel']:
Expand Down
8 changes: 6 additions & 2 deletions sdk/python/src/openai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
"""OpenAI-compatible clients for chat completions and audio transcription."""
"""OpenAI-compatible clients for chat completions, audio, embeddings, and Responses API."""

from .chat_client import ChatClient, ChatClientSettings
from .audio_client import AudioClient
Expand All @@ -14,6 +14,7 @@
LiveAudioTranscriptionResponse,
TranscriptionContentPart,
)
from .responses_client import ResponsesClient, ResponsesClientSettings, ResponsesAPIError

__all__ = [
"AudioClient",
Expand All @@ -24,5 +25,8 @@
"LiveAudioTranscriptionOptions",
"LiveAudioTranscriptionResponse",
"LiveAudioTranscriptionSession",
"ResponsesAPIError",
Comment thread
MaanavD marked this conversation as resolved.
Outdated
"ResponsesClient",
"ResponsesClientSettings",
"TranscriptionContentPart",
]
Loading
Loading