diff --git a/samples/README.md b/samples/README.md
index bcac6bf3..bed7e41c 100644
--- a/samples/README.md
+++ b/samples/README.md
@@ -10,5 +10,5 @@ Explore complete working examples that demonstrate how to use Foundry Local —
 |----------|---------|-------------|
 | [**C#**](cs/) | 13 | .NET SDK samples including native chat, embeddings, audio transcription, tool calling, model management, web server, and tutorials. Uses WinML on Windows for hardware acceleration. |
 | [**JavaScript**](js/) | 13 | Node.js SDK samples including native chat, embeddings, audio transcription, Electron desktop app, Copilot SDK integration, LangChain, tool calling, web server, and tutorials. |
-| [**Python**](python/) | 10 | Python samples using the OpenAI-compatible API, including chat, embeddings, audio transcription, LangChain integration, tool calling, web server, and tutorials. |
+| [**Python**](python/) | 11 | Python samples using the OpenAI-compatible API, including chat, embeddings, audio transcription, LangChain integration, tool calling, web server, Responses API, and tutorials. |
 | [**Rust**](rust/) | 9 | Rust SDK samples including native chat, embeddings, audio transcription, tool calling, web server, and tutorials. |
diff --git a/samples/python/README.md b/samples/python/README.md
index 7262f012..49e99c8a 100644
--- a/samples/python/README.md
+++ b/samples/python/README.md
@@ -14,6 +14,7 @@ These samples demonstrate how to use Foundry Local with Python.
 | [embeddings](embeddings/) | Generate single and batch text embeddings using the Foundry Local SDK. |
 | [audio-transcription](audio-transcription/) | Transcribe audio files using the Whisper model. |
 | [web-server](web-server/) | Start a local OpenAI-compatible web server and call it with the OpenAI Python SDK. |
+| [web-server-responses](web-server-responses/) | Call a running local OpenAI-compatible web server with the Responses API, including streaming and tool calling. |
 | [tool-calling](tool-calling/) | Tool calling with custom function definitions (get_weather, calculate). |
 | [langchain-integration](langchain-integration/) | LangChain integration for building translation and text generation chains. |
 | [tutorial-chat-assistant](tutorial-chat-assistant/) | Build an interactive multi-turn chat assistant (tutorial). |
diff --git a/samples/python/web-server-responses/README.md b/samples/python/web-server-responses/README.md
new file mode 100644
index 00000000..95666d91
--- /dev/null
+++ b/samples/python/web-server-responses/README.md
@@ -0,0 +1,44 @@
+# Foundry Local Python Responses Web-Service Sample
+
+This sample starts the Foundry Local OpenAI-compatible web service, then calls the Responses API with the official OpenAI Python client.
+
+It demonstrates:
+
+- A non-streaming `/v1/responses` call
+- A streaming `/v1/responses` call
+- A function/tool-calling round trip using `previous_response_id`
+
+## What gets installed
+
+Install the sample dependencies from `requirements.txt`:
+
+```bash
+pip install -r requirements.txt
+```
+
+That installs:
+
+- `foundry-local-sdk` on non-Windows platforms
+- `foundry-local-sdk-winml` on Windows
+- `openai`
+
+The sample downloads/registers Foundry Local execution providers and downloads the `qwen2.5-0.5b` model the first time it runs.
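+
+## What the Responses call looks like
+
+At its core, `src/app.py` points the OpenAI client at the local web service and calls `client.responses.create`. The sketch below is a simplified excerpt: `base_url` and `model_id` stand in for values the script obtains from `FoundryLocalManager` at runtime, and the full sample uses a small helper instead of `output_text` in case that field is empty.
+
+```python
+from openai import OpenAI
+
+# base_url comes from the running web service, e.g. "http://localhost:<port>/v1";
+# no real API key is needed for the local endpoint.
+client = OpenAI(base_url=base_url, api_key="notneeded")
+
+response = client.responses.create(
+    model=model_id,
+    input="Reply with one short sentence about local AI.",
+)
+print(response.output_text)
+```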
+
+## Run the sample
+
+From this directory (Windows commands shown):
+
+```bash
+python -m venv .venv
+.\.venv\Scripts\activate
+pip install -r requirements.txt
+python src\app.py
+```
+
+On macOS or Linux, activate the virtual environment and run the app with:
+
+```bash
+source .venv/bin/activate
+python src/app.py
+```
+
+The sample starts the local web service, sends Responses API requests to `http://localhost:<port>/v1` (the port is assigned when the service starts), prints the model output, and then stops the web service and unloads the model.
diff --git a/samples/python/web-server-responses/requirements.txt b/samples/python/web-server-responses/requirements.txt
new file mode 100644
index 00000000..db870f60
--- /dev/null
+++ b/samples/python/web-server-responses/requirements.txt
@@ -0,0 +1,3 @@
+foundry-local-sdk; sys_platform != "win32"
+foundry-local-sdk-winml; sys_platform == "win32"
+openai
diff --git a/samples/python/web-server-responses/src/app.py b/samples/python/web-server-responses/src/app.py
new file mode 100644
index 00000000..6f186a2a
--- /dev/null
+++ b/samples/python/web-server-responses/src/app.py
@@ -0,0 +1,152 @@
+#
+#
+import json
+from typing import Any
+
+from openai import OpenAI
+
+from foundry_local_sdk import Configuration, FoundryLocalManager
+#
+
+
+def get_response_text(response: Any) -> str:
+    if isinstance(getattr(response, "output_text", None), str):
+        return response.output_text
+    return "".join(
+        getattr(part, "text", "")
+        for item in getattr(response, "output", []) or []
+        for part in getattr(item, "content", []) or []
+        if getattr(part, "type", None) == "output_text"
+    )
+
+
+#
+# Initialize the Foundry Local SDK
+config = Configuration(app_name="foundry_local_samples")
+FoundryLocalManager.initialize(config)
+manager = FoundryLocalManager.instance
+
+# Download and register all execution providers.
+current_ep = ""
+
+
+def _ep_progress(ep_name: str, percent: float):
+    global current_ep
+    if ep_name != current_ep:
+        if current_ep:
+            print()
+        current_ep = ep_name
+    print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
+
+
+manager.download_and_register_eps(progress_callback=_ep_progress)
+if current_ep:
+    print()
+#
+
+#
+model_alias = "qwen2.5-0.5b"
+model = manager.catalog.get_model(model_alias)
+
+print(f"\nDownloading model {model_alias}...")
+model.download(
+    lambda progress: print(
+        f"\rDownloading model: {progress:.2f}%",
+        end="",
+        flush=True,
+    )
+)
+print("\nModel downloaded")
+
+print("\nLoading model...")
+model.load()
+print("Model loaded")
+#
+
+#
+print("\nStarting web service...")
+manager.start_web_service()
+base_url = manager.urls[0].rstrip("/") + "/v1"
+print("Web service started")
+
+# <<<<<< OPENAI SDK USAGE >>>>>>
+# Use the OpenAI SDK to call the local Foundry web service Responses API
+openai = OpenAI(
+    base_url=base_url,
+    api_key="notneeded",
+)
+#
+
+try:
+    print("\nTesting a non-streaming Responses call...")
+    response = openai.responses.create(
+        model=model.id,
+        input="Reply with one short sentence about local AI.",
+    )
+    print(f"[ASSISTANT]: {get_response_text(response)}")
+
+    print("\nTesting a streaming Responses call...")
+    stream = openai.responses.create(
+        model=model.id,
+        input="Count from one to three.",
+        stream=True,
+    )
+
+    print("[ASSISTANT STREAM]: ", end="", flush=True)
+    for event in stream:
+        if getattr(event, "type", None) == "response.output_text.delta":
+            print(getattr(event, "delta", ""), end="", flush=True)
+    print()
+
+    print("\nTesting Responses tool calling...")
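+    # Tool-calling round trip: expose a parameterless get_weather function,
+    # force the model to call it (tool_choice="required"), and keep the response
+    # server-side (store=True) so the follow-up request can reference it via
+    # previous_response_id and only needs to send the function_call_output.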
+    tools = [
+        {
+            "type": "function",
+            "name": "get_weather",
+            "description": "Get the current weather. This sample always returns Seattle weather.",
+            "parameters": {
+                "type": "object",
+                "properties": {},
+                "additionalProperties": False,
+            },
+        },
+    ]
+
+    tool_response = openai.responses.create(
+        model=model.id,
+        input="Use the get_weather tool and then answer with the weather.",
+        tools=tools,
+        tool_choice="required",
+        store=True,
+    )
+
+    function_call = next(
+        (item for item in getattr(tool_response, "output", []) or [] if getattr(item, "type", None) == "function_call"),
+        None,
+    )
+    if function_call is None:
+        raise RuntimeError("Expected the model to call get_weather.")
+
+    print(f"[TOOL CALL]: {function_call.name}({function_call.arguments})")
+
+    final_response = openai.responses.create(
+        model=model.id,
+        previous_response_id=tool_response.id,
+        input=[
+            {
+                "type": "function_call_output",
+                "call_id": function_call.call_id,
+                "output": json.dumps({"location": "Seattle", "weather": "72 degrees F and sunny"}),
+            }
+        ],
+        tools=tools,
+    )
+
+    print(f"[ASSISTANT FINAL]: {get_response_text(final_response)}")
+    # <<<<<< END OPENAI SDK USAGE >>>>>>
+finally:
+    # Tidy up
+    openai.close()
+    manager.stop_web_service()
+    model.unload()
+#
diff --git a/sdk/python/test/openai/test_responses_web_service.py b/sdk/python/test/openai/test_responses_web_service.py
new file mode 100644
index 00000000..e323a892
--- /dev/null
+++ b/sdk/python/test/openai/test_responses_web_service.py
@@ -0,0 +1,244 @@
+# --------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+"""Integration tests for /v1/responses through the local web service.
+
+These tests intentionally use FoundryLocalManager only for SDK setup, model
+lifecycle, and web-service lifecycle. Actual Responses API calls go through the
+OpenAI-compatible HTTP endpoint directly.
+"""
+
+from __future__ import annotations
+
+import json
+
+import pytest
+import requests
+
+from ..conftest import TEST_MODEL_ALIAS, skip_in_ci
+
+
+pytestmark = skip_in_ci
+
+
+def _response_text(response: dict) -> str:
+    text = response.get("output_text")
+    if isinstance(text, str) and text:
+        return text
+
+    return "".join(
+        part.get("text", "")
+        for item in response.get("output", []) or []
+        if item.get("type") == "message"
+        for part in item.get("content", []) or []
+        if part.get("type") == "output_text" and isinstance(part.get("text"), str)
+    )
+
+
+def _post_response(base_url: str, body: dict) -> dict:
+    response = requests.post(
+        f"{base_url}/v1/responses",
+        headers={"Content-Type": "application/json"},
+        json=body,
+        timeout=60,
+    )
+    assert response.ok, response.text
+    return response.json()
+
+
+def _post_streaming_response(base_url: str, body: dict) -> list[dict]:
+    response = requests.post(
+        f"{base_url}/v1/responses",
+        headers={"Content-Type": "application/json", "Accept": "text/event-stream"},
+        json={**body, "stream": True},
+        stream=True,
+        timeout=(60, None),
+    )
+    assert response.ok, response.text
+
+    events: list[dict] = []
+    buffer = ""
+    try:
+        for chunk in response.iter_content(chunk_size=None, decode_unicode=False):
+            if not chunk:
+                continue
+            text = chunk.decode("utf-8", errors="replace") if isinstance(chunk, bytes) else chunk
+            buffer += text.replace("\r\n", "\n")
+
+            while "\n\n" in buffer:
+                block, buffer = buffer.split("\n\n", 1)
+                data = _sse_data(block)
+                if not data:
+                    continue
+                if data == "[DONE]":
+                    return events
+                events.append(json.loads(data))
+    finally:
+        response.close()
+
+    tail = buffer.strip()
+    if tail:
+        data = _sse_data(tail)
+        if data and data != "[DONE]":
+            events.append(json.loads(data))
+    return events
+
+
+def _sse_data(block: str) -> str:
+    lines: list[str] = []
+    for line in block.strip().split("\n"):
+        if line.startswith("data: "):
+            lines.append(line[6:])
+        elif line == "data:":
+            lines.append("")
+    return "\n".join(lines).strip()
+
+
+def _get_function_call(response: dict) -> dict | None:
+    for item in response.get("output", []) or []:
+        if item.get("type") == "function_call":
+            return item
+    return None
+
+
+def _get_weather_tool() -> dict:
+    return {
+        "type": "function",
+        "name": "get_weather",
+        "description": "Get the current weather. This test always returns Seattle weather.",
+        "parameters": {
+            "type": "object",
+            "properties": {},
+            "additionalProperties": False,
+        },
+    }
+
+
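+# The fixture below only runs when the test model is already cached: it loads
+# the model, starts the local web service, yields (base_url, model_id) for the
+# HTTP tests, and always stops the service and unloads the model afterwards.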
+@pytest.fixture(scope="module")
+def responses_web_service(manager, catalog):
+    cached = catalog.get_cached_models()
+    cached_variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None)
+    if cached_variant is None:
+        pytest.skip(f"{TEST_MODEL_ALIAS} must be cached to run Responses web-service tests")
+
+    model = catalog.get_model(TEST_MODEL_ALIAS)
+    if model is None:
+        pytest.skip(f"{TEST_MODEL_ALIAS} was not found in the catalog")
+
+    model.select_variant(cached_variant)
+    service_started = False
+    model_loaded = False
+
+    try:
+        try:
+            model.load()
+            model_loaded = True
+            manager.start_web_service()
+            service_started = True
+        except Exception as exc:
+            message = str(exc)
+            if "execute_command_with_binary" in message:
+                pytest.skip(
+                    "Local Foundry Local Core/native runtime is stale: "
+                    "failed to resolve execute_command_with_binary"
+                )
+            pytest.skip(f"Failed to start Responses web-service test prerequisites: {exc}")
+
+        if not manager.urls:
+            pytest.skip("Web service started but did not return any URLs")
+
+        yield manager.urls[0].rstrip("/"), model.id
+    finally:
+        if service_started:
+            try:
+                manager.stop_web_service()
+            except Exception:
+                pass
+        if model_loaded:
+            try:
+                model.unload()
+            except Exception:
+                pass
+
+
+class TestResponsesWebService:
+    def test_should_create_non_streaming_response(self, responses_web_service):
+        base_url, model_id = responses_web_service
+
+        response = _post_response(
+            base_url,
+            {
+                "model": model_id,
+                "input": "What is 2 + 2? Answer with just the number.",
+                "temperature": 0,
+                "max_output_tokens": 64,
+                "store": False,
+            },
+        )
+
+        assert response["object"] == "response"
+        assert response["status"] == "completed"
+        assert _response_text(response).strip()
+
+    def test_should_stream_response_events(self, responses_web_service):
+        base_url, model_id = responses_web_service
+
+        events = _post_streaming_response(
+            base_url,
+            {
+                "model": model_id,
+                "input": "Count from 1 to 3.",
+                "temperature": 0,
+                "max_output_tokens": 64,
+                "store": False,
+            },
+        )
+
+        assert any(event.get("type") == "response.created" for event in events)
+        assert any(event.get("type") == "response.output_text.delta" for event in events)
+        assert any(event.get("type") == "response.completed" for event in events)
+
+    def test_should_round_trip_function_call_output(self, responses_web_service):
+        base_url, model_id = responses_web_service
+        weather_tool = _get_weather_tool()
+
+        tool_response = _post_response(
+            base_url,
+            {
+                "model": model_id,
+                "input": "Use the get_weather tool and then answer with the weather.",
+                "tools": [weather_tool],
+                "tool_choice": "required",
+                "temperature": 0,
+                "max_output_tokens": 64,
+                "store": True,
+            },
+        )
+        function_call = _get_function_call(tool_response)
+
+        assert function_call is not None, json.dumps(tool_response.get("output", []))
+        assert function_call["name"] == "get_weather"
+        assert isinstance(function_call["call_id"], str)
+
+        final_response = _post_response(
+            base_url,
+            {
+                "model": model_id,
+                "previous_response_id": tool_response["id"],
+                "input": [
+                    {
+                        "type": "function_call_output",
+                        "call_id": function_call["call_id"],
+                        "output": json.dumps({"location": "Seattle", "weather": "72 degrees F and sunny"}),
+                    }
+                ],
+                "tools": [weather_tool],
+                "temperature": 0,
+                "max_output_tokens": 64,
+                "store": False,
+            },
+        )
+
+        assert final_response["status"] == "completed"
+        assert _response_text(final_response).strip()