diff --git a/samples/README.md b/samples/README.md index bcac6bf3..5439d653 100644 --- a/samples/README.md +++ b/samples/README.md @@ -11,4 +11,4 @@ Explore complete working examples that demonstrate how to use Foundry Local — | [**C#**](cs/) | 13 | .NET SDK samples including native chat, embeddings, audio transcription, tool calling, model management, web server, and tutorials. Uses WinML on Windows for hardware acceleration. | | [**JavaScript**](js/) | 13 | Node.js SDK samples including native chat, embeddings, audio transcription, Electron desktop app, Copilot SDK integration, LangChain, tool calling, web server, and tutorials. | | [**Python**](python/) | 10 | Python samples using the OpenAI-compatible API, including chat, embeddings, audio transcription, LangChain integration, tool calling, web server, and tutorials. | -| [**Rust**](rust/) | 9 | Rust SDK samples including native chat, embeddings, audio transcription, tool calling, web server, and tutorials. | +| [**Rust**](rust/) | 10 | Rust SDK samples including native chat, embeddings, audio transcription, tool calling, web server, Responses API, and tutorials. | diff --git a/samples/rust/Cargo.toml b/samples/rust/Cargo.toml index 7be551ea..ea0b7a31 100644 --- a/samples/rust/Cargo.toml +++ b/samples/rust/Cargo.toml @@ -1,6 +1,7 @@ [workspace] members = [ "foundry-local-webserver", + "web-server-responses", "tool-calling-foundry-local", "native-chat-completions", "audio-transcription-example", diff --git a/samples/rust/README.md b/samples/rust/README.md index bc65306f..5980dcbc 100644 --- a/samples/rust/README.md +++ b/samples/rust/README.md @@ -14,6 +14,7 @@ These samples demonstrate how to use the Rust binding for Foundry Local. | [embeddings](embeddings/) | Generate single and batch text embeddings using the native embedding client. | | [audio-transcription-example](audio-transcription-example/) | Audio transcription (non-streaming and streaming) using the Whisper model. | | [foundry-local-webserver](foundry-local-webserver/) | Start a local OpenAI-compatible web server and call it with a standard HTTP client. | +| [web-server-responses](web-server-responses/) | Call a running local OpenAI-compatible web server with the Responses API, including streaming and tool calling. | | [tool-calling-foundry-local](tool-calling-foundry-local/) | Tool calling with streaming responses, multi-turn conversation, and local tool execution. | | [tutorial-chat-assistant](tutorial-chat-assistant/) | Build an interactive multi-turn chat assistant (tutorial). | | [tutorial-document-summarizer](tutorial-document-summarizer/) | Summarize documents with AI (tutorial). | diff --git a/samples/rust/web-server-responses/Cargo.toml b/samples/rust/web-server-responses/Cargo.toml new file mode 100644 index 00000000..8395637c --- /dev/null +++ b/samples/rust/web-server-responses/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "web-server-responses" +version = "0.1.0" +edition = "2021" +description = "Responses API sample using the Foundry Local OpenAI-compatible web service" + +[dependencies] +foundry-local-sdk = { path = "../../../sdk/rust" } +tokio = { version = "1", features = ["rt-multi-thread", "macros"] } +serde_json = "1" +reqwest = { version = "0.12", features = ["json", "stream"] } + +[target.'cfg(windows)'.dependencies] +foundry-local-sdk = { path = "../../../sdk/rust", features = ["winml"] } diff --git a/samples/rust/web-server-responses/README.md b/samples/rust/web-server-responses/README.md new file mode 100644 index 00000000..49d94dcd --- /dev/null +++ b/samples/rust/web-server-responses/README.md @@ -0,0 +1,67 @@ +# Responses API web-service sample + +This sample starts the Foundry Local OpenAI-compatible web service with the Rust SDK, then calls the Responses API through raw HTTP requests to `/v1/responses`. + +It demonstrates: + +- Non-streaming Responses API calls +- Streaming Server-Sent Events (SSE) responses +- Function/tool calling with `previous_response_id` +- Local model load/unload and web-service cleanup + +## Prerequisites + +- Rust 1.70 or later +- Foundry Local runtime prerequisites for your platform +- Internet access the first time dependencies, execution providers, or the sample model need to be downloaded + +No OpenAI API key is required. The sample talks to the local Foundry Local web service. + +## What gets installed + +Cargo restores the Rust crates declared in `Cargo.toml`: + +| Dependency | Purpose | +|------------|---------| +| `foundry-local-sdk` | Initializes Foundry Local, downloads/registers execution providers, manages the model, and starts/stops the local web service. | +| `tokio` | Runs the async sample. | +| `reqwest` | Sends JSON requests and reads streaming SSE chunks from `/v1/responses`. | +| `serde_json` | Builds request payloads and reads response JSON. | + +On Windows, the sample enables the SDK `winml` feature through the target-specific dependency in `Cargo.toml`. + +At runtime, the sample also: + +- Downloads and registers Foundry Local execution providers if needed. +- Downloads `qwen2.5-0.5b` if it is not already cached. +- Starts the local OpenAI-compatible web service and uses the dynamic URL returned by the SDK. + +Downloaded models, native runtime files, and Cargo build outputs are local machine artifacts and should not be committed. + +## Run the sample + +From the Rust samples workspace: + +```powershell +cd samples\rust +cargo run -p web-server-responses +``` + +Or from this sample directory: + +```powershell +cd samples\rust\web-server-responses +cargo run +``` + +The sample prints progress for execution-provider/model setup, then runs: + +1. A non-streaming Responses request. +2. A streaming Responses request that consumes `response.output_text.delta` events. +3. A function-calling request that asks the model to call `get_weather`, submits a `function_call_output`, and prints the final assistant response. + +## Troubleshooting + +If setup fails while resolving native Foundry Local symbols, verify that your locally installed Foundry Local runtime packages are compatible with the SDK version in this repository. + +If model download is unavailable, pre-cache `qwen2.5-0.5b` with your normal Foundry Local workflow, then run the sample again. diff --git a/samples/rust/web-server-responses/src/main.rs b/samples/rust/web-server-responses/src/main.rs new file mode 100644 index 00000000..774ed199 --- /dev/null +++ b/samples/rust/web-server-responses/src/main.rs @@ -0,0 +1,322 @@ +// +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +//! Responses API web-service sample. +//! +//! Demonstrates how to use the Rust SDK for Foundry Local setup, model +//! lifecycle, and local web-service lifecycle, then call `/v1/responses` with a +//! standard HTTP client. + +// +use std::error::Error; +use std::io::{self, Write}; + +use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager}; +use serde_json::{json, Value}; +// + +type SampleResult = Result>; +const MODEL_ALIAS: &str = "qwen2.5-0.5b"; + +#[tokio::main] +async fn main() -> SampleResult<()> { + println!("Responses Web Service"); + println!("=====================\n"); + + // ── 1. Initialise the SDK ──────────────────────────────────────────── + // + println!("Initializing Foundry Local SDK..."); + let manager = FoundryLocalManager::create(FoundryLocalConfig::new("foundry_local_samples"))?; + println!("SDK initialized successfully"); + + manager + .download_and_register_eps_with_progress(None, { + let mut current_ep = String::new(); + move |ep_name: &str, percent: f64| { + if ep_name != current_ep { + if !current_ep.is_empty() { + println!(); + } + current_ep = ep_name.to_string(); + } + print!("\r {:<30} {:5.1}%", ep_name, percent); + io::stdout().flush().ok(); + } + }) + .await?; + println!(); + // + + // ── 2. Download and load a model ───────────────────────────────────── + // + let model = manager.catalog().get_model(MODEL_ALIAS).await?; + + if !model.is_cached().await? { + println!("Downloading model {MODEL_ALIAS}..."); + model + .download(Some(|progress: f64| { + print!("\rDownloading model... {progress:.1}%"); + io::stdout().flush().ok(); + })) + .await?; + println!(); + } + + println!("Loading model {MODEL_ALIAS}..."); + model.load().await?; + println!("Model loaded"); + // + + // ── 3. Start the OpenAI-compatible web service ─────────────────────── + // + println!("Starting web service..."); + manager.start_web_service().await?; + println!("Web service started"); + + let endpoint = manager + .urls()? + .first() + .expect("Web service did not return an endpoint") + .trim_end_matches('/') + .to_string(); + let base_url = format!("{endpoint}/v1"); + println!("Using base URL: {base_url}"); + // + + let result = run_responses_flow(&base_url, model.id()).await; + + // ── 4. Clean up ────────────────────────────────────────────────────── + manager.stop_web_service().await.ok(); + model.unload().await.ok(); + + result +} + +async fn run_responses_flow(base_url: &str, model_id: &str) -> SampleResult<()> { + let http = reqwest::Client::new(); + + println!("\nTesting a non-streaming Responses call..."); + let response = post_response_json( + &http, + base_url, + json!({ + "model": model_id, + "input": "Reply with one short sentence about local AI.", + "temperature": 0.0, + "max_output_tokens": 64, + "store": false + }), + ) + .await?; + println!("[ASSISTANT]: {}", output_text(&response)); + + println!("\nTesting a streaming Responses call..."); + print!("[ASSISTANT STREAM]: "); + io::stdout().flush().ok(); + let streaming_response = http + .post(format!("{base_url}/responses")) + .header(reqwest::header::ACCEPT, "text/event-stream") + .json(&json!({ + "model": model_id, + "input": "Count from one to three.", + "temperature": 0.0, + "max_output_tokens": 64, + "store": false, + "stream": true + })) + .send() + .await?; + let streamed = read_responses_sse(streaming_response).await?; + println!(); + if !streamed.created || streamed.delta_count == 0 || !streamed.completed { + return Err( + "stream did not include response.created, text delta, and completion events".into(), + ); + } + + println!("\nTesting Responses tool calling..."); + let tools = [get_weather_tool()]; + let tool_response = post_response_json( + &http, + base_url, + json!({ + "model": model_id, + "input": "Use the get_weather tool and then answer with the weather.", + "tools": tools, + "tool_choice": "required", + "temperature": 0.0, + "max_output_tokens": 64, + "store": true + }), + ) + .await?; + + let (call_id, name) = + find_function_call(&tool_response).ok_or("expected a function_call item")?; + println!("[TOOL CALL]: {name} ({call_id})"); + + let final_response = post_response_json( + &http, + base_url, + json!({ + "model": model_id, + "previous_response_id": tool_response["id"].clone(), + "input": [{ + "type": "function_call_output", + "call_id": call_id, + "output": "{\"location\":\"Seattle\",\"weather\":\"72 degrees F and sunny\"}" + }], + "tools": [get_weather_tool()], + "temperature": 0.0, + "max_output_tokens": 64, + "store": false + }), + ) + .await?; + println!("[ASSISTANT FINAL]: {}", output_text(&final_response)); + + Ok(()) +} + +async fn post_response_json( + http: &reqwest::Client, + base_url: &str, + body: Value, +) -> SampleResult { + let response = http + .post(format!("{base_url}/responses")) + .json(&body) + .send() + .await?; + let status = response.status(); + let text = response.text().await?; + if !status.is_success() { + return Err(format!("Responses API returned {status}: {text}").into()); + } + Ok(serde_json::from_str(&text)?) +} + +fn output_text(response: &Value) -> String { + if let Some(text) = response.get("output_text").and_then(Value::as_str) { + return text.to_string(); + } + + response + .get("output") + .and_then(Value::as_array) + .into_iter() + .flatten() + .find_map(|item| { + if item.get("type").and_then(Value::as_str) != Some("message") { + return None; + } + match item.get("content") { + Some(Value::String(text)) => Some(text.clone()), + Some(Value::Array(parts)) => Some( + parts + .iter() + .filter_map(|part| { + (part.get("type").and_then(Value::as_str) == Some("output_text")) + .then(|| part.get("text").and_then(Value::as_str)) + .flatten() + }) + .collect::(), + ), + _ => None, + } + }) + .unwrap_or_default() +} + +fn find_function_call(response: &Value) -> Option<(String, String)> { + response.get("output")?.as_array()?.iter().find_map(|item| { + if item.get("type").and_then(Value::as_str) != Some("function_call") { + return None; + } + let call_id = item.get("call_id")?.as_str()?.to_string(); + let name = item.get("name")?.as_str()?.to_string(); + Some((call_id, name)) + }) +} + +fn get_weather_tool() -> Value { + json!({ + "type": "function", + "name": "get_weather", + "description": "Get the current weather. This sample always returns Seattle weather.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": false + } + }) +} + +#[derive(Default)] +struct StreamSummary { + created: bool, + delta_count: usize, + completed: bool, +} + +async fn read_responses_sse(mut response: reqwest::Response) -> SampleResult { + let status = response.status(); + if !status.is_success() { + let text = response.text().await?; + return Err(format!("Responses API returned {status}: {text}").into()); + } + + let mut buffer = String::new(); + let mut summary = StreamSummary::default(); + + while let Some(chunk) = response.chunk().await? { + buffer.push_str(&String::from_utf8_lossy(&chunk).replace("\r\n", "\n")); + while let Some(block_end) = buffer.find("\n\n") { + let block = buffer[..block_end].to_string(); + buffer = buffer[block_end + 2..].to_string(); + if handle_sse_block(&block, &mut summary) { + return Ok(summary); + } + } + } + + if !buffer.trim().is_empty() { + handle_sse_block(&buffer, &mut summary); + } + + Ok(summary) +} + +fn handle_sse_block(block: &str, summary: &mut StreamSummary) -> bool { + let data = block + .lines() + .filter_map(|line| line.trim().strip_prefix("data:").map(str::trim_start)) + .collect::>() + .join("\n"); + + if data.is_empty() { + return false; + } + if data == "[DONE]" { + return true; + } + + if let Ok(event) = serde_json::from_str::(&data) { + match event.get("type").and_then(Value::as_str) { + Some("response.created") => summary.created = true, + Some("response.output_text.delta") => { + summary.delta_count += 1; + if let Some(delta) = event.get("delta").and_then(Value::as_str) { + print!("{delta}"); + io::stdout().flush().ok(); + } + } + Some("response.completed") => summary.completed = true, + _ => {} + } + } + + false +} +// diff --git a/sdk/rust/Cargo.toml b/sdk/rust/Cargo.toml index 94794697..d5517137 100644 --- a/sdk/rust/Cargo.toml +++ b/sdk/rust/Cargo.toml @@ -24,7 +24,7 @@ tokio = { version = "1", features = ["rt-multi-thread", "macros", "sync"] } tokio-stream = "0.1" tokio-util = "0.7" futures-core = "0.3" -reqwest = { version = "0.12", features = ["json"] } +reqwest = { version = "0.12", features = ["json", "stream"] } urlencoding = "2" async-openai = { version = "0.33", default-features = false, features = ["chat-completion-types", "embedding-types"] } @@ -34,6 +34,10 @@ zip = "2" serde_json = "1" serde = { version = "1", features = ["derive"] } +[[example]] +name = "responses_web_service" +path = "examples/responses_web_service.rs" + [[example]] name = "chat_completion" path = "examples/chat_completion.rs" diff --git a/sdk/rust/examples/responses_web_service.rs b/sdk/rust/examples/responses_web_service.rs new file mode 100644 index 00000000..06948f4a --- /dev/null +++ b/sdk/rust/examples/responses_web_service.rs @@ -0,0 +1,297 @@ +//! Responses API web-service sample. +//! +//! This sample uses the Rust SDK only for Foundry Local setup and lifecycle: +//! manager initialization, model lookup/download/load, and local web-service +//! start/stop. The actual `/v1/responses` calls use raw HTTP against the +//! OpenAI-compatible local endpoint. + +use std::error::Error; +use std::io::{self, Write}; + +use foundry_local_sdk::{FoundryLocalConfig, FoundryLocalManager}; +use serde_json::{json, Value}; + +type SampleResult = Result>; +const MODEL_ALIAS: &str = "qwen2.5-0.5b"; + +#[tokio::main] +async fn main() -> SampleResult<()> { + println!("Responses Web Service"); + println!("=====================\n"); + + let config = FoundryLocalConfig::new("foundry_local_responses_web_service_sample"); + let manager = FoundryLocalManager::create(config)?; + + manager + .download_and_register_eps_with_progress(None, { + let mut current_ep = String::new(); + move |ep_name: &str, percent: f64| { + if ep_name != current_ep { + if !current_ep.is_empty() { + println!(); + } + current_ep = ep_name.to_string(); + } + print!("\r {:<30} {:5.1}%", ep_name, percent); + io::stdout().flush().ok(); + } + }) + .await?; + println!(); + + let model = manager.catalog().get_model(MODEL_ALIAS).await?; + if !model.is_cached().await? { + println!("Downloading model '{}'...", model.alias()); + model + .download(Some(|progress: f64| println!(" {progress:.1}%"))) + .await?; + } + + println!("Loading model '{}'...", model.alias()); + model.load().await?; + + println!("Starting local OpenAI-compatible web service..."); + manager.start_web_service().await?; + let base_url = format!( + "{}/v1", + manager + .urls()? + .first() + .expect("web service did not return a URL") + .trim_end_matches('/') + ); + println!("Using base URL: {base_url}"); + + let result = run_responses_flow(&base_url, model.id()).await; + + manager.stop_web_service().await.ok(); + model.unload().await.ok(); + + result +} + +async fn run_responses_flow(base_url: &str, model_id: &str) -> SampleResult<()> { + let http = reqwest::Client::new(); + + println!("\n--- Non-streaming response ---"); + let response = post_response_json( + &http, + base_url, + json!({ + "model": model_id, + "input": "What is 2 + 2? Respond with just the answer.", + "temperature": 0.0, + "max_output_tokens": 64, + "store": false + }), + ) + .await?; + println!("Assistant: {}", output_text(&response)); + + println!("\n--- Streaming response ---"); + print!("Assistant: "); + io::stdout().flush().ok(); + let streaming_response = http + .post(format!("{base_url}/responses")) + .json(&json!({ + "model": model_id, + "input": "Count from 1 to 3.", + "temperature": 0.0, + "max_output_tokens": 64, + "store": false, + "stream": true + })) + .header(reqwest::header::ACCEPT, "text/event-stream") + .send() + .await?; + let streamed = read_responses_sse(streaming_response).await?; + println!("\nSaw {} text delta event(s).", streamed.delta_count); + if !streamed.created || streamed.delta_count == 0 || !streamed.completed { + return Err( + "stream did not include response.created, text delta, and completion events".into(), + ); + } + + println!("\n--- Function calling response ---"); + let weather_tool = get_weather_tool(); + let tool_response = post_response_json( + &http, + base_url, + json!({ + "model": model_id, + "input": "Use the get_weather tool and then answer with the weather.", + "tools": [weather_tool.clone()], + "tool_choice": "required", + "temperature": 0.0, + "max_output_tokens": 64, + "store": true + }), + ) + .await?; + let (call_id, name) = find_function_call(&tool_response) + .ok_or("expected a function_call item in the tool response")?; + println!("Model requested tool call: {name} ({call_id})"); + + let final_response = post_response_json( + &http, + base_url, + json!({ + "model": model_id, + "previous_response_id": tool_response["id"].clone(), + "input": [{ + "type": "function_call_output", + "call_id": call_id, + "output": "{\"location\":\"Seattle\",\"weather\":\"72 degrees F and sunny\"}" + }], + "tools": [weather_tool], + "temperature": 0.0, + "max_output_tokens": 64, + "store": false + }), + ) + .await?; + println!("Assistant: {}", output_text(&final_response)); + + Ok(()) +} + +async fn post_response_json( + http: &reqwest::Client, + base_url: &str, + body: Value, +) -> SampleResult { + let response = http + .post(format!("{base_url}/responses")) + .json(&body) + .send() + .await?; + let status = response.status(); + let text = response.text().await?; + if !status.is_success() { + return Err(format!("Responses API returned {status}: {text}").into()); + } + Ok(serde_json::from_str(&text)?) +} + +fn output_text(response: &Value) -> String { + if let Some(text) = response.get("output_text").and_then(Value::as_str) { + return text.to_string(); + } + + response + .get("output") + .and_then(Value::as_array) + .into_iter() + .flatten() + .find_map(|item| { + if item.get("type").and_then(Value::as_str) != Some("message") { + return None; + } + match item.get("content") { + Some(Value::String(text)) => Some(text.clone()), + Some(Value::Array(parts)) => Some( + parts + .iter() + .filter_map(|part| { + (part.get("type").and_then(Value::as_str) == Some("output_text")) + .then(|| part.get("text").and_then(Value::as_str)) + .flatten() + }) + .collect::(), + ), + _ => None, + } + }) + .unwrap_or_default() +} + +fn find_function_call(response: &Value) -> Option<(String, String)> { + response.get("output")?.as_array()?.iter().find_map(|item| { + if item.get("type").and_then(Value::as_str) != Some("function_call") { + return None; + } + let call_id = item.get("call_id")?.as_str()?.to_string(); + let name = item.get("name")?.as_str()?.to_string(); + Some((call_id, name)) + }) +} + +fn get_weather_tool() -> Value { + json!({ + "type": "function", + "name": "get_weather", + "description": "Get the current weather. This sample always returns Seattle weather.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": false + } + }) +} + +#[derive(Default)] +struct StreamSummary { + created: bool, + delta_count: usize, + completed: bool, +} + +async fn read_responses_sse(mut response: reqwest::Response) -> SampleResult { + let status = response.status(); + if !status.is_success() { + let text = response.text().await?; + return Err(format!("Responses API returned {status}: {text}").into()); + } + + let mut buffer = String::new(); + let mut summary = StreamSummary::default(); + + while let Some(chunk) = response.chunk().await? { + buffer.push_str(&String::from_utf8_lossy(&chunk).replace("\r\n", "\n")); + while let Some(block_end) = buffer.find("\n\n") { + let block = buffer[..block_end].to_string(); + buffer = buffer[block_end + 2..].to_string(); + if handle_sse_block(&block, &mut summary) { + return Ok(summary); + } + } + } + + if !buffer.trim().is_empty() { + handle_sse_block(&buffer, &mut summary); + } + + Ok(summary) +} + +fn handle_sse_block(block: &str, summary: &mut StreamSummary) -> bool { + let data = block + .lines() + .filter_map(|line| line.trim().strip_prefix("data:").map(str::trim_start)) + .collect::>() + .join("\n"); + + if data.is_empty() { + return false; + } + if data == "[DONE]" { + return true; + } + + if let Ok(event) = serde_json::from_str::(&data) { + match event.get("type").and_then(Value::as_str) { + Some("response.created") => summary.created = true, + Some("response.output_text.delta") => { + summary.delta_count += 1; + if let Some(delta) = event.get("delta").and_then(Value::as_str) { + print!("{delta}"); + io::stdout().flush().ok(); + } + } + Some("response.completed") => summary.completed = true, + _ => {} + } + } + + false +} diff --git a/sdk/rust/tests/integration/main.rs b/sdk/rust/tests/integration/main.rs index 05576000..d1366adc 100644 --- a/sdk/rust/tests/integration/main.rs +++ b/sdk/rust/tests/integration/main.rs @@ -15,4 +15,5 @@ mod embedding_client_test; mod live_audio_test; mod manager_test; mod model_test; +mod responses_test; mod web_service_test; diff --git a/sdk/rust/tests/integration/responses_test.rs b/sdk/rust/tests/integration/responses_test.rs new file mode 100644 index 00000000..cc5fae9c --- /dev/null +++ b/sdk/rust/tests/integration/responses_test.rs @@ -0,0 +1,366 @@ +use super::common; +use serde_json::{json, Value}; +use std::sync::Arc; + +type TestResult = Result>; + +static RESPONSES_TEST_LOCK: tokio::sync::Mutex<()> = tokio::sync::Mutex::const_new(()); + +struct ResponsesServiceContext { + manager: &'static foundry_local_sdk::FoundryLocalManager, + model: Arc, + base_url: String, + http: reqwest::Client, +} + +impl ResponsesServiceContext { + async fn start() -> Option { + if common::is_running_in_ci() { + eprintln!("Skipping Responses web-service test in CI"); + return None; + } + + let manager = common::get_test_manager(); + let catalog = manager.catalog(); + + let cached_models = match catalog.get_cached_models().await { + Ok(models) => models, + Err(e) => { + eprintln!("Skipping Responses web-service test: cached model lookup failed: {e}"); + return None; + } + }; + + let Some(cached_variant) = cached_models + .into_iter() + .find(|model| model.alias() == common::TEST_MODEL_ALIAS) + else { + eprintln!( + "Skipping Responses web-service test: model '{}' is not cached", + common::TEST_MODEL_ALIAS + ); + return None; + }; + + let model = match catalog.get_model(common::TEST_MODEL_ALIAS).await { + Ok(model) => model, + Err(e) => { + eprintln!( + "Skipping Responses web-service test: model '{}' unavailable: {e}", + common::TEST_MODEL_ALIAS + ); + return None; + } + }; + model + .select_variant(cached_variant.as_ref()) + .expect("select cached model variant failed"); + + model.load().await.expect("model.load() failed"); + manager + .start_web_service() + .await + .expect("start_web_service failed"); + + let base_url = format!( + "{}/v1", + manager + .urls() + .expect("urls() should succeed") + .first() + .expect("no URL returned") + .trim_end_matches('/') + ); + + Some(Self { + manager, + model, + base_url, + http: reqwest::Client::new(), + }) + } + + async fn cleanup(&self) { + self.manager + .stop_web_service() + .await + .expect("stop_web_service failed"); + self.model.unload().await.expect("model.unload() failed"); + } +} + +#[tokio::test] +async fn should_create_non_streaming_response_via_rest_api() { + let _guard = RESPONSES_TEST_LOCK.lock().await; + let Some(ctx) = ResponsesServiceContext::start().await else { + return; + }; + + let result = post_response_json( + &ctx, + json!({ + "model": ctx.model.id(), + "input": "What is 2 + 2? Respond with just the answer.", + "temperature": 0.0, + "max_output_tokens": 64, + "store": false + }), + ) + .await; + + ctx.cleanup().await; + + let body = result.expect("Responses non-streaming request failed"); + assert_eq!(body.get("object").and_then(Value::as_str), Some("response")); + assert_eq!( + body.get("status").and_then(Value::as_str), + Some("completed") + ); + let text = output_text(&body); + println!("Responses non-streaming text: {text}"); + assert!(!text.trim().is_empty(), "response text should not be empty"); +} + +#[tokio::test] +async fn should_stream_response_via_rest_api() { + let _guard = RESPONSES_TEST_LOCK.lock().await; + let Some(ctx) = ResponsesServiceContext::start().await else { + return; + }; + + let result = async { + let response = ctx + .http + .post(format!("{}/responses", ctx.base_url)) + .json(&json!({ + "model": ctx.model.id(), + "input": "Count from 1 to 3.", + "temperature": 0.0, + "max_output_tokens": 64, + "store": false, + "stream": true + })) + .header(reqwest::header::ACCEPT, "text/event-stream") + .send() + .await?; + + read_responses_sse(response).await + } + .await; + + ctx.cleanup().await; + + let summary = result.expect("Responses streaming request failed"); + assert!( + summary.created, + "expected a response.created event in the stream" + ); + assert!( + summary.delta_count > 0, + "expected at least one response.output_text.delta event" + ); + assert!( + summary.completed, + "expected a response.completed event in the stream" + ); +} + +#[tokio::test] +async fn should_complete_tool_calling_response_via_rest_api() { + let _guard = RESPONSES_TEST_LOCK.lock().await; + let Some(ctx) = ResponsesServiceContext::start().await else { + return; + }; + + let result = async { + let weather_tool = get_weather_tool(); + let tool_response = post_response_json( + &ctx, + json!({ + "model": ctx.model.id(), + "input": "Use the get_weather tool for Seattle, then answer.", + "tools": [weather_tool.clone()], + "tool_choice": "required", + "temperature": 0.0, + "max_output_tokens": 64, + "store": true + }), + ) + .await?; + + let (call_id, name) = find_function_call(&tool_response) + .ok_or("expected a function_call item in the tool response")?; + if call_id.is_empty() { + return Err("expected non-empty function call ID".into()); + } + if name != "get_weather" { + return Err(format!("expected get_weather function call, got {name}").into()); + } + + let final_response = post_response_json( + &ctx, + json!({ + "model": ctx.model.id(), + "previous_response_id": tool_response["id"].clone(), + "input": [{ + "type": "function_call_output", + "call_id": call_id, + "output": "Seattle weather is 72F and sunny." + }], + "tools": [weather_tool], + "temperature": 0.0, + "max_output_tokens": 64, + "store": false + }), + ) + .await?; + + if final_response.get("status").and_then(Value::as_str) != Some("completed") { + return Err(format!("expected completed final response, got {final_response}").into()); + } + + Ok::>(output_text(&final_response)) + } + .await; + + ctx.cleanup().await; + + let text = result.expect("Responses tool-calling flow failed"); + println!("Responses tool final text: {text}"); + assert!( + !text.trim().is_empty(), + "final response text should not be empty" + ); +} + +async fn post_response_json(ctx: &ResponsesServiceContext, body: Value) -> TestResult { + let response = ctx + .http + .post(format!("{}/responses", ctx.base_url)) + .json(&body) + .send() + .await?; + let status = response.status(); + let text = response.text().await?; + if !status.is_success() { + return Err(format!("Responses API returned {status}: {text}").into()); + } + Ok(serde_json::from_str(&text)?) +} + +fn output_text(response: &Value) -> String { + if let Some(text) = response.get("output_text").and_then(Value::as_str) { + return text.to_string(); + } + + response + .get("output") + .and_then(Value::as_array) + .into_iter() + .flatten() + .find_map(|item| { + if item.get("type").and_then(Value::as_str) != Some("message") { + return None; + } + match item.get("content") { + Some(Value::String(text)) => Some(text.clone()), + Some(Value::Array(parts)) => Some( + parts + .iter() + .filter_map(|part| { + (part.get("type").and_then(Value::as_str) == Some("output_text")) + .then(|| part.get("text").and_then(Value::as_str)) + .flatten() + }) + .collect::(), + ), + _ => None, + } + }) + .unwrap_or_default() +} + +fn find_function_call(response: &Value) -> Option<(String, String)> { + response.get("output")?.as_array()?.iter().find_map(|item| { + if item.get("type").and_then(Value::as_str) != Some("function_call") { + return None; + } + let call_id = item.get("call_id")?.as_str()?.to_string(); + let name = item.get("name")?.as_str()?.to_string(); + Some((call_id, name)) + }) +} + +fn get_weather_tool() -> Value { + json!({ + "type": "function", + "name": "get_weather", + "description": "Get the current weather. This test always returns Seattle weather.", + "parameters": { + "type": "object", + "properties": {}, + "additionalProperties": false + } + }) +} + +#[derive(Default)] +struct StreamSummary { + created: bool, + delta_count: usize, + completed: bool, +} + +async fn read_responses_sse(mut response: reqwest::Response) -> TestResult { + let status = response.status(); + if !status.is_success() { + let text = response.text().await?; + return Err(format!("Responses API returned {status}: {text}").into()); + } + + let mut buffer = String::new(); + let mut summary = StreamSummary::default(); + + while let Some(chunk) = response.chunk().await? { + buffer.push_str(&String::from_utf8_lossy(&chunk).replace("\r\n", "\n")); + while let Some(block_end) = buffer.find("\n\n") { + let block = buffer[..block_end].to_string(); + buffer = buffer[block_end + 2..].to_string(); + if handle_sse_block(&block, &mut summary) { + return Ok(summary); + } + } + } + + if !buffer.trim().is_empty() { + handle_sse_block(&buffer, &mut summary); + } + + Ok(summary) +} + +fn handle_sse_block(block: &str, summary: &mut StreamSummary) -> bool { + let data = block + .lines() + .filter_map(|line| line.trim().strip_prefix("data:").map(str::trim_start)) + .collect::>() + .join("\n"); + + if data.is_empty() { + return false; + } + if data == "[DONE]" { + return true; + } + + if let Ok(event) = serde_json::from_str::(&data) { + match event.get("type").and_then(Value::as_str) { + Some("response.created") => summary.created = true, + Some("response.output_text.delta") => summary.delta_count += 1, + Some("response.completed") => summary.completed = true, + _ => {} + } + } + + false +}