diff --git a/.pipelines/templates/test-python-steps.yml b/.pipelines/templates/test-python-steps.yml
index 37bfa5a8..7bde34b3 100644
--- a/.pipelines/templates/test-python-steps.yml
+++ b/.pipelines/templates/test-python-steps.yml
@@ -128,7 +128,7 @@ steps:
       if ($LASTEXITCODE -ne 0) { throw "Windows App SDK Runtime install failed" }
     errorActionPreference: 'stop'
 
-- script: pip install coverage pytest>=7.0.0 pytest-timeout>=2.1.0
+- script: pip install coverage "pytest>=7.0.0" "pytest-timeout>=2.1.0" "pytest-asyncio>=1.3.0"
  displayName: 'Install test dependencies'
 
 - script: python -m pytest test/ -v
diff --git a/samples/python/audio-transcription/src/app.py b/samples/python/audio-transcription/src/app.py
index ca06fb28..d7e00c68 100644
--- a/samples/python/audio-transcription/src/app.py
+++ b/samples/python/audio-transcription/src/app.py
@@ -1,53 +1,59 @@
 #
 #
+import asyncio
 import sys
 from foundry_local_sdk import Configuration, FoundryLocalManager
 #
 
-#
-# Initialize the Foundry Local SDK
-config = Configuration(app_name="foundry_local_samples")
-FoundryLocalManager.initialize(config)
-manager = FoundryLocalManager.instance
-
-# Download and register all execution providers.
-current_ep = ""
-def _ep_progress(ep_name: str, percent: float):
-    global current_ep
-    if ep_name != current_ep:
-        if current_ep:
-            print()
-        current_ep = ep_name
-    print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
-
-manager.download_and_register_eps(progress_callback=_ep_progress)
-if current_ep:
-    print()
+async def main():
+    #
+    # Initialize the Foundry Local SDK
+    config = Configuration(app_name="foundry_local_samples")
+    await FoundryLocalManager.initialize(config)
+    manager = FoundryLocalManager.instance
+
+    # Download and register all execution providers.
+    current_ep = ""
+    def _ep_progress(ep_name: str, percent: float):
+        nonlocal current_ep
+        if ep_name != current_ep:
+            if current_ep:
+                print()
+            current_ep = ep_name
+        print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
 
-# Load the whisper model for speech-to-text
-model = manager.catalog.get_model("whisper-tiny")
-model.download(
-    lambda progress: print(
-        f"\rDownloading model: {progress:.2f}%",
-        end="",
-        flush=True,
+    await manager.download_and_register_eps(progress_callback=_ep_progress)
+    if current_ep:
+        print()
+
+    # Load the whisper model for speech-to-text
+    model = await manager.catalog.get_model("whisper-tiny")
+    await model.download(
+        lambda progress: print(
+            f"\rDownloading model: {progress:.2f}%",
+            end="",
+            flush=True,
+        )
     )
-)
-print()
-model.load()
-print("Model loaded.")
-#
-
-#
-# Get the audio client and transcribe
-audio_client = model.get_audio_client()
-audio_file = sys.argv[1] if len(sys.argv) > 1 else "Recording.mp3"
-result = audio_client.transcribe(audio_file)
-print("Transcription:")
-print(result.text)
-#
-
-# Clean up
-model.unload()
+    print()
+    await model.load()
+    print("Model loaded.")
+    #
+
+    #
+    # Get the audio client and transcribe
+    audio_client = model.get_audio_client()
+    audio_file = sys.argv[1] if len(sys.argv) > 1 else "Recording.mp3"
+    result = await audio_client.transcribe(audio_file)
+    print("Transcription:")
+    print(result.text)
+    #
+
+    # Clean up
+    await model.unload()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
 #
diff --git a/samples/python/langchain-integration/src/app.py b/samples/python/langchain-integration/src/app.py
index 4f8661cd..1cfc8790 100644
--- a/samples/python/langchain-integration/src/app.py
+++ b/samples/python/langchain-integration/src/app.py
@@ -1,73 +1,80 @@
 #
 #
+import asyncio
 from foundry_local_sdk import Configuration, FoundryLocalManager
 from langchain_openai import ChatOpenAI
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.output_parsers import StrOutputParser
 #
 
-#
-# Initialize the Foundry Local SDK
-config = Configuration(app_name="foundry_local_samples")
-FoundryLocalManager.initialize(config)
-manager = FoundryLocalManager.instance
-
-# Download and register all execution providers.
-current_ep = ""
-def _ep_progress(ep_name: str, percent: float):
-    global current_ep
-    if ep_name != current_ep:
-        if current_ep:
-            print()
-        current_ep = ep_name
-    print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
+async def main():
+    #
+    # Initialize the Foundry Local SDK
+    config = Configuration(app_name="foundry_local_samples")
+    await FoundryLocalManager.initialize(config)
+    manager = FoundryLocalManager.instance
 
-manager.download_and_register_eps(progress_callback=_ep_progress)
-if current_ep:
+    # Download and register all execution providers.
+    current_ep = ""
+    def _ep_progress(ep_name: str, percent: float):
+        nonlocal current_ep
+        if ep_name != current_ep:
+            if current_ep:
+                print()
+            current_ep = ep_name
+        print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
+
+    await manager.download_and_register_eps(progress_callback=_ep_progress)
+    if current_ep:
+        print()
+
+    # Load a model
+    model = await manager.catalog.get_model("qwen2.5-0.5b")
+    await model.download(
+        lambda progress: print(
+            f"\rDownloading model: {progress:.2f}%",
+            end="",
+            flush=True,
+        )
+    )
     print()
+    await model.load()
+    print("Model loaded.")
+
+    # Start the web service to expose an OpenAI-compatible endpoint
+    await manager.start_web_service()
+    base_url = f"{manager.urls[0]}/v1"
+    #
 
-# Load a model
-model = manager.catalog.get_model("qwen2.5-0.5b")
-model.download(
-    lambda progress: print(
-        f"\rDownloading model: {progress:.2f}%",
-        end="",
-        flush=True,
+    #
+    # Create a LangChain ChatOpenAI instance pointing to the local endpoint
+    llm = ChatOpenAI(
+        base_url=base_url,
+        api_key="none",
+        model=model.id,
     )
-)
-print()
-model.load()
-print("Model loaded.")
+    #
 
-# Start the web service to expose an OpenAI-compatible endpoint
-manager.start_web_service()
-base_url = f"{manager.urls[0]}/v1"
-#
+    #
+    # Create a translation chain
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", "You are a translator. Translate the following text to {language}. Only output the translation, nothing else."),
+        ("user", "{text}")
+    ])
 
-#
-# Create a LangChain ChatOpenAI instance pointing to the local endpoint
-llm = ChatOpenAI(
-    base_url=base_url,
-    api_key="none",
-    model=model.id,
-)
-#
+    chain = prompt | llm | StrOutputParser()
 
-#
-# Create a translation chain
-prompt = ChatPromptTemplate.from_messages([
-    ("system", "You are a translator. Translate the following text to {language}. Only output the translation, nothing else."),
-    ("user", "{text}")
-])
+    # Run the chain
+    result = await chain.ainvoke({"language": "Spanish", "text": "Hello, how are you today?"})
+    print(f"Translation: {result}")
+    #
 
-chain = prompt | llm | StrOutputParser()
+    # Clean up
+    await model.unload()
+    await manager.stop_web_service()
 
-# Run the chain
-result = chain.invoke({"language": "Spanish", "text": "Hello, how are you today?"})
-print(f"Translation: {result}")
-#
 
-# Clean up
-model.unload()
-manager.stop_web_service()
+if __name__ == "__main__":
+    asyncio.run(main())
 #
diff --git a/samples/python/native-chat-completions/src/app.py b/samples/python/native-chat-completions/src/app.py
index eba9df41..8fdb01f5 100644
--- a/samples/python/native-chat-completions/src/app.py
+++ b/samples/python/native-chat-completions/src/app.py
@@ -1,14 +1,15 @@
 #
 #
+import asyncio
 from foundry_local_sdk import Configuration, FoundryLocalManager
 #
 
 
-def main():
+async def main():
     #
     # Initialize the Foundry Local SDK
     config = Configuration(app_name="foundry_local_samples")
-    FoundryLocalManager.initialize(config)
+    await FoundryLocalManager.initialize(config)
     manager = FoundryLocalManager.instance
 
     # Download and register all execution providers.
@@ -21,13 +22,13 @@ def ep_progress(ep_name: str, percent: float):
             current_ep = ep_name
         print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
 
-    manager.download_and_register_eps(progress_callback=ep_progress)
+    await manager.download_and_register_eps(progress_callback=ep_progress)
     if current_ep:
         print()
 
     # Select and load a model from the catalog
-    model = manager.catalog.get_model("qwen2.5-0.5b")
-    model.download(
+    model = await manager.catalog.get_model("qwen2.5-0.5b")
+    await model.download(
         lambda progress: print(
             f"\rDownloading model: {progress:.2f}%",
             end="",
@@ -35,7 +36,7 @@ def ep_progress(ep_name: str, percent: float):
         )
     )
     print()
-    model.load()
+    await model.load()
     print("Model loaded and ready.")
 
     # Get a chat client
@@ -50,7 +51,7 @@ def ep_progress(ep_name: str, percent: float):
 
     # Stream the response token by token
     print("Assistant: ", end="", flush=True)
-    for chunk in client.complete_streaming_chat(messages):
+    async for chunk in client.complete_streaming_chat(messages):
         content = chunk.choices[0].delta.content
         if content:
             print(content, end="", flush=True)
@@ -58,10 +59,10 @@ def ep_progress(ep_name: str, percent: float):
     #
 
     # Clean up
-    model.unload()
+    await model.unload()
     print("Model unloaded.")
 
 
 if __name__ == "__main__":
-    main()
+    asyncio.run(main())
 #
diff --git a/samples/python/tool-calling/src/app.py b/samples/python/tool-calling/src/app.py
index db619550..22e89518 100644
--- a/samples/python/tool-calling/src/app.py
+++ b/samples/python/tool-calling/src/app.py
@@ -1,5 +1,6 @@
 #
 #
+import asyncio
 import json
 from foundry_local_sdk import Configuration, FoundryLocalManager
 #
@@ -83,7 +84,7 @@ def calculate(expression):
 #
 
 
-def process_tool_calls(messages, response, client):
+async def process_tool_calls(messages, response, client):
     """Handle tool calls in a loop until the model produces a final answer."""
     choice = response.choices[0].message
 
@@ -121,7 +122,7 @@ def process_tool_calls(messages, response, client):
         })
 
         # Send the updated conversation back
-        response = client.complete_chat(messages, tools=tools)
+        response = await client.complete_chat(messages, tools=tools)
         choice = response.choices[0].message
 
     return choice.content
@@ -129,10 +130,10 @@ def process_tool_calls(messages, response, client):
 #
 
 
-def main():
+async def main():
     # Initialize the Foundry Local SDK
     config = Configuration(app_name="foundry_local_samples")
-    FoundryLocalManager.initialize(config)
+    await FoundryLocalManager.initialize(config)
     manager = FoundryLocalManager.instance
 
     # Download and register all execution providers.
@@ -145,13 +146,13 @@ def ep_progress(ep_name: str, percent: float):
             current_ep = ep_name
         print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
 
-    manager.download_and_register_eps(progress_callback=ep_progress)
+    await manager.download_and_register_eps(progress_callback=ep_progress)
    if current_ep:
         print()
 
     # Select and load a model
-    model = manager.catalog.get_model("qwen2.5-0.5b")
-    model.download(
+    model = await manager.catalog.get_model("qwen2.5-0.5b")
+    await model.download(
         lambda progress: print(
             f"\rDownloading model: {progress:.2f}%",
             end="",
@@ -159,7 +160,7 @@ def ep_progress(ep_name: str, percent: float):
         )
     )
     print()
-    model.load()
+    await model.load()
     print("Model loaded and ready.")
 
     # Get a chat client
@@ -179,17 +180,17 @@ def ep_progress(ep_name: str, percent: float):
     ]
 
     print("Sending request with tools...")
-    response = client.complete_chat(messages, tools=tools)
-    answer = process_tool_calls(messages, response, client)
+    response = await client.complete_chat(messages, tools=tools)
+    answer = await process_tool_calls(messages, response, client)
 
     print(f"\nAssistant: {answer}")
 
     # Clean up
-    model.unload()
+    await model.unload()
     print("Model unloaded.")
     #
 
 
 if __name__ == "__main__":
-    main()
+    asyncio.run(main())
 #
diff --git a/samples/python/tutorial-chat-assistant/src/app.py b/samples/python/tutorial-chat-assistant/src/app.py
index 13f1c500..2650c0a2 100644
--- a/samples/python/tutorial-chat-assistant/src/app.py
+++ b/samples/python/tutorial-chat-assistant/src/app.py
@@ -1,14 +1,15 @@
 #
 #
+import asyncio
 from foundry_local_sdk import Configuration, FoundryLocalManager
 #
 
 
-def main():
+async def main():
     #
     # Initialize the Foundry Local SDK
     config = Configuration(app_name="foundry_local_samples")
-    FoundryLocalManager.initialize(config)
+    await FoundryLocalManager.initialize(config)
     manager = FoundryLocalManager.instance
 
     # Download and register all execution providers.
@@ -21,15 +22,15 @@ def ep_progress(ep_name: str, percent: float):
             current_ep = ep_name
         print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
 
-    manager.download_and_register_eps(progress_callback=ep_progress)
+    await manager.download_and_register_eps(progress_callback=ep_progress)
     if current_ep:
         print()
 
     # Select and load a model from the catalog
-    model = manager.catalog.get_model("qwen2.5-0.5b")
-    model.download(lambda progress: print(f"\rDownloading model: {progress:.2f}%", end="", flush=True))
+    model = await manager.catalog.get_model("qwen2.5-0.5b")
+    await model.download(lambda progress: print(f"\rDownloading model: {progress:.2f}%", end="", flush=True))
     print()
-    model.load()
+    await model.load()
     print("Model loaded and ready.")
 
     # Get a chat client
@@ -62,7 +63,7 @@ def ep_progress(ep_name: str, percent: float):
         # Stream the response token by token
         print("Assistant: ", end="", flush=True)
         full_response = ""
-        for chunk in client.complete_streaming_chat(messages):
+        async for chunk in client.complete_streaming_chat(messages):
             content = chunk.choices[0].delta.content
             if content:
                 print(content, end="", flush=True)
@@ -75,10 +76,10 @@ def ep_progress(ep_name: str, percent: float):
     #
 
     # Clean up - unload the model
-    model.unload()
+    await model.unload()
     print("Model unloaded. Goodbye!")
 
 
 if __name__ == "__main__":
-    main()
+    asyncio.run(main())
 #
diff --git a/samples/python/tutorial-document-summarizer/src/app.py b/samples/python/tutorial-document-summarizer/src/app.py
index 055bb992..fb50f6ad 100644
--- a/samples/python/tutorial-document-summarizer/src/app.py
+++ b/samples/python/tutorial-document-summarizer/src/app.py
@@ -1,23 +1,24 @@
 #
 #
+import asyncio
 import sys
 from pathlib import Path
 from foundry_local_sdk import Configuration, FoundryLocalManager
 #
 
 
-def summarize_file(client, file_path, system_prompt):
+async def summarize_file(client, file_path, system_prompt):
     """Summarize a single file and print the result."""
     content = Path(file_path).read_text(encoding="utf-8")
     messages = [
         {"role": "system", "content": system_prompt},
         {"role": "user", "content": content}
     ]
-    response = client.complete_chat(messages)
+    response = await client.complete_chat(messages)
     print(response.choices[0].message.content)
 
 
-def summarize_directory(client, directory, system_prompt):
+async def summarize_directory(client, directory, system_prompt):
     """Summarize all .txt files in a directory."""
     txt_files = sorted(Path(directory).glob("*.txt"))
 
@@ -27,15 +28,15 @@ def summarize_directory(client, directory, system_prompt):
 
     for txt_file in txt_files:
         print(f"--- {txt_file.name} ---")
-        summarize_file(client, txt_file, system_prompt)
+        await summarize_file(client, txt_file, system_prompt)
         print()
 
 
-def main():
+async def main():
     #
     # Initialize the Foundry Local SDK
     config = Configuration(app_name="foundry_local_samples")
-    FoundryLocalManager.initialize(config)
+    await FoundryLocalManager.initialize(config)
     manager = FoundryLocalManager.instance
 
     # Download and register all execution providers.
@@ -48,15 +49,15 @@ def ep_progress(ep_name: str, percent: float):
             current_ep = ep_name
         print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
 
-    manager.download_and_register_eps(progress_callback=ep_progress)
+    await manager.download_and_register_eps(progress_callback=ep_progress)
     if current_ep:
         print()
 
     # Select and load a model from the catalog
-    model = manager.catalog.get_model("qwen2.5-0.5b")
-    model.download(lambda p: print(f"\rDownloading model: {p:.2f}%", end="", flush=True))
+    model = await manager.catalog.get_model("qwen2.5-0.5b")
+    await model.download(lambda p: print(f"\rDownloading model: {p:.2f}%", end="", flush=True))
     print()
-    model.load()
+    await model.load()
     print("Model loaded and ready.\n")
 
     # Get a chat client
@@ -75,17 +76,17 @@ def ep_progress(ep_name: str, percent: float):
     #
 
     if target_path.is_dir():
-        summarize_directory(client, target_path, system_prompt)
+        await summarize_directory(client, target_path, system_prompt)
     else:
         print(f"--- {target_path.name} ---")
-        summarize_file(client, target_path, system_prompt)
+        await summarize_file(client, target_path, system_prompt)
     #
 
     # Clean up
-    model.unload()
+    await model.unload()
     print("\nModel unloaded. Done!")
 
 
 if __name__ == "__main__":
-    main()
+    asyncio.run(main())
 #
diff --git a/samples/python/tutorial-tool-calling/src/app.py b/samples/python/tutorial-tool-calling/src/app.py
index bb22bfe0..097432c2 100644
--- a/samples/python/tutorial-tool-calling/src/app.py
+++ b/samples/python/tutorial-tool-calling/src/app.py
@@ -1,5 +1,6 @@
 #
 #
+import asyncio
 import json
 from foundry_local_sdk import Configuration, FoundryLocalManager
 #
@@ -83,7 +84,7 @@ def calculate(expression):
 #
 
 
-def process_tool_calls(messages, response, client):
+async def process_tool_calls(messages, response, client):
     """Handle tool calls in a loop until the model produces a final answer."""
     choice = response.choices[0].message
 
@@ -121,7 +122,7 @@ def process_tool_calls(messages, response, client):
         })
 
         # Send the updated conversation back
-        response = client.complete_chat(messages, tools=tools)
+        response = await client.complete_chat(messages, tools=tools)
         choice = response.choices[0].message
 
     return choice.content
@@ -129,10 +130,10 @@ def process_tool_calls(messages, response, client):
 #
 
 
-def main():
+async def main():
     # Initialize the Foundry Local SDK
     config = Configuration(app_name="foundry_local_samples")
-    FoundryLocalManager.initialize(config)
+    await FoundryLocalManager.initialize(config)
     manager = FoundryLocalManager.instance
 
     # Download and register all execution providers.
@@ -145,13 +146,13 @@ def ep_progress(ep_name: str, percent: float):
             current_ep = ep_name
         print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
 
-    manager.download_and_register_eps(progress_callback=ep_progress)
+    await manager.download_and_register_eps(progress_callback=ep_progress)
     if current_ep:
         print()
 
     # Select and load a model
-    model = manager.catalog.get_model("qwen2.5-0.5b")
-    model.download(
+    model = await manager.catalog.get_model("qwen2.5-0.5b")
+    await model.download(
         lambda progress: print(
             f"\rDownloading model: {progress:.2f}%",
             end="",
@@ -159,7 +160,7 @@ def ep_progress(ep_name: str, percent: float):
         )
     )
     print()
-    model.load()
+    await model.load()
     print("Model loaded and ready.")
 
     # Get a chat client
@@ -183,18 +184,18 @@ def ep_progress(ep_name: str, percent: float):
 
         messages.append({"role": "user", "content": user_input})
 
-        response = client.complete_chat(messages, tools=tools)
-        answer = process_tool_calls(messages, response, client)
+        response = await client.complete_chat(messages, tools=tools)
+        answer = await process_tool_calls(messages, response, client)
 
         messages.append({"role": "assistant", "content": answer})
         print(f"Assistant: {answer}\n")
 
     # Clean up
-    model.unload()
+    await model.unload()
     print("Model unloaded. Goodbye!")
     #
 
 
 if __name__ == "__main__":
-    main()
+    asyncio.run(main())
 #
diff --git a/samples/python/tutorial-voice-to-text/src/app.py b/samples/python/tutorial-voice-to-text/src/app.py
index 8ebbba1b..9585f6f8 100644
--- a/samples/python/tutorial-voice-to-text/src/app.py
+++ b/samples/python/tutorial-voice-to-text/src/app.py
@@ -1,14 +1,15 @@
 #
 #
+import asyncio
 from foundry_local_sdk import Configuration, FoundryLocalManager
 #
 
 
-def main():
+async def main():
     #
     # Initialize the Foundry Local SDK
     config = Configuration(app_name="foundry_local_samples")
-    FoundryLocalManager.initialize(config)
+    await FoundryLocalManager.initialize(config)
     manager = FoundryLocalManager.instance
     #
 
@@ -22,14 +23,14 @@ def ep_progress(ep_name: str, percent: float):
             current_ep = ep_name
         print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
 
-    manager.download_and_register_eps(progress_callback=ep_progress)
+    await manager.download_and_register_eps(progress_callback=ep_progress)
     if current_ep:
         print()
     #
 
     #
     # Load the speech-to-text model
-    speech_model = manager.catalog.get_model("whisper-tiny")
-    speech_model.download(
+    speech_model = await manager.catalog.get_model("whisper-tiny")
+    await speech_model.download(
         lambda progress: print(
             f"\rDownloading speech model: {progress:.2f}%",
             end="",
@@ -37,22 +38,22 @@ def ep_progress(ep_name: str, percent: float):
         )
     )
     print()
-    speech_model.load()
+    await speech_model.load()
     print("Speech model loaded.")
 
     # Transcribe the audio file
     audio_client = speech_model.get_audio_client()
-    transcription = audio_client.transcribe("meeting-notes.wav")
+    transcription = await audio_client.transcribe("meeting-notes.wav")
     print(f"\nTranscription:\n{transcription.text}")
 
     # Unload the speech model to free memory
-    speech_model.unload()
+    await speech_model.unload()
     #
 
     #
     # Load the chat model for summarization
-    chat_model = manager.catalog.get_model("qwen2.5-0.5b")
-    chat_model.download(
+    chat_model = await manager.catalog.get_model("qwen2.5-0.5b")
+    await chat_model.download(
         lambda progress: print(
             f"\rDownloading chat model: {progress:.2f}%",
             end="",
@@ -60,7 +61,7 @@ def ep_progress(ep_name: str, percent: float):
         )
     )
     print()
-    chat_model.load()
+    await chat_model.load()
     print("Chat model loaded.")
 
     # Summarize the transcription into organized notes
@@ -76,16 +77,16 @@ def ep_progress(ep_name: str, percent: float):
         {"role": "user", "content": transcription.text},
     ]
 
-    response = client.complete_chat(messages)
+    response = await client.complete_chat(messages)
     summary = response.choices[0].message.content
     print(f"\nSummary:\n{summary}")
 
     # Clean up
-    chat_model.unload()
+    await chat_model.unload()
     print("\nDone. Models unloaded.")
     #
 
 
 if __name__ == "__main__":
-    main()
+    asyncio.run(main())
 #
diff --git a/samples/python/web-server/src/app.py b/samples/python/web-server/src/app.py
index 67117029..c4fe9c1e 100644
--- a/samples/python/web-server/src/app.py
+++ b/samples/python/web-server/src/app.py
@@ -1,73 +1,80 @@
 #
 #
+import asyncio
 import openai
 from foundry_local_sdk import Configuration, FoundryLocalManager
 #
 
-#
-# Initialize the Foundry Local SDK
-config = Configuration(app_name="foundry_local_samples")
-FoundryLocalManager.initialize(config)
-manager = FoundryLocalManager.instance
-
-# Download and register all execution providers.
-current_ep = ""
-def _ep_progress(ep_name: str, percent: float):
-    global current_ep
-    if ep_name != current_ep:
-        if current_ep:
-            print()
-        current_ep = ep_name
-    print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
+async def main():
+    #
+    # Initialize the Foundry Local SDK
+    config = Configuration(app_name="foundry_local_samples")
+    await FoundryLocalManager.initialize(config)
+    manager = FoundryLocalManager.instance
 
-manager.download_and_register_eps(progress_callback=_ep_progress)
-if current_ep:
+    # Download and register all execution providers.
+    current_ep = ""
+    def _ep_progress(ep_name: str, percent: float):
+        nonlocal current_ep
+        if ep_name != current_ep:
+            if current_ep:
+                print()
+            current_ep = ep_name
+        print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
+
+    await manager.download_and_register_eps(progress_callback=_ep_progress)
+    if current_ep:
+        print()
+
+    # Load a model
+    model = await manager.catalog.get_model("qwen2.5-0.5b")
+    await model.download(
+        lambda progress: print(
+            f"\rDownloading model: {progress:.2f}%",
+            end="",
+            flush=True,
+        )
+    )
     print()
+    await model.load()
+    print("Model loaded.")
 
-# Load a model
-model = manager.catalog.get_model("qwen2.5-0.5b")
-model.download(
-    lambda progress: print(
-        f"\rDownloading model: {progress:.2f}%",
-        end="",
-        flush=True,
+    # Start the web service to expose an OpenAI-compatible REST endpoint
+    await manager.start_web_service()
+    base_url = f"{manager.urls[0]}/v1"
+    #
+
+    #
+    # Use the OpenAI SDK to connect to the local REST endpoint
+    client = openai.OpenAI(
+        base_url=base_url,
+        api_key="none",
     )
-)
-print()
-model.load()
-print("Model loaded.")
+    #
 
-# Start the web service to expose an OpenAI-compatible REST endpoint
-manager.start_web_service()
-base_url = f"{manager.urls[0]}/v1"
-#
+    #
+    # Make a chat completion request via the REST API
+    response = client.chat.completions.create(
+        model=model.id,
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "What is the golden ratio?"}
+        ],
+        stream=True,
+    )
 
-#
-# Use the OpenAI SDK to connect to the local REST endpoint
-client = openai.OpenAI(
-    base_url=base_url,
-    api_key="none",
-)
-#
+    for chunk in response:
+        if chunk.choices[0].delta.content is not None:
+            print(chunk.choices[0].delta.content, end="", flush=True)
+    print()
+    #
 
-#
-# Make a chat completion request via the REST API
-response = client.chat.completions.create(
-    model=model.id,
-    messages=[
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": "What is the golden ratio?"}
-    ],
-    stream=True,
-)
+    # Clean up
+    await model.unload()
+    await manager.stop_web_service()
 
-for chunk in response:
-    if chunk.choices[0].delta.content is not None:
-        print(chunk.choices[0].delta.content, end="", flush=True)
-print()
-#
 
-# Clean up
-model.unload()
-manager.stop_web_service()
+if __name__ == "__main__":
+    asyncio.run(main())
 #
diff --git a/sdk/python/examples/chat_completion.py b/sdk/python/examples/chat_completion.py
index c0c58048..fabd89d0 100644
--- a/sdk/python/examples/chat_completion.py
+++ b/sdk/python/examples/chat_completion.py
@@ -10,31 +10,32 @@
 including model discovery, loading, and inference.
 """
 
+import asyncio
 from foundry_local_sdk import Configuration, FoundryLocalManager
 
 
-def main():
+async def main():
     # 1. Initialize the SDK
     config = Configuration(app_name="ChatCompletionExample")
     print("Initializing Foundry Local Manager")
-    FoundryLocalManager.initialize(config)
+    await FoundryLocalManager.initialize(config)
     manager = FoundryLocalManager.instance
 
     # Discover available EPs and register them explicitly when needed.
-    eps = manager.discover_eps()
+    eps = await manager.discover_eps()
     print("Available execution providers:")
     for ep in eps:
         print(f" - {ep.name} (registered: {ep.is_registered})")
 
-    ep_result = manager.download_and_register_eps()
+    ep_result = await manager.download_and_register_eps()
     print(f"EP registration success: {ep_result.success} ({ep_result.status})")
 
     # 2. Print available models in the catalog and cache
-    models = manager.catalog.list_models()
+    models = await manager.catalog.list_models()
     print("Available models in catalog:")
     for m in models:
         print(f" - {m.alias} ({m.id})")
 
-    cached_models = manager.catalog.get_cached_models()
+    cached_models = await manager.catalog.get_cached_models()
     print("\nCached models:")
     for m in cached_models:
         print(f" - {m.alias} ({m.id})")
@@ -42,22 +43,22 @@ def main():
     CACHED_MODEL_ALIAS = "qwen2.5-0.5b"
 
     # 3. Find a model from the cache (+ download if not cached)
-    model = manager.catalog.get_model(CACHED_MODEL_ALIAS)
+    model = await manager.catalog.get_model(CACHED_MODEL_ALIAS)
     if model is None:
         print(f"Model '{CACHED_MODEL_ALIAS}' not found in catalog.")
         print("Available models:")
-        for m in manager.catalog.list_models():
+        for m in await manager.catalog.list_models():
             print(f" - {m.alias} ({m.id})")
         return
 
-    if not model.is_cached:
+    if not await model.is_cached():
         print(f"Downloading {model.alias}...")
-        model.download(progress_callback=lambda pct: print(f" {pct:.1f}%", end="\r"))
+        await model.download(progress_callback=lambda pct: print(f" {pct:.1f}%", end="\r"))
         print()
 
     # 4. Load the model
     print(f"Loading {model.alias}...", end="")
-    model.load()
+    await model.load()
     print("loaded!")
 
     try:
@@ -65,14 +66,14 @@ def main():
         client = model.get_chat_client()
 
         print("\n--- Non-streaming ---")
-        response = client.complete_chat(
+        response = await client.complete_chat(
             messages=[{"role": "user", "content": "What is the capital of France? Reply briefly."}]
         )
         print(f"Response: {response.choices[0].message.content}")
 
         # 6. Streaming
         print("\n--- Streaming ---")
-        for chunk in client.complete_streaming_chat(
+        async for chunk in client.complete_streaming_chat(
             [{"role": "user", "content": "Tell me a short joke."}]
         ):
             if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
@@ -84,9 +85,9 @@ def main():
 
     finally:
         # 7. Cleanup
-        model.unload()
+        await model.unload()
         print("\nModel unloaded.")
 
 
 if __name__ == "__main__":
-    main()
+    asyncio.run(main())
diff --git a/sdk/python/pyproject.toml b/sdk/python/pyproject.toml
index ef93b6f7..851ea0a6 100644
--- a/sdk/python/pyproject.toml
+++ b/sdk/python/pyproject.toml
@@ -53,3 +53,5 @@ python_files = ["test_*.py"]
 python_classes = ["Test*"]
 python_functions = ["test_*"]
 timeout = 60
+asyncio_mode = "auto"
+asyncio_default_fixture_loop_scope = "session"
diff --git a/sdk/python/requirements-dev.txt b/sdk/python/requirements-dev.txt
index aea40875..c581a02b 100644
--- a/sdk/python/requirements-dev.txt
+++ b/sdk/python/requirements-dev.txt
@@ -2,4 +2,5 @@ build
 coverage
 pytest
+pytest-asyncio
 pytest-timeout
diff --git a/sdk/python/requirements.txt b/sdk/python/requirements.txt
index 666a3721..28daade2 100644
--- a/sdk/python/requirements.txt
+++ b/sdk/python/requirements.txt
@@ -1,8 +1,7 @@
 pydantic>=2.0.0
 requests>=2.32.4
 openai>=2.24.0
-# Standard native binary packages from the ORT-Nightly PyPI feed.
-foundry-local-core==1.0.0rc1
+foundry-local-core==1.0.0
 onnxruntime-core==1.24.4; sys_platform != "linux"
 onnxruntime-gpu==1.24.4; sys_platform == "linux"
 onnxruntime-genai-core==0.13.1; sys_platform != "linux"
diff --git a/sdk/python/src/catalog.py b/sdk/python/src/catalog.py
index 51f5bd8f..b79e2473 100644
--- a/sdk/python/src/catalog.py
+++ b/sdk/python/src/catalog.py
@@ -5,9 +5,9 @@
 
 from __future__ import annotations
 
+import asyncio
 import datetime
 import logging
-import threading
 from typing import List, Optional
 
 from pydantic import TypeAdapter
@@ -38,26 +38,26 @@ def __init__(self, model_load_manager: ModelLoadManager, core_interop: CoreInterop):
         """
         self._core_interop = core_interop
         self._model_load_manager = model_load_manager
-        self._lock = threading.Lock()
+        self._lock = asyncio.Lock()
 
         self._models: List[ModelInfo] = []
         self._model_alias_to_model = {}
         self._model_id_to_model_variant = {}
         self._last_fetch = datetime.datetime.min
 
-        response = core_interop.execute_command("get_catalog_name")
+        response = core_interop._execute_command("get_catalog_name")
 
         if response.error is not None:
             raise FoundryLocalException(f"Failed to get catalog name: {response.error}")
 
         self.name = response.data
 
-    def _update_models(self):
-        with self._lock:
+    async def _update_models(self):
+        async with self._lock:
             # refresh every 6 hours
             if (datetime.datetime.now() - self._last_fetch) < datetime.timedelta(hours=6):
                 return
 
-            response = self._core_interop.execute_command("get_model_list")
+            response = await self._core_interop.execute_command("get_model_list")
 
             if response.error is not None:
                 raise FoundryLocalException(f"Failed to get model list: {response.error}")
@@ -84,28 +84,28 @@ def _update_models(self):
             self._models = models
             self._last_fetch = datetime.datetime.now()
 
-    def _invalidate_cache(self):
-        with self._lock:
+    async def _invalidate_cache(self):
+        async with self._lock:
             self._last_fetch = datetime.datetime.min
 
-    def list_models(self) -> List[IModel]:
+    async def list_models(self) -> List[IModel]:
         """
         List the available models in the catalog.
 
         :return: List of IModel instances.
         """
-        self._update_models()
+        await self._update_models()
         return list(self._model_alias_to_model.values())
 
-    def get_model(self, model_alias: str) -> Optional[IModel]:
+    async def get_model(self, model_alias: str) -> Optional[IModel]:
         """
         Lookup a model by its alias.
 
         :param model_alias: Model alias.
         :return: IModel if found.
         """
-        self._update_models()
+        await self._update_models()
         return self._model_alias_to_model.get(model_alias)
 
-    def get_model_variant(self, model_id: str) -> Optional[IModel]:
+    async def get_model_variant(self, model_id: str) -> Optional[IModel]:
         """
         Lookup a model variant by its unique model id.
         NOTE: This will return an IModel with a single variant. Use get_model to get an IModel with all available
@@ -113,10 +113,10 @@ def get_model_variant(self, model_id: str) -> Optional[IModel]:
         :param model_id: Model id.
         :return: IModel if found.
         """
-        self._update_models()
+        await self._update_models()
         return self._model_id_to_model_variant.get(model_id)
 
-    def get_latest_version(self, model_or_model_variant: IModel) -> IModel:
+    async def get_latest_version(self, model_or_model_variant: IModel) -> IModel:
         """
         Resolve the latest catalog version for the provided model or variant.
 
@@ -124,7 +124,7 @@ def get_latest_version(self, model_or_model_variant: IModel) -> IModel:
         :return: Latest catalog version for the same model name.
         :raises FoundryLocalException: If the alias or name cannot be resolved.
         """
-        self._update_models()
+        await self._update_models()
 
         model = self._model_alias_to_model.get(model_or_model_variant.alias)
         if model is None:
@@ -144,14 +144,14 @@ def get_latest_version(self, model_or_model_variant: IModel) -> IModel:
 
         return model_or_model_variant if latest.id == model_or_model_variant.id else latest
 
-    def get_cached_models(self) -> List[IModel]:
+    async def get_cached_models(self) -> List[IModel]:
         """
         Get a list of currently downloaded models from the model cache.
 
         :return: List of IModel instances.
         """
-        self._update_models()
+        await self._update_models()
 
-        cached_model_ids = get_cached_model_ids(self._core_interop)
+        cached_model_ids = await get_cached_model_ids(self._core_interop)
 
         cached_models: List[IModel] = []
         for model_id in cached_model_ids:
@@ -161,14 +161,14 @@ def get_cached_models(self) -> List[IModel]:
 
         return cached_models
 
-    def get_loaded_models(self) -> List[IModel]:
+    async def get_loaded_models(self) -> List[IModel]:
         """
         Get a list of the currently loaded models.
 
         :return: List of IModel instances.
         """
-        self._update_models()
+        await self._update_models()
 
-        loaded_model_ids = self._model_load_manager.list_loaded()
+        loaded_model_ids = await self._model_load_manager.list_loaded()
 
         loaded_models: List[IModel] = []
         for model_id in loaded_model_ids:
diff --git a/sdk/python/src/detail/core_interop.py b/sdk/python/src/detail/core_interop.py
index 1cd53e33..aa520f03 100644
--- a/sdk/python/src/detail/core_interop.py
+++ b/sdk/python/src/detail/core_interop.py
@@ -5,11 +5,13 @@
 
 from __future__ import annotations
 
+import asyncio
 import ctypes
 import json
 import logging
 import os
 import sys
+import threading
 from dataclasses import dataclass
 from pathlib import Path
 
@@ -104,6 +106,9 @@ class CoreInterop:
 
     instance = None
 
+    # Serialize native calls — the underlying C library may not be thread-safe.
+    _native_lock = threading.Lock()
+
     # Callback function for native interop.
     # Returns c_int: 0 = continue, 1 = cancel.
     CALLBACK_TYPE = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p)
@@ -218,7 +223,7 @@ def __init__(self, config: Configuration):
             config.additional_settings["Bootstrap"] = "true"
 
         request = InteropRequest(params=config.as_dictionary())
-        response = self.execute_command("initialize", request)
+        response = self._execute_command("initialize", request)
         if response.error is not None:
             raise FoundryLocalException(f"Failed to initialize Foundry.Local.Core: {response.error}")
 
@@ -226,41 +231,42 @@ def __init__(self, config: Configuration):
 
     def _execute_command(self, command: str, interop_request: InteropRequest = None,
                          callback: CoreInterop.CALLBACK_TYPE = None):
-        cmd_ptr, cmd_len, cmd_buf = CoreInterop._to_c_buffer(command)
-        data_ptr, data_len, data_buf = CoreInterop._to_c_buffer(interop_request.to_json() if interop_request else None)
+        with CoreInterop._native_lock:
+            cmd_ptr, cmd_len, cmd_buf = CoreInterop._to_c_buffer(command)
+            data_ptr, data_len, data_buf = CoreInterop._to_c_buffer(interop_request.to_json() if interop_request else None)
 
-        req = RequestBuffer(Command=cmd_ptr, CommandLength=cmd_len, Data=data_ptr, DataLength=data_len)
-        resp = ResponseBuffer()
-        lib = CoreInterop._flcore_library
+            req = RequestBuffer(Command=cmd_ptr, CommandLength=cmd_len, Data=data_ptr, DataLength=data_len)
+            resp = ResponseBuffer()
+            lib = CoreInterop._flcore_library
 
-        if (callback is not None):
-            # If a callback is provided, use the execute_command_with_callback method
-            # We need a helper to do the initial conversion from ctypes to Python and pass it through to the
-            # provided callback function
-            callback_helper = CallbackHelper(callback)
-            callback_py_obj = ctypes.py_object(callback_helper)
-            callback_helper_ptr = ctypes.cast(ctypes.pointer(callback_py_obj), ctypes.c_void_p)
-            callback_fn = CoreInterop.CALLBACK_TYPE(CallbackHelper.callback)
+            if (callback is not None):
+                # If a callback is provided, use the execute_command_with_callback method
+                # We need a helper to do the initial conversion from ctypes to Python and pass it through to the
+                # provided callback function
+                callback_helper = CallbackHelper(callback)
+                callback_py_obj = ctypes.py_object(callback_helper)
+                callback_helper_ptr = ctypes.cast(ctypes.pointer(callback_py_obj), ctypes.c_void_p)
+                callback_fn = CoreInterop.CALLBACK_TYPE(CallbackHelper.callback)
 
-            lib.execute_command_with_callback(ctypes.byref(req), ctypes.byref(resp), callback_fn, callback_helper_ptr)
+                lib.execute_command_with_callback(ctypes.byref(req), ctypes.byref(resp), callback_fn, callback_helper_ptr)
 
-            if callback_helper.exception is not None:
-                raise callback_helper.exception
-        else:
-            lib.execute_command(ctypes.byref(req), ctypes.byref(resp))
+                if callback_helper.exception is not None:
+                    raise callback_helper.exception
+            else:
+                lib.execute_command(ctypes.byref(req), ctypes.byref(resp))
 
-        req = None  # Free Python reference to request
+            req = None  # Free Python reference to request
 
-        response_str = ctypes.string_at(resp.Data, resp.DataLength).decode("utf-8") if resp.Data else None
-        error_str = ctypes.string_at(resp.Error, resp.ErrorLength).decode("utf-8") if resp.Error else None
+            response_str = ctypes.string_at(resp.Data, resp.DataLength).decode("utf-8") if resp.Data else None
+            error_str = ctypes.string_at(resp.Error, resp.ErrorLength).decode("utf-8") if resp.Error else None
 
-        # C# owns the memory in the response so we need to free it explicitly
-        lib.free_response(resp)
-
-        return Response(data=response_str, error=error_str)
+            # C# owns the memory in the response so we need to free it explicitly
+            lib.free_response(resp)
+
+            return Response(data=response_str, error=error_str)
 
-    def execute_command(self, command_name: str, command_input: Optional[InteropRequest] = None) -> Response:
-        """Execute a command synchronously.
+    async def execute_command(self, command_name: str, command_input: Optional[InteropRequest] = None) -> Response:
+        """Execute a command asynchronously.
 
         Args:
             command_name: The native command name (e.g. ``"get_model_list"``).
@@ -272,10 +278,9 @@ def execute_command(self, command_name: str, command_input: Optional[InteropRequ
 
         logger.debug("Executing command: %s Input: %s", command_name,
                      command_input.params if command_input else None)
-        response = self._execute_command(command_name, command_input)
-        return response
+        return await asyncio.to_thread(self._execute_command, command_name, command_input)
 
-    def execute_command_with_callback(self, command_name: str, command_input: Optional[InteropRequest],
+    async def execute_command_with_callback(self, command_name: str, command_input: Optional[InteropRequest],
                                       callback: Callable[[str], None]) -> Response:
         """Execute a command with a streaming callback.
 
@@ -292,14 +297,13 @@ def execute_command_with_callback(self, command_name: str, command_input: Option
         """
         logger.debug("Executing command with callback: %s Input: %s", command_name,
                      command_input.params if command_input else None)
-        response = self._execute_command(command_name, command_input, callback)
-        return response
+        return await asyncio.to_thread(self._execute_command, command_name, command_input, callback)
 
 
-def get_cached_model_ids(core_interop: CoreInterop) -> list[str]:
+async def get_cached_model_ids(core_interop: CoreInterop) -> list[str]:
     """Get the list of models that have been downloaded and are cached."""
 
-    response = core_interop.execute_command("get_cached_models")
+    response = await core_interop.execute_command("get_cached_models")
     if response.error is not None:
         raise FoundryLocalException(f"Failed to get cached models: {response.error}")
 
diff --git a/sdk/python/src/detail/model.py b/sdk/python/src/detail/model.py
index 189920b1..9f13d23e 100644
--- a/sdk/python/src/detail/model.py
+++ b/sdk/python/src/detail/model.py
@@ -104,35 +104,33 @@ def supports_tool_calling(self) -> Optional[bool]:
         """Whether the currently selected variant supports tool/function calling."""
         return self._selected_variant.supports_tool_calling
 
-    @property
-    def is_cached(self) -> bool:
+    async def is_cached(self) -> bool:
         """Is the currently selected variant cached locally?"""
-        return self._selected_variant.is_cached
+        return await self._selected_variant.is_cached()
 
-    @property
-    def is_loaded(self) -> bool:
+    async def is_loaded(self) -> bool:
         """Is the currently selected variant loaded in memory?"""
-        return self._selected_variant.is_loaded
+        return await self._selected_variant.is_loaded()
 
-    def download(self, progress_callback: Optional[Callable[[float], None]] = None) -> None:
+    async def download(self, progress_callback: Optional[Callable[[float], None]] = None) -> None:
         """Download the currently selected variant."""
-        self._selected_variant.download(progress_callback)
+        await self._selected_variant.download(progress_callback)
 
-    def get_path(self) -> str:
+    async def get_path(self) -> str:
         """Get the path to the currently selected variant."""
-        return self._selected_variant.get_path()
+        return await self._selected_variant.get_path()
 
-    def load(self) -> None:
+    async def load(self) -> None:
         """Load the currently selected variant into memory."""
-        self._selected_variant.load()
+        await self._selected_variant.load()
 
-    def unload(self) -> None:
+    async def unload(self) -> None:
         """Unload the currently selected variant from memory."""
-        self._selected_variant.unload()
+        await self._selected_variant.unload()
 
-    def remove_from_cache(self) -> None:
+    async def remove_from_cache(self) -> None:
         """Remove the currently selected variant from the local cache."""
-        self._selected_variant.remove_from_cache()
+        await self._selected_variant.remove_from_cache()
 
     def get_chat_client(self) -> ChatClient:
         """Get a chat client for the currently selected variant."""
diff --git a/sdk/python/src/detail/model_load_manager.py b/sdk/python/src/detail/model_load_manager.py
index 8ffd087a..2633d18c 100644
--- a/sdk/python/src/detail/model_load_manager.py
+++ b/sdk/python/src/detail/model_load_manager.py
@@ -4,6 +4,7 @@
 # --------------------------------------------------------------------------
 from __future__ import annotations
 
+import asyncio
 import json
 import logging
 import requests
@@ -32,7 +33,7 @@ def __init__(self, core_interop: CoreInterop, external_service_url: str = None):
         self._core_interop = core_interop
         self._external_service_url = external_service_url
 
-    def load(self, model_id: str) -> None:
+    async def load(self, model_id: str) -> None:
         """
         Load a model by its ID.
 
@@ -47,37 +48,37 @@ def load(self, model_id: str) -> None:
             communicating with the external service.
         """
         if self._external_service_url:
-            self._web_load_model(model_id)
+            await self._web_load_model(model_id)
             return
 
         request = InteropRequest({"Model": model_id})
-        response = self._core_interop.execute_command("load_model", request)
+        response = await self._core_interop.execute_command("load_model", request)
 
         if response.error is not None:
             raise FoundryLocalException(f"Failed to load model {model_id}: {response.error}")
 
-    def unload(self, model_id: str) -> None:
+    async def unload(self, model_id: str) -> None:
         """
         Unload a model by its ID.
 
         :param model_id: The ID of the model to unload.
         """
         if self._external_service_url:
-            self._web_unload_model(model_id)
+            await self._web_unload_model(model_id)
             return
 
         request = InteropRequest({"Model": model_id})
-        response = self._core_interop.execute_command("unload_model", request)
+        response = await self._core_interop.execute_command("unload_model", request)
 
         if response.error is not None:
             raise FoundryLocalException(f"Failed to unload model {model_id}: {response.error}")
 
-    def list_loaded(self) -> list[str]:
+    async def list_loaded(self) -> list[str]:
         """
         List loaded models.
 
         :return: List of loaded model IDs
         """
         if self._external_service_url:
-            return self._web_list_loaded_models()
+            return await self._web_list_loaded_models()
 
-        response = self._core_interop.execute_command("list_loaded_models")
+        response = await self._core_interop.execute_command("list_loaded_models")
 
         if response.error is not None:
             raise FoundryLocalException(f"Failed to list loaded models: {response.error}")
@@ -88,9 +89,12 @@ def list_loaded(self) -> list[str]:
 
         return model_ids
 
-    def _web_list_loaded_models(self) -> List[str]:
+    async def _web_list_loaded_models(self) -> List[str]:
         try:
-            response = requests.get(f"{self._external_service_url}/models/loaded", headers=self._headers, timeout=10)
+            response = await asyncio.to_thread(
+                requests.get, f"{self._external_service_url}/models/loaded",
+                headers=self._headers, timeout=10
+            )
 
             if not response.ok:
                 raise FoundryLocalException(
@@ -109,7 +113,7 @@ def _web_list_loaded_models(self) -> List[str]:
         except json.JSONDecodeError as e:
             raise FoundryLocalException(f"Failed to decode JSON response: Response was: {content}") from e
 
-    def _web_load_model(self, model_id: str) -> None:
+    async def _web_load_model(self, model_id: str) -> None:
         """
         Load a model via the external web service.
 
@@ -126,7 +130,7 @@ def _web_load_model(self, model_id: str) -> None:
         #     }
         # response = requests.get(url, params=query_params)
 
-        response = requests.get(url, headers=self._headers, timeout=10)
+        response = await asyncio.to_thread(requests.get, url, headers=self._headers, timeout=10)
 
         if not response.ok:
             raise FoundryLocalException(
@@ -143,12 +147,12 @@ def _web_load_model(self, model_id: str) -> None:
                 f"HTTP request failed when loading model {model_id} from {self._external_service_url}: {e}"
             ) from e
 
-    def _web_unload_model(self, model_id: str) -> None:
+    async def _web_unload_model(self, model_id: str) -> None:
         try:
             encoded_model_id = quote(model_id)
             url = f"{self._external_service_url}/models/unload/{encoded_model_id}"
 
-            response = requests.get(url, headers=self._headers, timeout=10)
+            response = await asyncio.to_thread(requests.get, url, headers=self._headers, timeout=10)
 
             if not response.ok:
                 raise FoundryLocalException(
diff --git a/sdk/python/src/detail/model_variant.py b/sdk/python/src/detail/model_variant.py
index a5ac02d4..e6fab104 100644
--- a/sdk/python/src/detail/model_variant.py
+++ b/sdk/python/src/detail/model_variant.py
@@ -4,6 +4,7 @@
 # --------------------------------------------------------------------------
 from __future__ import annotations
 
+import asyncio
 import logging
 from typing import Callable, List, Optional
 
@@ -99,19 +100,17 @@ def supports_tool_calling(self) -> Optional[bool]:
         """Whether this variant supports tool/function calling, or ``None`` if unknown."""
         return self._model_info.supports_tool_calling
 
-    @property
-    def is_cached(self) -> bool:
+    async def is_cached(self) -> bool:
         """``True`` if this variant is present in the local model cache."""
-        cached_model_ids = get_cached_model_ids(self._core_interop)
+        cached_model_ids = await get_cached_model_ids(self._core_interop)
         return self.id in cached_model_ids
 
-    @property
-    def is_loaded(self) -> bool:
+    async def is_loaded(self) -> bool:
         """``True`` if this variant is currently loaded into memory."""
-        loaded_model_ids = self._model_load_manager.list_loaded()
+        loaded_model_ids = await self._model_load_manager.list_loaded()
         return self.id in loaded_model_ids
 
-    def download(self, progress_callback: Callable[[float], None] = None):
+    async def download(self, progress_callback: Callable[[float], None] = None):
         """Download this variant to the local cache.
 
         Args:
@@ -120,18 +119,19 @@ def download(self, progress_callback: Callable[[float], None] = None):
         """
         request = InteropRequest(params={"Model": self.id})
         if progress_callback is None:
-            response = self._core_interop.execute_command("download_model", request)
+            response = await self._core_interop.execute_command("download_model", request)
         else:
-            response = self._core_interop.execute_command_with_callback(
+            loop = asyncio.get_running_loop()
+            response = await self._core_interop.execute_command_with_callback(
                 "download_model",
                 request,
-                lambda pct_str: progress_callback(float(pct_str))
+                lambda pct_str: loop.call_soon_threadsafe(progress_callback, float(pct_str))
             )
         logger.info("Download response: %s", response)
 
         if response.error is not None:
             raise FoundryLocalException(f"Failed to download model: {response.error}")
 
-    def get_path(self) -> str:
+    async def get_path(self) -> str:
         """Get the local file-system path to this variant if cached.
 
         Returns:
@@ -141,27 +141,27 @@ def get_path(self) -> str:
             FoundryLocalException: If the model path cannot be retrieved.
         """
         request = InteropRequest(params={"Model": self.id})
-        response = self._core_interop.execute_command("get_model_path", request)
+        response = await self._core_interop.execute_command("get_model_path", request)
 
         if response.error is not None:
             raise FoundryLocalException(f"Failed to get model path: {response.error}")
 
         return response.data
 
-    def load(self) -> None:
+    async def load(self) -> None:
         """Load this variant into memory for inference."""
-        self._model_load_manager.load(self.id)
+        await self._model_load_manager.load(self.id)
 
-    def remove_from_cache(self) -> None:
+    async def remove_from_cache(self) -> None:
         """Remove this variant from the local model cache."""
         request = InteropRequest(params={"Model": self.id})
-        response = self._core_interop.execute_command("remove_cached_model", request)
+        response = await self._core_interop.execute_command("remove_cached_model", request)
 
         if response.error is not None:
             raise FoundryLocalException(f"Failed to remove model from cache: {response.error}")
 
-    def unload(self) -> None:
+    async def unload(self) -> None:
         """Unload this variant from memory."""
-        self._model_load_manager.unload(self.id)
+        await self._model_load_manager.unload(self.id)
 
     def get_chat_client(self) -> ChatClient:
         """Create an OpenAI-compatible ``ChatClient`` for this variant."""
diff --git a/sdk/python/src/foundry_local_manager.py b/sdk/python/src/foundry_local_manager.py
index a649f8e5..b402c500 100644
--- a/sdk/python/src/foundry_local_manager.py
+++ b/sdk/python/src/foundry_local_manager.py
@@ -5,6 +5,7 @@
 
 from __future__ import annotations
 
+import asyncio
 import json
 import logging
 import threading
@@ -27,8 +28,8 @@ class FoundryLocalManager:
     """Singleton manager for Foundry Local SDK operations.
 
-    Call ``FoundryLocalManager.initialize(config)`` once at startup, then access
-    the singleton via ``FoundryLocalManager.instance``.
+    Call ``await FoundryLocalManager.initialize(config)`` once at startup, then
+    access the singleton via ``FoundryLocalManager.instance``.
 
     Attributes:
         instance: The singleton ``FoundryLocalManager`` instance (set after ``initialize``).
@@ -40,17 +41,20 @@ class FoundryLocalManager:
     instance: FoundryLocalManager = None
 
     @staticmethod
-    def initialize(config: Configuration):
+    async def initialize(config: Configuration):
         """Initialize the Foundry Local SDK with the given configuration.
 
-        This method must be called before using any other part of the SDK.
+        This coroutine must be awaited before using any other part of the SDK::
+
+            await FoundryLocalManager.initialize(config)
+            manager = FoundryLocalManager.instance
 
         Args:
            config: Configuration object for the SDK.
         """
-        # Delegate singleton creation to the constructor, which enforces
-        # the singleton invariant under a lock and sets `instance`.
-        FoundryLocalManager(config)
+        # Run the synchronous constructor in a thread to avoid blocking
+        # the event loop during native library initialization.
+        await asyncio.to_thread(FoundryLocalManager, config)
 
     def __init__(self, config: Configuration):
         # Enforce singleton creation under a class-level lock and ensure
@@ -66,6 +70,7 @@ def __init__(self, config: Configuration):
             FoundryLocalManager.instance = self
 
         self.urls = None
+        self._async_lock = asyncio.Lock()
 
     def _initialize(self):
         set_default_logger_severity(self.config.log_level)
@@ -76,7 +81,7 @@ def _initialize(self):
         self._model_load_manager = ModelLoadManager(self._core_interop, external_service_url)
         self.catalog = Catalog(self._model_load_manager, self._core_interop)
 
-    def discover_eps(self) -> list[EpInfo]:
+    async def discover_eps(self) -> list[EpInfo]:
         """Discover available execution providers and their registration status.
 
         Returns:
@@ -85,7 +90,7 @@ def discover_eps(self) -> list[EpInfo]:
         Raises:
             FoundryLocalException: If EP discovery fails or response JSON is invalid.
         """
-        response = self._core_interop.execute_command("discover_eps")
+        response = await self._core_interop.execute_command("discover_eps")
         if response.error is not None:
             raise FoundryLocalException(f"Error discovering execution providers: {response.error}")
 
@@ -97,7 +102,7 @@ def discover_eps(self) -> list[EpInfo]:
                 f"Failed to decode JSON response from discover_eps: {e}. Response was: {response.data}"
             ) from e
 
-    def download_and_register_eps(
+    async def download_and_register_eps(
         self,
         names: Optional[list[str]] = None,
         progress_callback: Optional[Callable[[str, float], None]] = None,
@@ -121,21 +126,23 @@ def download_and_register_eps(
         request = InteropRequest(params={"Names": ",".join(names)})
 
         if progress_callback is not None:
+            loop = asyncio.get_running_loop()
+
             def _on_chunk(chunk: str) -> None:
                 sep = chunk.find("|")
                 if sep >= 0:
                     ep_name = chunk[:sep] or ""
                     try:
                         percent = float(chunk[sep + 1:])
-                        progress_callback(ep_name, percent)
+                        loop.call_soon_threadsafe(progress_callback, ep_name, percent)
                     except ValueError:
                         pass
 
-            response = self._core_interop.execute_command_with_callback(
+            response = await self._core_interop.execute_command_with_callback(
                 "download_and_register_eps", request, _on_chunk
             )
         else:
-            response = self._core_interop.execute_command("download_and_register_eps", request)
+            response = await self._core_interop.execute_command("download_and_register_eps", request)
 
         if response.error is not None:
             raise FoundryLocalException(f"Error downloading execution providers: {response.error}")
@@ -157,11 +164,11 @@ def _on_chunk(chunk: str) -> None:
         # Invalidate the catalog cache if any EP was newly registered so the next access
         # re-fetches models with the updated set of available EPs.
         if ep_result.success or len(ep_result.registered_eps) > 0:
-            self.catalog._invalidate_cache()
+            await self.catalog._invalidate_cache()
 
         return ep_result
 
-    def start_web_service(self):
+    async def start_web_service(self):
         """Start the optional web service.
 
         If provided, the service will be bound to the value of Configuration.web.urls.
@@ -169,8 +176,8 @@ def start_web_service(self):
         FoundryLocalManager.urls will be updated with the actual URL/s the service is
         listening on.
         """
-        with FoundryLocalManager._lock:
-            response = self._core_interop.execute_command("start_service")
+        async with self._async_lock:
+            response = await self._core_interop.execute_command("start_service")
 
             if response.error is not None:
                 raise FoundryLocalException(f"Error starting web service: {response.error}")
@@ -181,14 +188,14 @@ def start_web_service(self):
 
             self.urls = bound_urls
 
-    def stop_web_service(self):
+    async def stop_web_service(self):
         """Stop the optional web service."""
-        with FoundryLocalManager._lock:
+        async with self._async_lock:
             if self.urls is None:
                 raise FoundryLocalException("Web service is not running.")
 
-            response = self._core_interop.execute_command("stop_service")
+            response = await self._core_interop.execute_command("stop_service")
 
            if response.error is not None:
                 raise FoundryLocalException(f"Error stopping web service: {response.error}")
diff --git a/sdk/python/src/imodel.py b/sdk/python/src/imodel.py
index 8237aeb4..e1e7bcf2 100644
--- a/sdk/python/src/imodel.py
+++ b/sdk/python/src/imodel.py
@@ -32,15 +32,13 @@ def info(self) -> ModelInfo:
         """Full model metadata."""
         pass
 
-    @property
     @abstractmethod
-    def is_cached(self) -> bool:
+    async def is_cached(self) -> bool:
         """True if the model is present in the local cache."""
         pass
 
-    @property
     @abstractmethod
-    def is_loaded(self) -> bool:
+    async def is_loaded(self) -> bool:
         """True if the model is loaded into memory."""
         pass
 
@@ -75,7 +73,7 @@ def supports_tool_calling(self) -> Optional[bool]:
         pass
 
     @abstractmethod
-    def download(self, progress_callback: Callable[[float], None] = None) -> None:
+    async def download(self, progress_callback: Callable[[float], None] = None) -> None:
         """
         Download the model to local cache if not already present.
         :param progress_callback: Optional callback function for download progress as a percentage (0.0 to 100.0).
@@ -83,7 +81,7 @@ def download(self, progress_callback: Callable[[float], None] = None) -> None:
         pass
 
     @abstractmethod
-    def get_path(self) -> str:
+    async def get_path(self) -> str:
         """
         Gets the model path if cached.
         :return: Path of model directory.
@@ -91,21 +89,21 @@ def get_path(self) -> str:
         pass
 
     @abstractmethod
-    def load(self) -> None:
+    async def load(self) -> None:
         """
         Load the model into memory if not already loaded.
         """
         pass
 
     @abstractmethod
-    def remove_from_cache(self) -> None:
+    async def remove_from_cache(self) -> None:
         """
         Remove the model from the local cache.
         """
         pass
 
     @abstractmethod
-    def unload(self) -> None:
+    async def unload(self) -> None:
         """
         Unload the model if loaded.
         """
diff --git a/sdk/python/src/openai/audio_client.py b/sdk/python/src/openai/audio_client.py
index 0858e4aa..ff8f133d 100644
--- a/sdk/python/src/openai/audio_client.py
+++ b/sdk/python/src/openai/audio_client.py
@@ -5,12 +5,12 @@
 
 from __future__ import annotations
 
+import asyncio
 import json
 import logging
-import queue
 import threading
 from dataclasses import dataclass
-from typing import Generator, List, Optional
+from typing import AsyncGenerator, List, Optional
 
 from ..detail.core_interop import CoreInterop, InteropRequest
 from ..exception import FoundryLocalException
@@ -89,7 +89,7 @@ def _create_request_json(self, audio_file_path: str) -> str:
 
         return json.dumps(request)
 
-    def transcribe(self, audio_file_path: str) -> AudioTranscriptionResponse:
+    async def transcribe(self, audio_file_path: str) -> AudioTranscriptionResponse:
         """Transcribe an audio file (non-streaming).
 
         Args:
@@ -107,7 +107,7 @@ def transcribe(self, audio_file_path: str) -> AudioTranscriptionResponse:
         request_json = self._create_request_json(audio_file_path)
 
         request = InteropRequest(params={"OpenAICreateRequest": request_json})
-        response = self._core_interop.execute_command("audio_transcribe", request)
+        response = await self._core_interop.execute_command("audio_transcribe", request)
         if response.error is not None:
             raise FoundryLocalException(
                 f"Audio transcription failed for model '{self.model_id}': {response.error}"
@@ -116,19 +116,23 @@ def transcribe(self, audio_file_path: str) -> AudioTranscriptionResponse:
         data = json.loads(response.data)
         return AudioTranscriptionResponse(text=data.get("text", ""))
 
-    def _stream_chunks(self, request_json: str) -> Generator[AudioTranscriptionResponse, None, None]:
-        """Background-thread generator that yields parsed chunks from the native streaming call."""
-        _SENTINEL = object()
-        chunk_queue: queue.Queue = queue.Queue()
+    async def _stream_chunks(self, request_json: str) -> AsyncGenerator[AudioTranscriptionResponse, None]:
+        """Async generator that yields parsed chunks from the native streaming call."""
+        chunk_queue: asyncio.Queue = asyncio.Queue()
+        loop = asyncio.get_running_loop()
         errors: List[Exception] = []
+        cancelled = threading.Event()
 
         def _on_chunk(chunk_str: str) -> None:
+            if cancelled.is_set():
+                return
             chunk_data = json.loads(chunk_str)
-            chunk_queue.put(AudioTranscriptionResponse(text=chunk_data.get("text", "")))
+            chunk = AudioTranscriptionResponse(text=chunk_data.get("text", ""))
+            loop.call_soon_threadsafe(chunk_queue.put_nowait, chunk)
 
-        def _run() -> None:
+        async def _execute() -> None:
             try:
-                resp = self._core_interop.execute_command_with_callback(
+                resp = await self._core_interop.execute_command_with_callback(
                     "audio_transcribe",
                     InteropRequest(params={"OpenAICreateRequest": request_json}),
                     _on_chunk,
@@ -142,30 +146,44 @@ def _run() -> None:
             except Exception as exc:
                 errors.append(exc)
             finally:
-                chunk_queue.put(_SENTINEL)
-
-        threading.Thread(target=_run, daemon=True).start()
-        while (item := chunk_queue.get()) is not _SENTINEL:
-            yield item
+                await chunk_queue.put(None)
+
+        task = asyncio.create_task(_execute())
+        try:
+            while True:
+                item = await chunk_queue.get()
+                if item is None:
+                    break
+                yield item
+        finally:
+            # Signal the callback to drop further chunks, then cancel the task.
+            # The native call may continue on its worker thread, but _on_chunk
+            # will no-op so the queue stops growing.
+            cancelled.set()
+            task.cancel()
+            try:
+                await task
+            except asyncio.CancelledError:
+                pass
 
         if errors:
             raise errors[0]
 
-    def transcribe_streaming(
+    async def transcribe_streaming(
         self,
         audio_file_path: str,
-    ) -> Generator[AudioTranscriptionResponse, None, None]:
+    ) -> AsyncGenerator[AudioTranscriptionResponse, None]:
         """Transcribe an audio file with streaming chunks.
 
-        Consume with a standard ``for`` loop::
+        Consume with a standard ``async for`` loop::
 
-            for chunk in audio_client.transcribe_streaming("recording.mp3"):
+            async for chunk in audio_client.transcribe_streaming("recording.mp3"):
                 print(chunk.text, end="", flush=True)
 
         Args:
            audio_file_path: Path to the audio file to transcribe.
 
         Returns:
-            A generator of ``AudioTranscriptionResponse`` objects.
+            An async generator of ``AudioTranscriptionResponse`` objects.
 
         Raises:
             ValueError: If *audio_file_path* is not a non-empty string.
@@ -174,4 +192,5 @@ def transcribe_streaming(
         self._validate_audio_file_path(audio_file_path)
 
         request_json = self._create_request_json(audio_file_path)
-        return self._stream_chunks(request_json)
\ No newline at end of file
+        async for chunk in self._stream_chunks(request_json):
+            yield chunk
\ No newline at end of file
diff --git a/sdk/python/src/openai/chat_client.py b/sdk/python/src/openai/chat_client.py
index 0b0d58bc..4b1b54c5 100644
--- a/sdk/python/src/openai/chat_client.py
+++ b/sdk/python/src/openai/chat_client.py
@@ -5,9 +5,9 @@
 
 from __future__ import annotations
 
+import asyncio
 import logging
 import json
-import queue
 import threading
 
 from ..detail.core_interop import CoreInterop, InteropRequest
@@ -18,7 +18,7 @@
     CompletionCreateParamsStreaming
 from openai.types.chat import ChatCompletion
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
-from typing import Any, Dict, Generator, List, Optional
+from typing import Any, AsyncGenerator, Dict, List, Optional
 
 logger = logging.getLogger(__name__)
 
@@ -192,7 +192,7 @@ def _create_request(
 
         return json.dumps(chat_request)
 
-    def complete_chat(self, messages: List[ChatCompletionMessageParam], tools: Optional[List[Dict[str, Any]]] = None):
+    async def complete_chat(self, messages: List[ChatCompletionMessageParam], tools: Optional[List[Dict[str, Any]]] = None):
        """Perform a non-streaming chat completion.
Args: @@ -212,7 +212,7 @@ def complete_chat(self, messages: List[ChatCompletionMessageParam], tools: Optio # Send the request to the chat API request = InteropRequest(params={"OpenAICreateRequest": chat_request_json}) - response = self._core_interop.execute_command("chat_completions", request) + response = await self._core_interop.execute_command("chat_completions", request) if response.error is not None: raise FoundryLocalException(f"Error during chat completion: {response.error}") @@ -220,13 +220,16 @@ def complete_chat(self, messages: List[ChatCompletionMessageParam], tools: Optio return completion - def _stream_chunks(self, chat_request_json: str) -> Generator[ChatCompletionChunk, None, None]: - """Background-thread generator that yields parsed chunks from the native streaming call.""" - _SENTINEL = object() - chunk_queue: queue.Queue = queue.Queue() + async def _stream_chunks(self, chat_request_json: str) -> AsyncGenerator[ChatCompletionChunk, None]: + """Async generator that yields parsed chunks from the native streaming call.""" + chunk_queue: asyncio.Queue = asyncio.Queue() + loop = asyncio.get_running_loop() errors: List[Exception] = [] + cancelled = threading.Event() def _on_chunk(response_str: str) -> None: + if cancelled.is_set(): + return raw = json.loads(response_str) # Foundry Local returns tool call chunks with "message.tool_calls" instead # of the standard streaming "delta.tool_calls". Normalize to delta format @@ -238,11 +241,12 @@ def _on_chunk(response_str: str) -> None: for i, tc in enumerate(msg.get("tool_calls", [])): tc.setdefault("index", i) choice["delta"] = msg - chunk_queue.put(ChatCompletionChunk.model_validate(raw)) + chunk = ChatCompletionChunk.model_validate(raw) + loop.call_soon_threadsafe(chunk_queue.put_nowait, chunk) - def _run() -> None: + async def _execute() -> None: try: - resp = self._core_interop.execute_command_with_callback( + resp = await self._core_interop.execute_command_with_callback( "chat_completions", InteropRequest(params={"OpenAICreateRequest": chat_request_json}), _on_chunk, @@ -252,24 +256,38 @@ def _run() -> None: except Exception as exc: errors.append(exc) finally: - chunk_queue.put(_SENTINEL) - - threading.Thread(target=_run, daemon=True).start() - while (item := chunk_queue.get()) is not _SENTINEL: - yield item + await chunk_queue.put(None) + + task = asyncio.create_task(_execute()) + try: + while True: + item = await chunk_queue.get() + if item is None: + break + yield item + finally: + # Signal the callback to drop further chunks, then cancel the task. + # The native call may continue on its worker thread, but _on_chunk + # will no-op so the queue stops growing. + cancelled.set() + task.cancel() + try: + await task + except asyncio.CancelledError: + pass if errors: raise errors[0] - def complete_streaming_chat( + async def complete_streaming_chat( self, messages: List[ChatCompletionMessageParam], tools: Optional[List[Dict[str, Any]]] = None, - ) -> Generator[ChatCompletionChunk, None, None]: + ) -> AsyncGenerator[ChatCompletionChunk, None]: """Perform a streaming chat completion, yielding chunks as they arrive. - Consume with a standard ``for`` loop:: + Consume with a standard ``async for`` loop:: - for chunk in client.complete_streaming_chat(messages): + async for chunk in client.complete_streaming_chat(messages): if chunk.choices[0].delta.content: print(chunk.choices[0].delta.content, end="", flush=True) @@ -278,7 +296,7 @@ def complete_streaming_chat( tools: Optional list of tool definitions for function calling. 
Returns: - A generator of ``ChatCompletionChunk`` objects. + An async generator of ``ChatCompletionChunk`` objects. Raises: ValueError: If messages or tools are malformed. @@ -287,4 +305,5 @@ def complete_streaming_chat( self._validate_messages(messages) self._validate_tools(tools) chat_request_json = self._create_request(messages, streaming=True, tools=tools) - return self._stream_chunks(chat_request_json) + async for chunk in self._stream_chunks(chat_request_json): + yield chunk diff --git a/sdk/python/test/conftest.py b/sdk/python/test/conftest.py index 1cb85704..53f12124 100644 --- a/sdk/python/test/conftest.py +++ b/sdk/python/test/conftest.py @@ -16,6 +16,7 @@ import logging import pytest +import pytest_asyncio from pathlib import Path @@ -105,14 +106,14 @@ def get_multiply_tool(): # Session-scoped fixtures # --------------------------------------------------------------------------- -@pytest.fixture(scope="session") -def manager(): +@pytest_asyncio.fixture(scope="session") +async def manager(): """Initialize FoundryLocalManager once for the entire test session.""" # Reset singleton in case a previous run left state FoundryLocalManager.instance = None config = get_test_config() - FoundryLocalManager.initialize(config) + await FoundryLocalManager.initialize(config) mgr = FoundryLocalManager.instance assert mgr is not None, "FoundryLocalManager.initialize did not set instance" @@ -121,10 +122,10 @@ def manager(): # Teardown: unload all loaded models try: catalog = mgr.catalog - loaded = catalog.get_loaded_models() + loaded = await catalog.get_loaded_models() for model_variant in loaded: try: - model_variant.unload() + await model_variant.unload() except Exception as e: logger.warning("Failed to unload model %s during teardown: %s", model_variant.id, e) except Exception as e: @@ -134,19 +135,19 @@ def manager(): FoundryLocalManager.instance = None -@pytest.fixture(scope="session") -def catalog(manager): +@pytest_asyncio.fixture(scope="session") +async def catalog(manager): """Return the Catalog from the session-scoped manager.""" return manager.catalog -@pytest.fixture(scope="session") -def core_interop(manager): +@pytest_asyncio.fixture(scope="session") +async def core_interop(manager): """Return the CoreInterop from the session-scoped manager (internal, for component tests).""" return manager._core_interop -@pytest.fixture(scope="session") -def model_load_manager(manager): +@pytest_asyncio.fixture(scope="session") +async def model_load_manager(manager): """Return the ModelLoadManager from the session-scoped manager (internal, for component tests).""" return manager._model_load_manager diff --git a/sdk/python/test/detail/test_model_load_manager.py b/sdk/python/test/detail/test_model_load_manager.py index a5a231e3..a64b9e14 100644 --- a/sdk/python/test/detail/test_model_load_manager.py +++ b/sdk/python/test/detail/test_model_load_manager.py @@ -15,67 +15,71 @@ class TestModelLoadManagerCoreInterop: """ModelLoadManager tests using Core Interop (no external URL).""" - def _get_model_id(self, catalog) -> str: + async def _get_model_id(self, catalog) -> str: """Resolve the variant ID for the test model alias.""" - cached = catalog.get_cached_models() + cached = await catalog.get_cached_models() variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) assert variant is not None, f"{TEST_MODEL_ALIAS} should be cached" return variant.id - def test_should_load_model(self, catalog, core_interop): + @pytest.mark.asyncio + async def test_should_load_model(self, catalog, 
core_interop): """Load model via core interop and verify it appears in loaded list.""" - model_id = self._get_model_id(catalog) + model_id = await self._get_model_id(catalog) mlm = ModelLoadManager(core_interop) - mlm.load(model_id) - loaded = mlm.list_loaded() + await mlm.load(model_id) + loaded = await mlm.list_loaded() assert model_id in loaded # Cleanup - mlm.unload(model_id) + await mlm.unload(model_id) - def test_should_unload_model(self, catalog, core_interop): + @pytest.mark.asyncio + async def test_should_unload_model(self, catalog, core_interop): """Load then unload model via core interop.""" - model_id = self._get_model_id(catalog) + model_id = await self._get_model_id(catalog) mlm = ModelLoadManager(core_interop) - mlm.load(model_id) - loaded = mlm.list_loaded() + await mlm.load(model_id) + loaded = await mlm.list_loaded() assert model_id in loaded - mlm.unload(model_id) - loaded = mlm.list_loaded() + await mlm.unload(model_id) + loaded = await mlm.list_loaded() assert model_id not in loaded - def test_should_list_loaded_models(self, catalog, core_interop): + @pytest.mark.asyncio + async def test_should_list_loaded_models(self, catalog, core_interop): """list_loaded() should return an array containing the loaded model.""" - model_id = self._get_model_id(catalog) + model_id = await self._get_model_id(catalog) mlm = ModelLoadManager(core_interop) - mlm.load(model_id) - loaded = mlm.list_loaded() + await mlm.load(model_id) + loaded = await mlm.list_loaded() assert isinstance(loaded, list) assert model_id in loaded # Cleanup - mlm.unload(model_id) + await mlm.unload(model_id) class TestModelLoadManagerExternalService: """ModelLoadManager tests using external web service URL (skipped in CI).""" @skip_in_ci - def test_should_load_and_unload_via_external_service(self, manager, catalog, core_interop): + @pytest.mark.asyncio + async def test_should_load_and_unload_via_external_service(self, manager, catalog, core_interop): """Load/unload model through the web service endpoint.""" - cached = catalog.get_cached_models() + cached = await catalog.get_cached_models() variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) assert variant is not None model_id = variant.id # Start web service try: - manager.start_web_service() + await manager.start_web_service() except Exception as e: pytest.skip(f"Failed to start web service: {e}") @@ -88,33 +92,34 @@ def test_should_load_and_unload_via_external_service(self, manager, catalog, cor try: # Setup: load via core interop setup_mlm = ModelLoadManager(core_interop) - setup_mlm.load(model_id) - loaded = setup_mlm.list_loaded() + await setup_mlm.load(model_id) + loaded = await setup_mlm.list_loaded() assert model_id in loaded # Unload via external service ext_mlm = ModelLoadManager(core_interop, service_url) - ext_mlm.unload(model_id) + await ext_mlm.unload(model_id) # Verify via core interop - loaded = setup_mlm.list_loaded() + loaded = await setup_mlm.list_loaded() assert model_id not in loaded finally: try: - manager.stop_web_service() + await manager.stop_web_service() except Exception: pass @skip_in_ci - def test_should_list_loaded_via_external_service(self, manager, catalog, core_interop): + @pytest.mark.asyncio + async def test_should_list_loaded_via_external_service(self, manager, catalog, core_interop): """list_loaded() through the web service endpoint should match core interop.""" - cached = catalog.get_cached_models() + cached = await catalog.get_cached_models() variant = next((m for m in cached if m.alias == 
TEST_MODEL_ALIAS), None) assert variant is not None model_id = variant.id try: - manager.start_web_service() + await manager.start_web_service() except Exception as e: pytest.skip(f"Failed to start web service: {e}") @@ -127,18 +132,18 @@ def test_should_list_loaded_via_external_service(self, manager, catalog, core_in try: # Setup: load via core setup_mlm = ModelLoadManager(core_interop) - setup_mlm.load(model_id) + await setup_mlm.load(model_id) # Verify via external service ext_mlm = ModelLoadManager(core_interop, service_url) - loaded = ext_mlm.list_loaded() + loaded = await ext_mlm.list_loaded() assert isinstance(loaded, list) assert model_id in loaded # Cleanup - setup_mlm.unload(model_id) + await setup_mlm.unload(model_id) finally: try: - manager.stop_web_service() + await manager.stop_web_service() except Exception: pass diff --git a/sdk/python/test/openai/test_audio_client.py b/sdk/python/test/openai/test_audio_client.py index 0d365eef..160577be 100644 --- a/sdk/python/test/openai/test_audio_client.py +++ b/sdk/python/test/openai/test_audio_client.py @@ -19,28 +19,29 @@ ) -def _get_loaded_audio_model(catalog): +async def _get_loaded_audio_model(catalog): """Helper: ensure the whisper model is selected, loaded, and return Model.""" - cached = catalog.get_cached_models() + cached = await catalog.get_cached_models() assert len(cached) > 0 cached_variant = next((m for m in cached if m.alias == AUDIO_MODEL_ALIAS), None) assert cached_variant is not None, f"{AUDIO_MODEL_ALIAS} should be cached" - model = catalog.get_model(AUDIO_MODEL_ALIAS) + model = await catalog.get_model(AUDIO_MODEL_ALIAS) assert model is not None model.select_variant(cached_variant) - model.load() + await model.load() return model class TestAudioClient: """Audio Client Tests.""" - def test_should_transcribe_audio(self, catalog): + @pytest.mark.asyncio + async def test_should_transcribe_audio(self, catalog): """Non-streaming transcription of Recording.mp3.""" - model = _get_loaded_audio_model(catalog) + model = await _get_loaded_audio_model(catalog) try: audio_client = model.get_audio_client() assert audio_client is not None @@ -48,7 +49,7 @@ def test_should_transcribe_audio(self, catalog): audio_client.settings.language = "en" audio_client.settings.temperature = 0.0 - response = audio_client.transcribe(AUDIO_FILE_PATH) + response = await audio_client.transcribe(AUDIO_FILE_PATH) assert response is not None assert hasattr(response, "text") @@ -56,11 +57,12 @@ def test_should_transcribe_audio(self, catalog): assert len(response.text) > 0 assert response.text == EXPECTED_TEXT finally: - model.unload() + await model.unload() - def test_should_transcribe_audio_with_temperature(self, catalog): + @pytest.mark.asyncio + async def test_should_transcribe_audio_with_temperature(self, catalog): """Non-streaming transcription with explicit temperature.""" - model = _get_loaded_audio_model(catalog) + model = await _get_loaded_audio_model(catalog) try: audio_client = model.get_audio_client() assert audio_client is not None @@ -68,18 +70,19 @@ def test_should_transcribe_audio_with_temperature(self, catalog): audio_client.settings.language = "en" audio_client.settings.temperature = 0.0 - response = audio_client.transcribe(AUDIO_FILE_PATH) + response = await audio_client.transcribe(AUDIO_FILE_PATH) assert response is not None assert isinstance(response.text, str) assert len(response.text) > 0 assert response.text == EXPECTED_TEXT finally: - model.unload() + await model.unload() - def test_should_transcribe_audio_streaming(self, 
catalog): + @pytest.mark.asyncio + async def test_should_transcribe_audio_streaming(self, catalog): """Streaming transcription of Recording.mp3.""" - model = _get_loaded_audio_model(catalog) + model = await _get_loaded_audio_model(catalog) try: audio_client = model.get_audio_client() assert audio_client is not None @@ -88,7 +91,7 @@ def test_should_transcribe_audio_streaming(self, catalog): audio_client.settings.temperature = 0.0 chunks = [] - for chunk in audio_client.transcribe_streaming(AUDIO_FILE_PATH): + async for chunk in audio_client.transcribe_streaming(AUDIO_FILE_PATH): assert chunk is not None assert hasattr(chunk, "text") assert isinstance(chunk.text, str) @@ -98,11 +101,12 @@ def test_should_transcribe_audio_streaming(self, catalog): full_text = "".join(chunks) assert full_text == EXPECTED_TEXT finally: - model.unload() + await model.unload() - def test_should_transcribe_audio_streaming_with_temperature(self, catalog): + @pytest.mark.asyncio + async def test_should_transcribe_audio_streaming_with_temperature(self, catalog): """Streaming transcription with explicit temperature.""" - model = _get_loaded_audio_model(catalog) + model = await _get_loaded_audio_model(catalog) try: audio_client = model.get_audio_client() assert audio_client is not None @@ -111,7 +115,7 @@ def test_should_transcribe_audio_streaming_with_temperature(self, catalog): audio_client.settings.temperature = 0.0 chunks = [] - for chunk in audio_client.transcribe_streaming(AUDIO_FILE_PATH): + async for chunk in audio_client.transcribe_streaming(AUDIO_FILE_PATH): assert chunk is not None assert isinstance(chunk.text, str) chunks.append(chunk.text) @@ -119,22 +123,25 @@ def test_should_transcribe_audio_streaming_with_temperature(self, catalog): full_text = "".join(chunks) assert full_text == EXPECTED_TEXT finally: - model.unload() + await model.unload() - def test_should_raise_for_empty_audio_file_path(self, catalog): + @pytest.mark.asyncio + async def test_should_raise_for_empty_audio_file_path(self, catalog): """transcribe('') should raise.""" - model = catalog.get_model(AUDIO_MODEL_ALIAS) + model = await catalog.get_model(AUDIO_MODEL_ALIAS) assert model is not None audio_client = model.get_audio_client() with pytest.raises(ValueError, match="Audio file path must be a non-empty string"): - audio_client.transcribe("") + await audio_client.transcribe("") - def test_should_raise_for_streaming_empty_audio_file_path(self, catalog): + @pytest.mark.asyncio + async def test_should_raise_for_streaming_empty_audio_file_path(self, catalog): """transcribe_streaming('') should raise.""" - model = catalog.get_model(AUDIO_MODEL_ALIAS) + model = await catalog.get_model(AUDIO_MODEL_ALIAS) assert model is not None audio_client = model.get_audio_client() with pytest.raises(ValueError, match="Audio file path must be a non-empty string"): - audio_client.transcribe_streaming("") + async for _ in audio_client.transcribe_streaming(""): + pass diff --git a/sdk/python/test/openai/test_chat_client.py b/sdk/python/test/openai/test_chat_client.py index d96891b9..3580c38b 100644 --- a/sdk/python/test/openai/test_chat_client.py +++ b/sdk/python/test/openai/test_chat_client.py @@ -13,34 +13,35 @@ from ..conftest import TEST_MODEL_ALIAS, get_multiply_tool -def _get_loaded_chat_model(catalog): +async def _get_loaded_chat_model(catalog): """Helper: ensure the test model is selected, loaded, and return Model + ChatClient.""" - cached = catalog.get_cached_models() + cached = await catalog.get_cached_models() assert len(cached) > 0 cached_variant = 
next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) assert cached_variant is not None, f"{TEST_MODEL_ALIAS} should be cached" - model = catalog.get_model(TEST_MODEL_ALIAS) + model = await catalog.get_model(TEST_MODEL_ALIAS) assert model is not None model.select_variant(cached_variant) - model.load() + await model.load() return model class TestChatClient: """Chat Client Tests.""" - def test_should_perform_chat_completion(self, catalog): + @pytest.mark.asyncio + async def test_should_perform_chat_completion(self, catalog): """Non-streaming chat: 7 * 6 should include '42' in the response.""" - model = _get_loaded_chat_model(catalog) + model = await _get_loaded_chat_model(catalog) try: client = model.get_chat_client() client.settings.max_tokens = 500 client.settings.temperature = 0.0 # deterministic - result = client.complete_chat([ + result = await client.complete_chat([ {"role": "user", "content": "You are a calculator. Be precise. What is the answer to 7 multiplied by 6?"} ]) @@ -53,11 +54,12 @@ def test_should_perform_chat_completion(self, catalog): assert isinstance(content, str) assert "42" in content finally: - model.unload() + await model.unload() - def test_should_perform_streaming_chat_completion(self, catalog): + @pytest.mark.asyncio + async def test_should_perform_streaming_chat_completion(self, catalog): """Streaming chat: 7 * 6 = 42, then follow-up +25 = 67.""" - model = _get_loaded_chat_model(catalog) + model = await _get_loaded_chat_model(catalog) try: client = model.get_chat_client() client.settings.max_tokens = 500 @@ -69,7 +71,7 @@ def test_should_perform_streaming_chat_completion(self, catalog): ] # ---- First question ---- - chunks = list(client.complete_streaming_chat(messages)) + chunks = [c async for c in client.complete_streaming_chat(messages)] assert len(chunks) > 0 first_response = "".join( c.choices[0].delta.content @@ -82,7 +84,7 @@ def test_should_perform_streaming_chat_completion(self, catalog): messages.append({"role": "assistant", "content": first_response}) messages.append({"role": "user", "content": "Add 25 to the previous answer. 
Think hard to be sure of the answer."}) - chunks = list(client.complete_streaming_chat(messages)) + chunks = [c async for c in client.complete_streaming_chat(messages)] assert len(chunks) > 0 second_response = "".join( c.choices[0].delta.content @@ -91,47 +93,54 @@ def test_should_perform_streaming_chat_completion(self, catalog): ) assert "67" in second_response finally: - model.unload() + await model.unload() - def test_should_raise_for_empty_messages(self, catalog): + @pytest.mark.asyncio + async def test_should_raise_for_empty_messages(self, catalog): """complete_chat with empty list should raise.""" - model = catalog.get_model(TEST_MODEL_ALIAS) + model = await catalog.get_model(TEST_MODEL_ALIAS) assert model is not None client = model.get_chat_client() with pytest.raises(ValueError): - client.complete_chat([]) + await client.complete_chat([]) - def test_should_raise_for_none_messages(self, catalog): + @pytest.mark.asyncio + async def test_should_raise_for_none_messages(self, catalog): """complete_chat with None should raise.""" - model = catalog.get_model(TEST_MODEL_ALIAS) + model = await catalog.get_model(TEST_MODEL_ALIAS) assert model is not None client = model.get_chat_client() with pytest.raises(ValueError): - client.complete_chat(None) + await client.complete_chat(None) - def test_should_raise_for_streaming_empty_messages(self, catalog): + @pytest.mark.asyncio + async def test_should_raise_for_streaming_empty_messages(self, catalog): """complete_streaming_chat with empty list should raise.""" - model = catalog.get_model(TEST_MODEL_ALIAS) + model = await catalog.get_model(TEST_MODEL_ALIAS) assert model is not None client = model.get_chat_client() with pytest.raises(ValueError): - client.complete_streaming_chat([]) + async for _ in client.complete_streaming_chat([]): + pass - def test_should_raise_for_streaming_none_messages(self, catalog): + @pytest.mark.asyncio + async def test_should_raise_for_streaming_none_messages(self, catalog): """complete_streaming_chat with None should raise.""" - model = catalog.get_model(TEST_MODEL_ALIAS) + model = await catalog.get_model(TEST_MODEL_ALIAS) assert model is not None client = model.get_chat_client() with pytest.raises(ValueError): - client.complete_streaming_chat(None) + async for _ in client.complete_streaming_chat(None): + pass - def test_should_perform_tool_calling_chat_completion(self, catalog): + @pytest.mark.asyncio + async def test_should_perform_tool_calling_chat_completion(self, catalog): """Tool calling (non-streaming): model uses multiply_numbers tool to answer 7 * 6.""" - model = _get_loaded_chat_model(catalog) + model = await _get_loaded_chat_model(catalog) try: client = model.get_chat_client() client.settings.max_tokens = 500 @@ -145,7 +154,7 @@ def test_should_perform_tool_calling_chat_completion(self, catalog): tools = [get_multiply_tool()] # First turn: model should respond with a tool call - response = client.complete_chat(messages, tools) + response = await client.complete_chat(messages, tools) assert response is not None assert response.choices is not None @@ -168,16 +177,17 @@ def test_should_perform_tool_calling_chat_completion(self, catalog): messages.append({"role": "system", "content": "Respond only with the answer generated by the tool."}) client.settings.tool_choice = {"type": "auto"} - response = client.complete_chat(messages, tools) + response = await client.complete_chat(messages, tools) assert response.choices[0].message.content is not None assert "42" in response.choices[0].message.content finally: - 
model.unload() + await model.unload() - def test_should_perform_tool_calling_streaming_chat_completion(self, catalog): + @pytest.mark.asyncio + async def test_should_perform_tool_calling_streaming_chat_completion(self, catalog): """Tool calling (streaming): model uses multiply_numbers tool, then continue conversation.""" - model = _get_loaded_chat_model(catalog) + model = await _get_loaded_chat_model(catalog) try: client = model.get_chat_client() client.settings.max_tokens = 500 @@ -191,7 +201,7 @@ def test_should_perform_tool_calling_streaming_chat_completion(self, catalog): tools = [get_multiply_tool()] # First turn: collect chunks and find the tool call - chunks = list(client.complete_streaming_chat(messages, tools)) + chunks = [c async for c in client.complete_streaming_chat(messages, tools)] last_tool_call_chunk = next( (c for c in reversed(chunks) if c.choices and c.choices[0].delta and c.choices[0].delta.tool_calls), @@ -216,7 +226,7 @@ def test_should_perform_tool_calling_streaming_chat_completion(self, catalog): client.settings.tool_choice = {"type": "auto"} - chunks = list(client.complete_streaming_chat(messages, tools)) + chunks = [c async for c in client.complete_streaming_chat(messages, tools)] second_response = "".join( c.choices[0].delta.content for c in chunks @@ -224,20 +234,18 @@ def test_should_perform_tool_calling_streaming_chat_completion(self, catalog): ) assert "42" in second_response finally: - model.unload() + await model.unload() - def test_should_return_generator(self, catalog): - """complete_streaming_chat returns a generator that yields chunks.""" - model = _get_loaded_chat_model(catalog) + @pytest.mark.asyncio + async def test_should_return_async_generator(self, catalog): + """complete_streaming_chat returns an async generator that yields chunks.""" + model = await _get_loaded_chat_model(catalog) try: client = model.get_chat_client() client.settings.max_tokens = 50 client.settings.temperature = 0.0 - result = client.complete_streaming_chat([{"role": "user", "content": "Say hi."}]) - - assert result is not None - chunks = list(result) + chunks = [c async for c in client.complete_streaming_chat([{"role": "user", "content": "Say hi."}])] assert len(chunks) > 0 finally: - model.unload() \ No newline at end of file + await model.unload() \ No newline at end of file diff --git a/sdk/python/test/test_catalog.py b/sdk/python/test/test_catalog.py index 2e5968cc..5d13326c 100644 --- a/sdk/python/test/test_catalog.py +++ b/sdk/python/test/test_catalog.py @@ -7,6 +7,7 @@ from __future__ import annotations import json +import pytest from foundry_local_sdk.catalog import Catalog from foundry_local_sdk.detail.core_interop import Response @@ -17,14 +18,16 @@ class TestCatalog: """Catalog Tests.""" - def test_should_initialize_with_catalog_name(self, catalog): + @pytest.mark.asyncio + async def test_should_initialize_with_catalog_name(self, catalog): """Catalog should expose a non-empty name string.""" assert isinstance(catalog.name, str) assert len(catalog.name) > 0 - def test_should_list_models(self, catalog): + @pytest.mark.asyncio + async def test_should_list_models(self, catalog): """list_models() should return a non-empty list containing the test model.""" - models = catalog.list_models() + models = await catalog.list_models() assert isinstance(models, list) assert len(models) > 0 @@ -32,25 +35,29 @@ def test_should_list_models(self, catalog): aliases = {m.alias for m in models} assert TEST_MODEL_ALIAS in aliases - def test_should_get_model_by_alias(self, catalog): + 
@pytest.mark.asyncio + async def test_should_get_model_by_alias(self, catalog): """get_model() should return a Model whose alias matches.""" - model = catalog.get_model(TEST_MODEL_ALIAS) + model = await catalog.get_model(TEST_MODEL_ALIAS) assert model is not None assert model.alias == TEST_MODEL_ALIAS - def test_should_return_none_for_empty_alias(self, catalog): + @pytest.mark.asyncio + async def test_should_return_none_for_empty_alias(self, catalog): """get_model('') should return None (unknown alias).""" - result = catalog.get_model("") + result = await catalog.get_model("") assert result is None - def test_should_return_none_for_unknown_alias(self, catalog): + @pytest.mark.asyncio + async def test_should_return_none_for_unknown_alias(self, catalog): """get_model() with a random alias should return None.""" - result = catalog.get_model("definitely-not-a-real-model-alias-12345") + result = await catalog.get_model("definitely-not-a-real-model-alias-12345") assert result is None - def test_should_get_cached_models(self, catalog): + @pytest.mark.asyncio + async def test_should_get_cached_models(self, catalog): """get_cached_models() should return a list with at least the test model.""" - cached = catalog.get_cached_models() + cached = await catalog.get_cached_models() assert isinstance(cached, list) assert len(cached) > 0 @@ -58,27 +65,31 @@ def test_should_get_cached_models(self, catalog): aliases = {m.alias for m in cached} assert TEST_MODEL_ALIAS in aliases - def test_should_get_model_variant_by_id(self, catalog): + @pytest.mark.asyncio + async def test_should_get_model_variant_by_id(self, catalog): """get_model_variant() with a valid ID should return the variant.""" - cached = catalog.get_cached_models() + cached = await catalog.get_cached_models() assert len(cached) > 0 variant = cached[0] - result = catalog.get_model_variant(variant.id) + result = await catalog.get_model_variant(variant.id) assert result is not None assert result.id == variant.id - def test_should_return_none_for_empty_variant_id(self, catalog): + @pytest.mark.asyncio + async def test_should_return_none_for_empty_variant_id(self, catalog): """get_model_variant('') should return None.""" - result = catalog.get_model_variant("") + result = await catalog.get_model_variant("") assert result is None - def test_should_return_none_for_unknown_variant_id(self, catalog): + @pytest.mark.asyncio + async def test_should_return_none_for_unknown_variant_id(self, catalog): """get_model_variant() with a random ID should return None.""" - result = catalog.get_model_variant("definitely-not-a-real-model-id-12345") + result = await catalog.get_model_variant("definitely-not-a-real-model-id-12345") assert result is None - def test_should_resolve_latest_version_for_model_and_variant_inputs(self): + @pytest.mark.asyncio + async def test_should_resolve_latest_version_for_model_and_variant_inputs(self): """get_latest_version() should resolve latest variant and preserve Model input when already latest.""" test_model_infos = [ @@ -124,9 +135,12 @@ def test_should_resolve_latest_version_for_model_and_variant_inputs(self): ] class _MockCoreInterop: - def execute_command(self, command_name, command_input=None): + def _execute_command(self, command_name, command_input=None): if command_name == "get_catalog_name": return Response(data="TestCatalog", error=None) + return Response(data=None, error=f"Unexpected command: {command_name}") + + async def execute_command(self, command_name, command_input=None): if command_name == "get_model_list": return 
Response(data=json.dumps(test_model_infos), error=None) if command_name == "get_cached_models": @@ -134,12 +148,12 @@ def execute_command(self, command_name, command_input=None): return Response(data=None, error=f"Unexpected command: {command_name}") class _MockModelLoadManager: - def list_loaded(self): + async def list_loaded(self): return [] catalog = Catalog(_MockModelLoadManager(), _MockCoreInterop()) - model = catalog.get_model("test-alias") + model = await catalog.get_model("test-alias") assert model is not None variants = model.variants @@ -153,15 +167,15 @@ def list_loaded(self): assert middle_variant.id == "test-model:2" assert oldest_variant.id == "test-model:1" - result1 = catalog.get_latest_version(latest_variant) + result1 = await catalog.get_latest_version(latest_variant) assert result1.id == "test-model:3" - result2 = catalog.get_latest_version(middle_variant) + result2 = await catalog.get_latest_version(middle_variant) assert result2.id == "test-model:3" - result3 = catalog.get_latest_version(oldest_variant) + result3 = await catalog.get_latest_version(oldest_variant) assert result3.id == "test-model:3" model.select_variant(latest_variant) - result4 = catalog.get_latest_version(model) + result4 = await catalog.get_latest_version(model) assert result4 is model diff --git a/sdk/python/test/test_foundry_local_manager.py b/sdk/python/test/test_foundry_local_manager.py index 31528891..bf860931 100644 --- a/sdk/python/test/test_foundry_local_manager.py +++ b/sdk/python/test/test_foundry_local_manager.py @@ -6,6 +6,8 @@ from __future__ import annotations +import pytest + class _Response: def __init__(self, data=None, error=None): @@ -18,7 +20,7 @@ def __init__(self, responses): self._responses = responses self.calls = [] - def execute_command(self, command_name, command_input=None): + async def execute_command(self, command_name, command_input=None): self.calls.append((command_name, command_input)) return self._responses[command_name] @@ -26,18 +28,21 @@ def execute_command(self, command_name, command_input=None): class TestFoundryLocalManager: """Foundry Local Manager Tests.""" - def test_should_initialize_successfully(self, manager): + @pytest.mark.asyncio + async def test_should_initialize_successfully(self, manager): """Manager singleton should be non-None after initialize().""" assert manager is not None - def test_should_return_catalog(self, manager): + @pytest.mark.asyncio + async def test_should_return_catalog(self, manager): """Manager should expose a Catalog with a non-empty name.""" catalog = manager.catalog assert catalog is not None assert isinstance(catalog.name, str) assert len(catalog.name) > 0 - def test_discover_eps_returns_ep_info(self, manager): + @pytest.mark.asyncio + async def test_discover_eps_returns_ep_info(self, manager): original_core = manager._core_interop manager._core_interop = _FakeCoreInterop( { @@ -49,7 +54,7 @@ def test_discover_eps_returns_ep_info(self, manager): ) try: - eps = manager.discover_eps() + eps = await manager.discover_eps() finally: manager._core_interop = original_core @@ -58,7 +63,8 @@ def test_discover_eps_returns_ep_info(self, manager): assert eps[0].name == "CUDAExecutionProvider" assert eps[0].is_registered is True - def test_download_and_register_eps_returns_result(self, manager): + @pytest.mark.asyncio + async def test_download_and_register_eps_returns_result(self, manager): original_core = manager._core_interop manager._core_interop = _FakeCoreInterop( { @@ -73,7 +79,7 @@ def 
test_download_and_register_eps_returns_result(self, manager): ) try: - result = manager.download_and_register_eps(["CUDAExecutionProvider"]) + result = await manager.download_and_register_eps(["CUDAExecutionProvider"]) finally: manager._core_interop = original_core diff --git a/sdk/python/test/test_model.py b/sdk/python/test/test_model.py index e2ea1509..b4e47114 100644 --- a/sdk/python/test/test_model.py +++ b/sdk/python/test/test_model.py @@ -6,83 +6,91 @@ from __future__ import annotations +import pytest + from .conftest import TEST_MODEL_ALIAS, AUDIO_MODEL_ALIAS class TestModel: """Model Tests.""" - def test_should_verify_cached_models(self, catalog): + @pytest.mark.asyncio + async def test_should_verify_cached_models(self, catalog): """Cached models from test-data-shared should include qwen and whisper.""" - cached = catalog.get_cached_models() + cached = await catalog.get_cached_models() assert isinstance(cached, list) assert len(cached) > 0 # Check qwen model is cached qwen = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) assert qwen is not None, f"{TEST_MODEL_ALIAS} should be cached" - assert qwen.is_cached is True + assert await qwen.is_cached() is True # Check whisper model is cached whisper = next((m for m in cached if m.alias == AUDIO_MODEL_ALIAS), None) assert whisper is not None, f"{AUDIO_MODEL_ALIAS} should be cached" - assert whisper.is_cached is True + assert await whisper.is_cached() is True - def test_should_load_and_unload_model(self, catalog): + @pytest.mark.asyncio + async def test_should_load_and_unload_model(self, catalog): """Load/unload cycle should toggle is_loaded on the selected variant.""" - cached = catalog.get_cached_models() + cached = await catalog.get_cached_models() assert len(cached) > 0 cached_variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) assert cached_variant is not None - model = catalog.get_model(TEST_MODEL_ALIAS) + model = await catalog.get_model(TEST_MODEL_ALIAS) assert model is not None model.select_variant(cached_variant) # Ensure it's not loaded initially (or unload if it is) - if model.is_loaded: - model.unload() - assert model.is_loaded is False + if await model.is_loaded(): + await model.unload() + assert await model.is_loaded() is False try: - model.load() - assert model.is_loaded is True + await model.load() + assert await model.is_loaded() is True - model.unload() - assert model.is_loaded is False + await model.unload() + assert await model.is_loaded() is False finally: # Safety cleanup - if model.is_loaded: - model.unload() + if await model.is_loaded(): + await model.unload() - def test_should_expose_context_length(self, catalog): + @pytest.mark.asyncio + async def test_should_expose_context_length(self, catalog): """Model should expose context_length from ModelInfo metadata.""" - model = catalog.get_model(TEST_MODEL_ALIAS) + model = await catalog.get_model(TEST_MODEL_ALIAS) assert model is not None # context_length should be None or a positive integer ctx = model.context_length assert ctx is None or (isinstance(ctx, int) and ctx > 0) - def test_should_expose_modalities(self, catalog): + @pytest.mark.asyncio + async def test_should_expose_modalities(self, catalog): """Model should expose input_modalities and output_modalities.""" - model = catalog.get_model(TEST_MODEL_ALIAS) + model = await catalog.get_model(TEST_MODEL_ALIAS) assert model is not None # Modalities should be None or non-empty strings for val in (model.input_modalities, model.output_modalities): assert val is None or 
(isinstance(val, str) and len(val) > 0) - def test_should_expose_capabilities(self, catalog): + @pytest.mark.asyncio + async def test_should_expose_capabilities(self, catalog): """Model should expose capabilities metadata.""" - model = catalog.get_model(TEST_MODEL_ALIAS) + model = await catalog.get_model(TEST_MODEL_ALIAS) assert model is not None caps = model.capabilities assert caps is None or (isinstance(caps, str) and len(caps) > 0) - def test_should_expose_supports_tool_calling(self, catalog): + @pytest.mark.asyncio + async def test_should_expose_supports_tool_calling(self, catalog): """Model should expose supports_tool_calling metadata.""" - model = catalog.get_model(TEST_MODEL_ALIAS) + model = await catalog.get_model(TEST_MODEL_ALIAS) assert model is not None stc = model.supports_tool_calling assert stc is None or isinstance(stc, bool)
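Taken together, the converted surface is consumed end to end like this (a minimal sketch mirroring the updated tests; the ``qwen2.5-0.5b`` alias and ``app_name`` value are placeholders, and error handling is elided)::

    import asyncio

    from foundry_local_sdk import Configuration, FoundryLocalManager

    async def main() -> None:
        # initialize() is now a coroutine and must be awaited first.
        config = Configuration(app_name="readme_example")
        await FoundryLocalManager.initialize(config)
        manager = FoundryLocalManager.instance

        # Catalog lookup and the model lifecycle are awaited as well.
        # Assumes the alias resolves; get_model() returns None for
        # unknown aliases.
        model = await manager.catalog.get_model("qwen2.5-0.5b")
        await model.load()
        try:
            client = model.get_chat_client()  # client accessors stay synchronous
            # Streaming completions are now consumed with ``async for``.
            async for chunk in client.complete_streaming_chat(
                [{"role": "user", "content": "What is 7 multiplied by 6?"}]
            ):
                if chunk.choices and chunk.choices[0].delta.content:
                    print(chunk.choices[0].delta.content, end="", flush=True)
            print()
        finally:
            await model.unload()

    asyncio.run(main())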