diff --git a/.pipelines/templates/test-python-steps.yml b/.pipelines/templates/test-python-steps.yml
index 37bfa5a8..7bde34b3 100644
--- a/.pipelines/templates/test-python-steps.yml
+++ b/.pipelines/templates/test-python-steps.yml
@@ -128,7 +128,7 @@ steps:
if ($LASTEXITCODE -ne 0) { throw "Windows App SDK Runtime install failed" }
errorActionPreference: 'stop'
-- script: pip install coverage pytest>=7.0.0 pytest-timeout>=2.1.0
+- script: pip install coverage "pytest>=7.0.0" "pytest-timeout>=2.1.0" "pytest-asyncio>=1.3.0"
displayName: 'Install test dependencies'
- script: python -m pytest test/ -v
diff --git a/samples/python/audio-transcription/src/app.py b/samples/python/audio-transcription/src/app.py
index ca06fb28..d7e00c68 100644
--- a/samples/python/audio-transcription/src/app.py
+++ b/samples/python/audio-transcription/src/app.py
@@ -1,53 +1,59 @@
#
#
+import asyncio
import sys
from foundry_local_sdk import Configuration, FoundryLocalManager
#
-#
-# Initialize the Foundry Local SDK
-config = Configuration(app_name="foundry_local_samples")
-FoundryLocalManager.initialize(config)
-manager = FoundryLocalManager.instance
-
-# Download and register all execution providers.
-current_ep = ""
-def _ep_progress(ep_name: str, percent: float):
- global current_ep
- if ep_name != current_ep:
- if current_ep:
- print()
- current_ep = ep_name
- print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
-
-manager.download_and_register_eps(progress_callback=_ep_progress)
-if current_ep:
- print()
+async def main():
+ #
+ # Initialize the Foundry Local SDK
+ config = Configuration(app_name="foundry_local_samples")
+ await FoundryLocalManager.initialize(config)
+ manager = FoundryLocalManager.instance
+
+ # Download and register all execution providers.
+ current_ep = ""
+ def _ep_progress(ep_name: str, percent: float):
+ nonlocal current_ep
+ if ep_name != current_ep:
+ if current_ep:
+ print()
+ current_ep = ep_name
+ print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
-# Load the whisper model for speech-to-text
-model = manager.catalog.get_model("whisper-tiny")
-model.download(
- lambda progress: print(
- f"\rDownloading model: {progress:.2f}%",
- end="",
- flush=True,
+ await manager.download_and_register_eps(progress_callback=_ep_progress)
+ if current_ep:
+ print()
+
+ # Load the whisper model for speech-to-text
+ model = await manager.catalog.get_model("whisper-tiny")
+ await model.download(
+ lambda progress: print(
+ f"\rDownloading model: {progress:.2f}%",
+ end="",
+ flush=True,
+ )
)
-)
-print()
-model.load()
-print("Model loaded.")
-#
-
-#
-# Get the audio client and transcribe
-audio_client = model.get_audio_client()
-audio_file = sys.argv[1] if len(sys.argv) > 1 else "Recording.mp3"
-result = audio_client.transcribe(audio_file)
-print("Transcription:")
-print(result.text)
-#
-
-# Clean up
-model.unload()
+ print()
+ await model.load()
+ print("Model loaded.")
+ #
+
+ #
+ # Get the audio client and transcribe
+ audio_client = model.get_audio_client()
+ audio_file = sys.argv[1] if len(sys.argv) > 1 else "Recording.mp3"
+ result = await audio_client.transcribe(audio_file)
+ print("Transcription:")
+ print(result.text)
+ #
+
+ # Clean up
+ await model.unload()
+
+
+if __name__ == "__main__":
+ asyncio.run(main())
#
diff --git a/samples/python/langchain-integration/src/app.py b/samples/python/langchain-integration/src/app.py
index 4f8661cd..1cfc8790 100644
--- a/samples/python/langchain-integration/src/app.py
+++ b/samples/python/langchain-integration/src/app.py
@@ -1,73 +1,80 @@
#
#
+import asyncio
from foundry_local_sdk import Configuration, FoundryLocalManager
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
#
-#
-# Initialize the Foundry Local SDK
-config = Configuration(app_name="foundry_local_samples")
-FoundryLocalManager.initialize(config)
-manager = FoundryLocalManager.instance
-# Download and register all execution providers.
-current_ep = ""
-def _ep_progress(ep_name: str, percent: float):
- global current_ep
- if ep_name != current_ep:
- if current_ep:
- print()
- current_ep = ep_name
- print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
+async def main():
+ #
+ # Initialize the Foundry Local SDK
+ config = Configuration(app_name="foundry_local_samples")
+ await FoundryLocalManager.initialize(config)
+ manager = FoundryLocalManager.instance
-manager.download_and_register_eps(progress_callback=_ep_progress)
-if current_ep:
+ # Download and register all execution providers.
+ current_ep = ""
+ def _ep_progress(ep_name: str, percent: float):
+ nonlocal current_ep
+ if ep_name != current_ep:
+ if current_ep:
+ print()
+ current_ep = ep_name
+ print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
+
+ await manager.download_and_register_eps(progress_callback=_ep_progress)
+ if current_ep:
+ print()
+
+ # Load a model
+ model = await manager.catalog.get_model("qwen2.5-0.5b")
+ await model.download(
+ lambda progress: print(
+ f"\rDownloading model: {progress:.2f}%",
+ end="",
+ flush=True,
+ )
+ )
print()
+ await model.load()
+ print("Model loaded.")
+
+ # Start the web service to expose an OpenAI-compatible endpoint
+ await manager.start_web_service()
+ base_url = f"{manager.urls[0]}/v1"
+ #
-# Load a model
-model = manager.catalog.get_model("qwen2.5-0.5b")
-model.download(
- lambda progress: print(
- f"\rDownloading model: {progress:.2f}%",
- end="",
- flush=True,
+ #
+ # Create a LangChain ChatOpenAI instance pointing to the local endpoint
+ llm = ChatOpenAI(
+ base_url=base_url,
+ api_key="none",
+ model=model.id,
)
-)
-print()
-model.load()
-print("Model loaded.")
+ #
-# Start the web service to expose an OpenAI-compatible endpoint
-manager.start_web_service()
-base_url = f"{manager.urls[0]}/v1"
-#
+ #
+ # Create a translation chain
+ prompt = ChatPromptTemplate.from_messages([
+ ("system", "You are a translator. Translate the following text to {language}. Only output the translation, nothing else."),
+ ("user", "{text}")
+ ])
-#
-# Create a LangChain ChatOpenAI instance pointing to the local endpoint
-llm = ChatOpenAI(
- base_url=base_url,
- api_key="none",
- model=model.id,
-)
-#
+ chain = prompt | llm | StrOutputParser()
-#
-# Create a translation chain
-prompt = ChatPromptTemplate.from_messages([
- ("system", "You are a translator. Translate the following text to {language}. Only output the translation, nothing else."),
- ("user", "{text}")
-])
+ # Run the chain
+ result = await chain.ainvoke({"language": "Spanish", "text": "Hello, how are you today?"})
+ print(f"Translation: {result}")
+ #
-chain = prompt | llm | StrOutputParser()
+ # Clean up
+ await model.unload()
+ await manager.stop_web_service()
-# Run the chain
-result = chain.invoke({"language": "Spanish", "text": "Hello, how are you today?"})
-print(f"Translation: {result}")
-#
-# Clean up
-model.unload()
-manager.stop_web_service()
+if __name__ == "__main__":
+ asyncio.run(main())
#
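
Note on the sample above: chain.ainvoke returns the whole translation in one result. LangChain runnables also expose astream for token-by-token output; a minimal streaming sketch, assuming it runs inside main() after the chain is built:

    # Hypothetical streaming variant of the ainvoke call above.
    async for token in chain.astream({"language": "Spanish", "text": "Hello, how are you today?"}):
        print(token, end="", flush=True)
    print()
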
diff --git a/samples/python/native-chat-completions/src/app.py b/samples/python/native-chat-completions/src/app.py
index eba9df41..8fdb01f5 100644
--- a/samples/python/native-chat-completions/src/app.py
+++ b/samples/python/native-chat-completions/src/app.py
@@ -1,14 +1,15 @@
#
#
+import asyncio
from foundry_local_sdk import Configuration, FoundryLocalManager
#
-def main():
+async def main():
#
# Initialize the Foundry Local SDK
config = Configuration(app_name="foundry_local_samples")
- FoundryLocalManager.initialize(config)
+ await FoundryLocalManager.initialize(config)
manager = FoundryLocalManager.instance
# Download and register all execution providers.
@@ -21,13 +22,13 @@ def ep_progress(ep_name: str, percent: float):
current_ep = ep_name
print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
- manager.download_and_register_eps(progress_callback=ep_progress)
+ await manager.download_and_register_eps(progress_callback=ep_progress)
if current_ep:
print()
# Select and load a model from the catalog
- model = manager.catalog.get_model("qwen2.5-0.5b")
- model.download(
+ model = await manager.catalog.get_model("qwen2.5-0.5b")
+ await model.download(
lambda progress: print(
f"\rDownloading model: {progress:.2f}%",
end="",
@@ -35,7 +36,7 @@ def ep_progress(ep_name: str, percent: float):
)
)
print()
- model.load()
+ await model.load()
print("Model loaded and ready.")
# Get a chat client
@@ -50,7 +51,7 @@ def ep_progress(ep_name: str, percent: float):
# Stream the response token by token
print("Assistant: ", end="", flush=True)
- for chunk in client.complete_streaming_chat(messages):
+ async for chunk in client.complete_streaming_chat(messages):
content = chunk.choices[0].delta.content
if content:
print(content, end="", flush=True)
@@ -58,10 +59,10 @@ def ep_progress(ep_name: str, percent: float):
#
# Clean up
- model.unload()
+ await model.unload()
print("Model unloaded.")
if __name__ == "__main__":
- main()
+ asyncio.run(main())
#
diff --git a/samples/python/tool-calling/src/app.py b/samples/python/tool-calling/src/app.py
index db619550..22e89518 100644
--- a/samples/python/tool-calling/src/app.py
+++ b/samples/python/tool-calling/src/app.py
@@ -1,5 +1,6 @@
#
#
+import asyncio
import json
from foundry_local_sdk import Configuration, FoundryLocalManager
#
@@ -83,7 +84,7 @@ def calculate(expression):
#
-def process_tool_calls(messages, response, client):
+async def process_tool_calls(messages, response, client):
"""Handle tool calls in a loop until the model produces a final answer."""
choice = response.choices[0].message
@@ -121,7 +122,7 @@ def process_tool_calls(messages, response, client):
})
# Send the updated conversation back
- response = client.complete_chat(messages, tools=tools)
+ response = await client.complete_chat(messages, tools=tools)
choice = response.choices[0].message
return choice.content
@@ -129,10 +130,10 @@ def process_tool_calls(messages, response, client):
#
-def main():
+async def main():
# Initialize the Foundry Local SDK
config = Configuration(app_name="foundry_local_samples")
- FoundryLocalManager.initialize(config)
+ await FoundryLocalManager.initialize(config)
manager = FoundryLocalManager.instance
# Download and register all execution providers.
@@ -145,13 +146,13 @@ def ep_progress(ep_name: str, percent: float):
current_ep = ep_name
print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
- manager.download_and_register_eps(progress_callback=ep_progress)
+ await manager.download_and_register_eps(progress_callback=ep_progress)
if current_ep:
print()
# Select and load a model
- model = manager.catalog.get_model("qwen2.5-0.5b")
- model.download(
+ model = await manager.catalog.get_model("qwen2.5-0.5b")
+ await model.download(
lambda progress: print(
f"\rDownloading model: {progress:.2f}%",
end="",
@@ -159,7 +160,7 @@ def ep_progress(ep_name: str, percent: float):
)
)
print()
- model.load()
+ await model.load()
print("Model loaded and ready.")
# Get a chat client
@@ -179,17 +180,17 @@ def ep_progress(ep_name: str, percent: float):
]
print("Sending request with tools...")
- response = client.complete_chat(messages, tools=tools)
- answer = process_tool_calls(messages, response, client)
+ response = await client.complete_chat(messages, tools=tools)
+ answer = await process_tool_calls(messages, response, client)
print(f"\nAssistant: {answer}")
# Clean up
- model.unload()
+ await model.unload()
print("Model unloaded.")
#
if __name__ == "__main__":
- main()
+ asyncio.run(main())
#
diff --git a/samples/python/tutorial-chat-assistant/src/app.py b/samples/python/tutorial-chat-assistant/src/app.py
index 13f1c500..2650c0a2 100644
--- a/samples/python/tutorial-chat-assistant/src/app.py
+++ b/samples/python/tutorial-chat-assistant/src/app.py
@@ -1,14 +1,15 @@
#
#
+import asyncio
from foundry_local_sdk import Configuration, FoundryLocalManager
#
-def main():
+async def main():
#
# Initialize the Foundry Local SDK
config = Configuration(app_name="foundry_local_samples")
- FoundryLocalManager.initialize(config)
+ await FoundryLocalManager.initialize(config)
manager = FoundryLocalManager.instance
# Download and register all execution providers.
@@ -21,15 +22,15 @@ def ep_progress(ep_name: str, percent: float):
current_ep = ep_name
print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
- manager.download_and_register_eps(progress_callback=ep_progress)
+ await manager.download_and_register_eps(progress_callback=ep_progress)
if current_ep:
print()
# Select and load a model from the catalog
- model = manager.catalog.get_model("qwen2.5-0.5b")
- model.download(lambda progress: print(f"\rDownloading model: {progress:.2f}%", end="", flush=True))
+ model = await manager.catalog.get_model("qwen2.5-0.5b")
+ await model.download(lambda progress: print(f"\rDownloading model: {progress:.2f}%", end="", flush=True))
print()
- model.load()
+ await model.load()
print("Model loaded and ready.")
# Get a chat client
@@ -62,7 +63,7 @@ def ep_progress(ep_name: str, percent: float):
# Stream the response token by token
print("Assistant: ", end="", flush=True)
full_response = ""
- for chunk in client.complete_streaming_chat(messages):
+ async for chunk in client.complete_streaming_chat(messages):
content = chunk.choices[0].delta.content
if content:
print(content, end="", flush=True)
@@ -75,10 +76,10 @@ def ep_progress(ep_name: str, percent: float):
#
# Clean up - unload the model
- model.unload()
+ await model.unload()
print("Model unloaded. Goodbye!")
if __name__ == "__main__":
- main()
+ asyncio.run(main())
#
diff --git a/samples/python/tutorial-document-summarizer/src/app.py b/samples/python/tutorial-document-summarizer/src/app.py
index 055bb992..fb50f6ad 100644
--- a/samples/python/tutorial-document-summarizer/src/app.py
+++ b/samples/python/tutorial-document-summarizer/src/app.py
@@ -1,23 +1,24 @@
#
#
+import asyncio
import sys
from pathlib import Path
from foundry_local_sdk import Configuration, FoundryLocalManager
#
-def summarize_file(client, file_path, system_prompt):
+async def summarize_file(client, file_path, system_prompt):
"""Summarize a single file and print the result."""
content = Path(file_path).read_text(encoding="utf-8")
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": content}
]
- response = client.complete_chat(messages)
+ response = await client.complete_chat(messages)
print(response.choices[0].message.content)
-def summarize_directory(client, directory, system_prompt):
+async def summarize_directory(client, directory, system_prompt):
"""Summarize all .txt files in a directory."""
txt_files = sorted(Path(directory).glob("*.txt"))
@@ -27,15 +28,15 @@ def summarize_directory(client, directory, system_prompt):
for txt_file in txt_files:
print(f"--- {txt_file.name} ---")
- summarize_file(client, txt_file, system_prompt)
+ await summarize_file(client, txt_file, system_prompt)
print()
-def main():
+async def main():
#
# Initialize the Foundry Local SDK
config = Configuration(app_name="foundry_local_samples")
- FoundryLocalManager.initialize(config)
+ await FoundryLocalManager.initialize(config)
manager = FoundryLocalManager.instance
# Download and register all execution providers.
@@ -48,15 +49,15 @@ def ep_progress(ep_name: str, percent: float):
current_ep = ep_name
print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
- manager.download_and_register_eps(progress_callback=ep_progress)
+ await manager.download_and_register_eps(progress_callback=ep_progress)
if current_ep:
print()
# Select and load a model from the catalog
- model = manager.catalog.get_model("qwen2.5-0.5b")
- model.download(lambda p: print(f"\rDownloading model: {p:.2f}%", end="", flush=True))
+ model = await manager.catalog.get_model("qwen2.5-0.5b")
+ await model.download(lambda p: print(f"\rDownloading model: {p:.2f}%", end="", flush=True))
print()
- model.load()
+ await model.load()
print("Model loaded and ready.\n")
# Get a chat client
@@ -75,17 +76,17 @@ def ep_progress(ep_name: str, percent: float):
#
if target_path.is_dir():
- summarize_directory(client, target_path, system_prompt)
+ await summarize_directory(client, target_path, system_prompt)
else:
print(f"--- {target_path.name} ---")
- summarize_file(client, target_path, system_prompt)
+ await summarize_file(client, target_path, system_prompt)
#
# Clean up
- model.unload()
+ await model.unload()
print("\nModel unloaded. Done!")
if __name__ == "__main__":
- main()
+ asyncio.run(main())
#
diff --git a/samples/python/tutorial-tool-calling/src/app.py b/samples/python/tutorial-tool-calling/src/app.py
index bb22bfe0..097432c2 100644
--- a/samples/python/tutorial-tool-calling/src/app.py
+++ b/samples/python/tutorial-tool-calling/src/app.py
@@ -1,5 +1,6 @@
#
#
+import asyncio
import json
from foundry_local_sdk import Configuration, FoundryLocalManager
#
@@ -83,7 +84,7 @@ def calculate(expression):
#
-def process_tool_calls(messages, response, client):
+async def process_tool_calls(messages, response, client):
"""Handle tool calls in a loop until the model produces a final answer."""
choice = response.choices[0].message
@@ -121,7 +122,7 @@ def process_tool_calls(messages, response, client):
})
# Send the updated conversation back
- response = client.complete_chat(messages, tools=tools)
+ response = await client.complete_chat(messages, tools=tools)
choice = response.choices[0].message
return choice.content
@@ -129,10 +130,10 @@ def process_tool_calls(messages, response, client):
#
-def main():
+async def main():
# Initialize the Foundry Local SDK
config = Configuration(app_name="foundry_local_samples")
- FoundryLocalManager.initialize(config)
+ await FoundryLocalManager.initialize(config)
manager = FoundryLocalManager.instance
# Download and register all execution providers.
@@ -145,13 +146,13 @@ def ep_progress(ep_name: str, percent: float):
current_ep = ep_name
print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
- manager.download_and_register_eps(progress_callback=ep_progress)
+ await manager.download_and_register_eps(progress_callback=ep_progress)
if current_ep:
print()
# Select and load a model
- model = manager.catalog.get_model("qwen2.5-0.5b")
- model.download(
+ model = await manager.catalog.get_model("qwen2.5-0.5b")
+ await model.download(
lambda progress: print(
f"\rDownloading model: {progress:.2f}%",
end="",
@@ -159,7 +160,7 @@ def ep_progress(ep_name: str, percent: float):
)
)
print()
- model.load()
+ await model.load()
print("Model loaded and ready.")
# Get a chat client
@@ -183,18 +184,18 @@ def ep_progress(ep_name: str, percent: float):
messages.append({"role": "user", "content": user_input})
- response = client.complete_chat(messages, tools=tools)
- answer = process_tool_calls(messages, response, client)
+ response = await client.complete_chat(messages, tools=tools)
+ answer = await process_tool_calls(messages, response, client)
messages.append({"role": "assistant", "content": answer})
print(f"Assistant: {answer}\n")
# Clean up
- model.unload()
+ await model.unload()
print("Model unloaded. Goodbye!")
#
if __name__ == "__main__":
- main()
+ asyncio.run(main())
#
diff --git a/samples/python/tutorial-voice-to-text/src/app.py b/samples/python/tutorial-voice-to-text/src/app.py
index 8ebbba1b..9585f6f8 100644
--- a/samples/python/tutorial-voice-to-text/src/app.py
+++ b/samples/python/tutorial-voice-to-text/src/app.py
@@ -1,14 +1,15 @@
#
#
+import asyncio
from foundry_local_sdk import Configuration, FoundryLocalManager
#
-def main():
+async def main():
#
# Initialize the Foundry Local SDK
config = Configuration(app_name="foundry_local_samples")
- FoundryLocalManager.initialize(config)
+ await FoundryLocalManager.initialize(config)
manager = FoundryLocalManager.instance
#
@@ -22,14 +23,14 @@ def ep_progress(ep_name: str, percent: float):
current_ep = ep_name
print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
- manager.download_and_register_eps(progress_callback=ep_progress)
+ await manager.download_and_register_eps(progress_callback=ep_progress)
if current_ep:
print()
#
# Load the speech-to-text model
- speech_model = manager.catalog.get_model("whisper-tiny")
- speech_model.download(
+ speech_model = await manager.catalog.get_model("whisper-tiny")
+ await speech_model.download(
lambda progress: print(
f"\rDownloading speech model: {progress:.2f}%",
end="",
@@ -37,22 +38,22 @@ def ep_progress(ep_name: str, percent: float):
)
)
print()
- speech_model.load()
+ await speech_model.load()
print("Speech model loaded.")
# Transcribe the audio file
audio_client = speech_model.get_audio_client()
- transcription = audio_client.transcribe("meeting-notes.wav")
+ transcription = await audio_client.transcribe("meeting-notes.wav")
print(f"\nTranscription:\n{transcription.text}")
# Unload the speech model to free memory
- speech_model.unload()
+ await speech_model.unload()
#
#
# Load the chat model for summarization
- chat_model = manager.catalog.get_model("qwen2.5-0.5b")
- chat_model.download(
+ chat_model = await manager.catalog.get_model("qwen2.5-0.5b")
+ await chat_model.download(
lambda progress: print(
f"\rDownloading chat model: {progress:.2f}%",
end="",
@@ -60,7 +61,7 @@ def ep_progress(ep_name: str, percent: float):
)
)
print()
- chat_model.load()
+ await chat_model.load()
print("Chat model loaded.")
# Summarize the transcription into organized notes
@@ -76,16 +77,16 @@ def ep_progress(ep_name: str, percent: float):
{"role": "user", "content": transcription.text},
]
- response = client.complete_chat(messages)
+ response = await client.complete_chat(messages)
summary = response.choices[0].message.content
print(f"\nSummary:\n{summary}")
# Clean up
- chat_model.unload()
+ await chat_model.unload()
print("\nDone. Models unloaded.")
#
if __name__ == "__main__":
- main()
+ asyncio.run(main())
#
diff --git a/samples/python/web-server/src/app.py b/samples/python/web-server/src/app.py
index 67117029..c4fe9c1e 100644
--- a/samples/python/web-server/src/app.py
+++ b/samples/python/web-server/src/app.py
@@ -1,73 +1,80 @@
#
#
+import asyncio
import openai
from foundry_local_sdk import Configuration, FoundryLocalManager
#
-#
-# Initialize the Foundry Local SDK
-config = Configuration(app_name="foundry_local_samples")
-FoundryLocalManager.initialize(config)
-manager = FoundryLocalManager.instance
-# Download and register all execution providers.
-current_ep = ""
-def _ep_progress(ep_name: str, percent: float):
- global current_ep
- if ep_name != current_ep:
- if current_ep:
- print()
- current_ep = ep_name
- print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
+async def main():
+ #
+ # Initialize the Foundry Local SDK
+ config = Configuration(app_name="foundry_local_samples")
+ await FoundryLocalManager.initialize(config)
+ manager = FoundryLocalManager.instance
-manager.download_and_register_eps(progress_callback=_ep_progress)
-if current_ep:
+ # Download and register all execution providers.
+ current_ep = ""
+ def _ep_progress(ep_name: str, percent: float):
+ nonlocal current_ep
+ if ep_name != current_ep:
+ if current_ep:
+ print()
+ current_ep = ep_name
+ print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
+
+ await manager.download_and_register_eps(progress_callback=_ep_progress)
+ if current_ep:
+ print()
+
+ # Load a model
+ model = await manager.catalog.get_model("qwen2.5-0.5b")
+ await model.download(
+ lambda progress: print(
+ f"\rDownloading model: {progress:.2f}%",
+ end="",
+ flush=True,
+ )
+ )
print()
+ await model.load()
+ print("Model loaded.")
-# Load a model
-model = manager.catalog.get_model("qwen2.5-0.5b")
-model.download(
- lambda progress: print(
- f"\rDownloading model: {progress:.2f}%",
- end="",
- flush=True,
+ # Start the web service to expose an OpenAI-compatible REST endpoint
+ await manager.start_web_service()
+ base_url = f"{manager.urls[0]}/v1"
+ #
+
+ #
+ # Use the OpenAI SDK to connect to the local REST endpoint
+ client = openai.OpenAI(
+ base_url=base_url,
+ api_key="none",
)
-)
-print()
-model.load()
-print("Model loaded.")
+ #
-# Start the web service to expose an OpenAI-compatible REST endpoint
-manager.start_web_service()
-base_url = f"{manager.urls[0]}/v1"
-#
+ #
+ # Make a chat completion request via the REST API
+ response = client.chat.completions.create(
+ model=model.id,
+ messages=[
+ {"role": "system", "content": "You are a helpful assistant."},
+ {"role": "user", "content": "What is the golden ratio?"}
+ ],
+ stream=True,
+ )
-#
-# Use the OpenAI SDK to connect to the local REST endpoint
-client = openai.OpenAI(
- base_url=base_url,
- api_key="none",
-)
-#
+ for chunk in response:
+ if chunk.choices[0].delta.content is not None:
+ print(chunk.choices[0].delta.content, end="", flush=True)
+ print()
+ #
-#
-# Make a chat completion request via the REST API
-response = client.chat.completions.create(
- model=model.id,
- messages=[
- {"role": "system", "content": "You are a helpful assistant."},
- {"role": "user", "content": "What is the golden ratio?"}
- ],
- stream=True,
-)
+ # Clean up
+ await model.unload()
+ await manager.stop_web_service()
-for chunk in response:
- if chunk.choices[0].delta.content is not None:
- print(chunk.choices[0].delta.content, end="", flush=True)
-print()
-#
-# Clean up
-model.unload()
-manager.stop_web_service()
+if __name__ == "__main__":
+ asyncio.run(main())
#
diff --git a/sdk/python/examples/chat_completion.py b/sdk/python/examples/chat_completion.py
index c0c58048..fabd89d0 100644
--- a/sdk/python/examples/chat_completion.py
+++ b/sdk/python/examples/chat_completion.py
@@ -10,31 +10,32 @@
including model discovery, loading, and inference.
"""
+import asyncio
from foundry_local_sdk import Configuration, FoundryLocalManager
-def main():
+async def main():
# 1. Initialize the SDK
config = Configuration(app_name="ChatCompletionExample")
print("Initializing Foundry Local Manager")
- FoundryLocalManager.initialize(config)
+ await FoundryLocalManager.initialize(config)
manager = FoundryLocalManager.instance
# Discover available EPs and register them explicitly when needed.
- eps = manager.discover_eps()
+ eps = await manager.discover_eps()
print("Available execution providers:")
for ep in eps:
print(f" - {ep.name} (registered: {ep.is_registered})")
- ep_result = manager.download_and_register_eps()
+ ep_result = await manager.download_and_register_eps()
print(f"EP registration success: {ep_result.success} ({ep_result.status})")
# 2. Print available models in the catalog and cache
- models = manager.catalog.list_models()
+ models = await manager.catalog.list_models()
print("Available models in catalog:")
for m in models:
print(f" - {m.alias} ({m.id})")
- cached_models = manager.catalog.get_cached_models()
+ cached_models = await manager.catalog.get_cached_models()
print("\nCached models:")
for m in cached_models:
print(f" - {m.alias} ({m.id})")
@@ -42,22 +43,22 @@ def main():
CACHED_MODEL_ALIAS = "qwen2.5-0.5b"
# 3. Find a model from the cache (+ download if not cached)
- model = manager.catalog.get_model(CACHED_MODEL_ALIAS)
+ model = await manager.catalog.get_model(CACHED_MODEL_ALIAS)
if model is None:
print(f"Model '{CACHED_MODEL_ALIAS}' not found in catalog.")
print("Available models:")
- for m in manager.catalog.list_models():
+ for m in await manager.catalog.list_models():
print(f" - {m.alias} ({m.id})")
return
- if not model.is_cached:
+ if not await model.is_cached():
print(f"Downloading {model.alias}...")
- model.download(progress_callback=lambda pct: print(f" {pct:.1f}%", end="\r"))
+ await model.download(progress_callback=lambda pct: print(f" {pct:.1f}%", end="\r"))
print()
# 4. Load the model
print(f"Loading {model.alias}...", end="")
- model.load()
+ await model.load()
print("loaded!")
try:
@@ -65,14 +66,14 @@ def main():
client = model.get_chat_client()
print("\n--- Non-streaming ---")
- response = client.complete_chat(
+ response = await client.complete_chat(
messages=[{"role": "user", "content": "What is the capital of France? Reply briefly."}]
)
print(f"Response: {response.choices[0].message.content}")
# 6. Streaming
print("\n--- Streaming ---")
- for chunk in client.complete_streaming_chat(
+ async for chunk in client.complete_streaming_chat(
[{"role": "user", "content": "Tell me a short joke."}]
):
if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
@@ -84,9 +85,9 @@ def main():
finally:
# 7. Cleanup
- model.unload()
+ await model.unload()
print("\nModel unloaded.")
if __name__ == "__main__":
- main()
+ asyncio.run(main())
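
Because every SDK entry point in this example is now awaitable, it composes with standard asyncio primitives. A minimal sketch reusing the manager from above; note that calls into the native library still serialize on the interop lock, so gathering here is about composition, not parallel native work:

    # Hypothetical addition inside main(): await two catalog queries together.
    models, cached_models = await asyncio.gather(
        manager.catalog.list_models(),
        manager.catalog.get_cached_models(),
    )
    print(f"{len(cached_models)} of {len(models)} catalog models are cached")
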
diff --git a/sdk/python/pyproject.toml b/sdk/python/pyproject.toml
index ef93b6f7..851ea0a6 100644
--- a/sdk/python/pyproject.toml
+++ b/sdk/python/pyproject.toml
@@ -53,3 +53,5 @@ python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
timeout = 60
+asyncio_mode = "auto"
+asyncio_default_fixture_loop_scope = "session"
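
With asyncio_mode = "auto", pytest-asyncio collects plain async def tests without an explicit @pytest.mark.asyncio marker, which is what lets the existing test_* functions become coroutines directly. A minimal sketch of a test under this configuration (the file name, test name, and body are illustrative, not taken from the suite):

    # test_async_mode_sketch.py -- hypothetical test illustrating asyncio_mode = "auto"
    import asyncio

    async def test_event_loop_is_available():
        # No marker needed: auto mode runs async tests on an event loop automatically.
        await asyncio.sleep(0)
        assert asyncio.get_running_loop() is not None
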
diff --git a/sdk/python/requirements-dev.txt b/sdk/python/requirements-dev.txt
index aea40875..c581a02b 100644
--- a/sdk/python/requirements-dev.txt
+++ b/sdk/python/requirements-dev.txt
@@ -2,4 +2,5 @@
build
coverage
pytest
+pytest-asyncio
pytest-timeout
diff --git a/sdk/python/requirements.txt b/sdk/python/requirements.txt
index 666a3721..28daade2 100644
--- a/sdk/python/requirements.txt
+++ b/sdk/python/requirements.txt
@@ -1,8 +1,7 @@
pydantic>=2.0.0
requests>=2.32.4
openai>=2.24.0
-# Standard native binary packages from the ORT-Nightly PyPI feed.
-foundry-local-core==1.0.0rc1
+foundry-local-core==1.0.0
onnxruntime-core==1.24.4; sys_platform != "linux"
onnxruntime-gpu==1.24.4; sys_platform == "linux"
onnxruntime-genai-core==0.13.1; sys_platform != "linux"
diff --git a/sdk/python/src/catalog.py b/sdk/python/src/catalog.py
index 51f5bd8f..b79e2473 100644
--- a/sdk/python/src/catalog.py
+++ b/sdk/python/src/catalog.py
@@ -5,9 +5,9 @@
from __future__ import annotations
+import asyncio
import datetime
import logging
-import threading
from typing import List, Optional
from pydantic import TypeAdapter
@@ -38,26 +38,26 @@ def __init__(self, model_load_manager: ModelLoadManager, core_interop: CoreInter
"""
self._core_interop = core_interop
self._model_load_manager = model_load_manager
- self._lock = threading.Lock()
+ self._lock = asyncio.Lock()
self._models: List[ModelInfo] = []
self._model_alias_to_model = {}
self._model_id_to_model_variant = {}
self._last_fetch = datetime.datetime.min
- response = core_interop.execute_command("get_catalog_name")
+ response = core_interop._execute_command("get_catalog_name")
if response.error is not None:
raise FoundryLocalException(f"Failed to get catalog name: {response.error}")
self.name = response.data
- def _update_models(self):
- with self._lock:
+ async def _update_models(self):
+ async with self._lock:
# refresh every 6 hours
if (datetime.datetime.now() - self._last_fetch) < datetime.timedelta(hours=6):
return
- response = self._core_interop.execute_command("get_model_list")
+ response = await self._core_interop.execute_command("get_model_list")
if response.error is not None:
raise FoundryLocalException(f"Failed to get model list: {response.error}")
@@ -84,28 +84,28 @@ def _update_models(self):
self._models = models
self._last_fetch = datetime.datetime.now()
- def _invalidate_cache(self):
- with self._lock:
+ async def _invalidate_cache(self):
+ async with self._lock:
self._last_fetch = datetime.datetime.min
- def list_models(self) -> List[IModel]:
+ async def list_models(self) -> List[IModel]:
"""
List the available models in the catalog.
:return: List of IModel instances.
"""
- self._update_models()
+ await self._update_models()
return list(self._model_alias_to_model.values())
- def get_model(self, model_alias: str) -> Optional[IModel]:
+ async def get_model(self, model_alias: str) -> Optional[IModel]:
"""
Lookup a model by its alias.
:param model_alias: Model alias.
:return: IModel if found.
"""
- self._update_models()
+ await self._update_models()
return self._model_alias_to_model.get(model_alias)
- def get_model_variant(self, model_id: str) -> Optional[IModel]:
+ async def get_model_variant(self, model_id: str) -> Optional[IModel]:
"""
Lookup a model variant by its unique model id.
NOTE: This will return an IModel with a single variant. Use get_model to get an IModel with all available
@@ -113,10 +113,10 @@ def get_model_variant(self, model_id: str) -> Optional[IModel]:
:param model_id: Model id.
:return: IModel if found.
"""
- self._update_models()
+ await self._update_models()
return self._model_id_to_model_variant.get(model_id)
- def get_latest_version(self, model_or_model_variant: IModel) -> IModel:
+ async def get_latest_version(self, model_or_model_variant: IModel) -> IModel:
"""
Resolve the latest catalog version for the provided model or variant.
@@ -124,7 +124,7 @@ def get_latest_version(self, model_or_model_variant: IModel) -> IModel:
:return: Latest catalog version for the same model name.
:raises FoundryLocalException: If the alias or name cannot be resolved.
"""
- self._update_models()
+ await self._update_models()
model = self._model_alias_to_model.get(model_or_model_variant.alias)
if model is None:
@@ -144,14 +144,14 @@ def get_latest_version(self, model_or_model_variant: IModel) -> IModel:
return model_or_model_variant if latest.id == model_or_model_variant.id else latest
- def get_cached_models(self) -> List[IModel]:
+ async def get_cached_models(self) -> List[IModel]:
"""
Get a list of currently downloaded models from the model cache.
:return: List of IModel instances.
"""
- self._update_models()
+ await self._update_models()
- cached_model_ids = get_cached_model_ids(self._core_interop)
+ cached_model_ids = await get_cached_model_ids(self._core_interop)
cached_models: List[IModel] = []
for model_id in cached_model_ids:
@@ -161,14 +161,14 @@ def get_cached_models(self) -> List[IModel]:
return cached_models
- def get_loaded_models(self) -> List[IModel]:
+ async def get_loaded_models(self) -> List[IModel]:
"""
Get a list of the currently loaded models.
:return: List of IModel instances.
"""
- self._update_models()
+ await self._update_models()
- loaded_model_ids = self._model_load_manager.list_loaded()
+ loaded_model_ids = await self._model_load_manager.list_loaded()
loaded_models: List[IModel] = []
for model_id in loaded_model_ids:
diff --git a/sdk/python/src/detail/core_interop.py b/sdk/python/src/detail/core_interop.py
index 1cd53e33..aa520f03 100644
--- a/sdk/python/src/detail/core_interop.py
+++ b/sdk/python/src/detail/core_interop.py
@@ -5,11 +5,13 @@
from __future__ import annotations
+import asyncio
import ctypes
import json
import logging
import os
import sys
+import threading
from dataclasses import dataclass
from pathlib import Path
@@ -104,6 +106,9 @@ class CoreInterop:
instance = None
+ # Serialize native calls — the underlying C library may not be thread-safe.
+ _native_lock = threading.Lock()
+
# Callback function for native interop.
# Returns c_int: 0 = continue, 1 = cancel.
CALLBACK_TYPE = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p)
@@ -218,7 +223,7 @@ def __init__(self, config: Configuration):
config.additional_settings["Bootstrap"] = "true"
request = InteropRequest(params=config.as_dictionary())
- response = self.execute_command("initialize", request)
+ response = self._execute_command("initialize", request)
if response.error is not None:
raise FoundryLocalException(f"Failed to initialize Foundry.Local.Core: {response.error}")
@@ -226,41 +231,42 @@ def __init__(self, config: Configuration):
def _execute_command(self, command: str, interop_request: InteropRequest = None,
callback: CoreInterop.CALLBACK_TYPE = None):
- cmd_ptr, cmd_len, cmd_buf = CoreInterop._to_c_buffer(command)
- data_ptr, data_len, data_buf = CoreInterop._to_c_buffer(interop_request.to_json() if interop_request else None)
+ with CoreInterop._native_lock:
+ cmd_ptr, cmd_len, cmd_buf = CoreInterop._to_c_buffer(command)
+ data_ptr, data_len, data_buf = CoreInterop._to_c_buffer(interop_request.to_json() if interop_request else None)
- req = RequestBuffer(Command=cmd_ptr, CommandLength=cmd_len, Data=data_ptr, DataLength=data_len)
- resp = ResponseBuffer()
- lib = CoreInterop._flcore_library
+ req = RequestBuffer(Command=cmd_ptr, CommandLength=cmd_len, Data=data_ptr, DataLength=data_len)
+ resp = ResponseBuffer()
+ lib = CoreInterop._flcore_library
- if (callback is not None):
- # If a callback is provided, use the execute_command_with_callback method
- # We need a helper to do the initial conversion from ctypes to Python and pass it through to the
- # provided callback function
- callback_helper = CallbackHelper(callback)
- callback_py_obj = ctypes.py_object(callback_helper)
- callback_helper_ptr = ctypes.cast(ctypes.pointer(callback_py_obj), ctypes.c_void_p)
- callback_fn = CoreInterop.CALLBACK_TYPE(CallbackHelper.callback)
+ if (callback is not None):
+ # If a callback is provided, use the execute_command_with_callback method
+ # We need a helper to do the initial conversion from ctypes to Python and pass it through to the
+ # provided callback function
+ callback_helper = CallbackHelper(callback)
+ callback_py_obj = ctypes.py_object(callback_helper)
+ callback_helper_ptr = ctypes.cast(ctypes.pointer(callback_py_obj), ctypes.c_void_p)
+ callback_fn = CoreInterop.CALLBACK_TYPE(CallbackHelper.callback)
- lib.execute_command_with_callback(ctypes.byref(req), ctypes.byref(resp), callback_fn, callback_helper_ptr)
+ lib.execute_command_with_callback(ctypes.byref(req), ctypes.byref(resp), callback_fn, callback_helper_ptr)
- if callback_helper.exception is not None:
- raise callback_helper.exception
- else:
- lib.execute_command(ctypes.byref(req), ctypes.byref(resp))
+ if callback_helper.exception is not None:
+ raise callback_helper.exception
+ else:
+ lib.execute_command(ctypes.byref(req), ctypes.byref(resp))
- req = None # Free Python reference to request
+ req = None # Free Python reference to request
- response_str = ctypes.string_at(resp.Data, resp.DataLength).decode("utf-8") if resp.Data else None
- error_str = ctypes.string_at(resp.Error, resp.ErrorLength).decode("utf-8") if resp.Error else None
+ response_str = ctypes.string_at(resp.Data, resp.DataLength).decode("utf-8") if resp.Data else None
+ error_str = ctypes.string_at(resp.Error, resp.ErrorLength).decode("utf-8") if resp.Error else None
- # C# owns the memory in the response so we need to free it explicitly
- lib.free_response(resp)
-
- return Response(data=response_str, error=error_str)
+ # C# owns the memory in the response so we need to free it explicitly
+ lib.free_response(resp)
+
+ return Response(data=response_str, error=error_str)
- def execute_command(self, command_name: str, command_input: Optional[InteropRequest] = None) -> Response:
- """Execute a command synchronously.
+ async def execute_command(self, command_name: str, command_input: Optional[InteropRequest] = None) -> Response:
+ """Execute a command asynchronously.
Args:
command_name: The native command name (e.g. ``"get_model_list"``).
@@ -272,10 +278,9 @@ def execute_command(self, command_name: str, command_input: Optional[InteropRequ
logger.debug("Executing command: %s Input: %s", command_name,
command_input.params if command_input else None)
- response = self._execute_command(command_name, command_input)
- return response
+ return await asyncio.to_thread(self._execute_command, command_name, command_input)
- def execute_command_with_callback(self, command_name: str, command_input: Optional[InteropRequest],
+ async def execute_command_with_callback(self, command_name: str, command_input: Optional[InteropRequest],
callback: Callable[[str], None]) -> Response:
"""Execute a command with a streaming callback.
@@ -292,14 +297,13 @@ def execute_command_with_callback(self, command_name: str, command_input: Option
"""
logger.debug("Executing command with callback: %s Input: %s", command_name,
command_input.params if command_input else None)
- response = self._execute_command(command_name, command_input, callback)
- return response
+ return await asyncio.to_thread(self._execute_command, command_name, command_input, callback)
-def get_cached_model_ids(core_interop: CoreInterop) -> list[str]:
+async def get_cached_model_ids(core_interop: CoreInterop) -> list[str]:
"""Get the list of models that have been downloaded and are cached."""
- response = core_interop.execute_command("get_cached_models")
+ response = await core_interop.execute_command("get_cached_models")
if response.error is not None:
raise FoundryLocalException(f"Failed to get cached models: {response.error}")
diff --git a/sdk/python/src/detail/model.py b/sdk/python/src/detail/model.py
index 189920b1..9f13d23e 100644
--- a/sdk/python/src/detail/model.py
+++ b/sdk/python/src/detail/model.py
@@ -104,35 +104,33 @@ def supports_tool_calling(self) -> Optional[bool]:
"""Whether the currently selected variant supports tool/function calling."""
return self._selected_variant.supports_tool_calling
- @property
- def is_cached(self) -> bool:
+ async def is_cached(self) -> bool:
"""Is the currently selected variant cached locally?"""
- return self._selected_variant.is_cached
+ return await self._selected_variant.is_cached()
- @property
- def is_loaded(self) -> bool:
+ async def is_loaded(self) -> bool:
"""Is the currently selected variant loaded in memory?"""
- return self._selected_variant.is_loaded
+ return await self._selected_variant.is_loaded()
- def download(self, progress_callback: Optional[Callable[[float], None]] = None) -> None:
+ async def download(self, progress_callback: Optional[Callable[[float], None]] = None) -> None:
"""Download the currently selected variant."""
- self._selected_variant.download(progress_callback)
+ await self._selected_variant.download(progress_callback)
- def get_path(self) -> str:
+ async def get_path(self) -> str:
"""Get the path to the currently selected variant."""
- return self._selected_variant.get_path()
+ return await self._selected_variant.get_path()
- def load(self) -> None:
+ async def load(self) -> None:
"""Load the currently selected variant into memory."""
- self._selected_variant.load()
+ await self._selected_variant.load()
- def unload(self) -> None:
+ async def unload(self) -> None:
"""Unload the currently selected variant from memory."""
- self._selected_variant.unload()
+ await self._selected_variant.unload()
- def remove_from_cache(self) -> None:
+ async def remove_from_cache(self) -> None:
"""Remove the currently selected variant from the local cache."""
- self._selected_variant.remove_from_cache()
+ await self._selected_variant.remove_from_cache()
def get_chat_client(self) -> ChatClient:
"""Get a chat client for the currently selected variant."""
diff --git a/sdk/python/src/detail/model_load_manager.py b/sdk/python/src/detail/model_load_manager.py
index 8ffd087a..2633d18c 100644
--- a/sdk/python/src/detail/model_load_manager.py
+++ b/sdk/python/src/detail/model_load_manager.py
@@ -4,6 +4,7 @@
# --------------------------------------------------------------------------
from __future__ import annotations
+import asyncio
import json
import logging
import requests
@@ -32,7 +33,7 @@ def __init__(self, core_interop: CoreInterop, external_service_url: str = None):
self._core_interop = core_interop
self._external_service_url = external_service_url
- def load(self, model_id: str) -> None:
+ async def load(self, model_id: str) -> None:
"""
Load a model by its ID.
@@ -47,37 +48,37 @@ def load(self, model_id: str) -> None:
communicating with the external service.
"""
if self._external_service_url:
- self._web_load_model(model_id)
+ await self._web_load_model(model_id)
return
request = InteropRequest({"Model": model_id})
- response = self._core_interop.execute_command("load_model", request)
+ response = await self._core_interop.execute_command("load_model", request)
if response.error is not None:
raise FoundryLocalException(f"Failed to load model {model_id}: {response.error}")
- def unload(self, model_id: str) -> None:
+ async def unload(self, model_id: str) -> None:
"""
Unload a model by its ID.
:param model_id: The ID of the model to unload.
"""
if self._external_service_url:
- self._web_unload_model(model_id)
+ await self._web_unload_model(model_id)
return
request = InteropRequest({"Model": model_id})
- response = self._core_interop.execute_command("unload_model", request)
+ response = await self._core_interop.execute_command("unload_model", request)
if response.error is not None:
raise FoundryLocalException(f"Failed to unload model {model_id}: {response.error}")
- def list_loaded(self) -> list[str]:
+ async def list_loaded(self) -> list[str]:
"""
List loaded models.
:return: List of loaded model IDs
"""
if self._external_service_url:
- return self._web_list_loaded_models()
+ return await self._web_list_loaded_models()
- response = self._core_interop.execute_command("list_loaded_models")
+ response = await self._core_interop.execute_command("list_loaded_models")
if response.error is not None:
raise FoundryLocalException(f"Failed to list loaded models: {response.error}")
@@ -88,9 +89,12 @@ def list_loaded(self) -> list[str]:
return model_ids
- def _web_list_loaded_models(self) -> List[str]:
+ async def _web_list_loaded_models(self) -> List[str]:
try:
- response = requests.get(f"{self._external_service_url}/models/loaded", headers=self._headers, timeout=10)
+ response = await asyncio.to_thread(
+ requests.get, f"{self._external_service_url}/models/loaded",
+ headers=self._headers, timeout=10
+ )
if not response.ok:
raise FoundryLocalException(
@@ -109,7 +113,7 @@ def _web_list_loaded_models(self) -> List[str]:
except json.JSONDecodeError as e:
raise FoundryLocalException(f"Failed to decode JSON response: Response was: {content}") from e
- def _web_load_model(self, model_id: str) -> None:
+ async def _web_load_model(self, model_id: str) -> None:
"""
Load a model via the external web service.
@@ -126,7 +130,7 @@ def _web_load_model(self, model_id: str) -> None:
# }
# response = requests.get(url, params=query_params)
- response = requests.get(url, headers=self._headers, timeout=10)
+ response = await asyncio.to_thread(requests.get, url, headers=self._headers, timeout=10)
if not response.ok:
raise FoundryLocalException(
@@ -143,12 +147,12 @@ def _web_load_model(self, model_id: str) -> None:
f"HTTP request failed when loading model {model_id} from {self._external_service_url}: {e}"
) from e
- def _web_unload_model(self, model_id: str) -> None:
+ async def _web_unload_model(self, model_id: str) -> None:
try:
encoded_model_id = quote(model_id)
url = f"{self._external_service_url}/models/unload/{encoded_model_id}"
- response = requests.get(url, headers=self._headers, timeout=10)
+ response = await asyncio.to_thread(requests.get, url, headers=self._headers, timeout=10)
if not response.ok:
raise FoundryLocalException(
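
requests is a synchronous HTTP client, so the web-service code path applies the same idea: the blocking call is pushed to a worker thread with asyncio.to_thread rather than being rewritten against an async HTTP library. A stripped-down sketch of that shape (the URL and port are illustrative, not the service defaults):

    import asyncio
    import requests

    async def fetch_loaded_models(base_url: str) -> list:
        # requests.get blocks, so run it in a thread; extra arguments pass through to_thread.
        response = await asyncio.to_thread(
            requests.get, f"{base_url}/models/loaded", timeout=10
        )
        response.raise_for_status()
        return response.json()

    # Example call (assumes a local service is listening on a hypothetical port):
    #   asyncio.run(fetch_loaded_models("http://localhost:5273"))
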
diff --git a/sdk/python/src/detail/model_variant.py b/sdk/python/src/detail/model_variant.py
index a5ac02d4..e6fab104 100644
--- a/sdk/python/src/detail/model_variant.py
+++ b/sdk/python/src/detail/model_variant.py
@@ -4,6 +4,7 @@
# --------------------------------------------------------------------------
from __future__ import annotations
+import asyncio
import logging
from typing import Callable, List, Optional
@@ -99,19 +100,17 @@ def supports_tool_calling(self) -> Optional[bool]:
"""Whether this variant supports tool/function calling, or ``None`` if unknown."""
return self._model_info.supports_tool_calling
- @property
- def is_cached(self) -> bool:
+ async def is_cached(self) -> bool:
"""``True`` if this variant is present in the local model cache."""
- cached_model_ids = get_cached_model_ids(self._core_interop)
+ cached_model_ids = await get_cached_model_ids(self._core_interop)
return self.id in cached_model_ids
- @property
- def is_loaded(self) -> bool:
+ async def is_loaded(self) -> bool:
"""``True`` if this variant is currently loaded into memory."""
- loaded_model_ids = self._model_load_manager.list_loaded()
+ loaded_model_ids = await self._model_load_manager.list_loaded()
return self.id in loaded_model_ids
- def download(self, progress_callback: Callable[[float], None] = None):
+ async def download(self, progress_callback: Callable[[float], None] = None):
"""Download this variant to the local cache.
Args:
@@ -120,18 +119,19 @@ def download(self, progress_callback: Callable[[float], None] = None):
"""
request = InteropRequest(params={"Model": self.id})
if progress_callback is None:
- response = self._core_interop.execute_command("download_model", request)
+ response = await self._core_interop.execute_command("download_model", request)
else:
- response = self._core_interop.execute_command_with_callback(
+ loop = asyncio.get_running_loop()
+ response = await self._core_interop.execute_command_with_callback(
"download_model", request,
- lambda pct_str: progress_callback(float(pct_str))
+ lambda pct_str: loop.call_soon_threadsafe(progress_callback, float(pct_str))
)
logger.info("Download response: %s", response)
if response.error is not None:
raise FoundryLocalException(f"Failed to download model: {response.error}")
- def get_path(self) -> str:
+ async def get_path(self) -> str:
"""Get the local file-system path to this variant if cached.
Returns:
@@ -141,27 +141,27 @@ def get_path(self) -> str:
FoundryLocalException: If the model path cannot be retrieved.
"""
request = InteropRequest(params={"Model": self.id})
- response = self._core_interop.execute_command("get_model_path", request)
+ response = await self._core_interop.execute_command("get_model_path", request)
if response.error is not None:
raise FoundryLocalException(f"Failed to get model path: {response.error}")
return response.data
- def load(self) -> None:
+ async def load(self) -> None:
"""Load this variant into memory for inference."""
- self._model_load_manager.load(self.id)
+ await self._model_load_manager.load(self.id)
- def remove_from_cache(self) -> None:
+ async def remove_from_cache(self) -> None:
"""Remove this variant from the local model cache."""
request = InteropRequest(params={"Model": self.id})
- response = self._core_interop.execute_command("remove_cached_model", request)
+ response = await self._core_interop.execute_command("remove_cached_model", request)
if response.error is not None:
raise FoundryLocalException(f"Failed to remove model from cache: {response.error}")
- def unload(self) -> None:
+ async def unload(self) -> None:
"""Unload this variant from memory."""
- self._model_load_manager.unload(self.id)
+ await self._model_load_manager.unload(self.id)
def get_chat_client(self) -> ChatClient:
"""Create an OpenAI-compatible ``ChatClient`` for this variant."""
diff --git a/sdk/python/src/foundry_local_manager.py b/sdk/python/src/foundry_local_manager.py
index a649f8e5..b402c500 100644
--- a/sdk/python/src/foundry_local_manager.py
+++ b/sdk/python/src/foundry_local_manager.py
@@ -5,6 +5,7 @@
from __future__ import annotations
+import asyncio
import json
import logging
import threading
@@ -27,8 +28,8 @@
class FoundryLocalManager:
"""Singleton manager for Foundry Local SDK operations.
- Call ``FoundryLocalManager.initialize(config)`` once at startup, then access
- the singleton via ``FoundryLocalManager.instance``.
+ Call ``await FoundryLocalManager.initialize(config)`` once at startup, then
+ access the singleton via ``FoundryLocalManager.instance``.
Attributes:
instance: The singleton ``FoundryLocalManager`` instance (set after ``initialize``).
@@ -40,17 +41,20 @@ class FoundryLocalManager:
instance: FoundryLocalManager = None
@staticmethod
- def initialize(config: Configuration):
+ async def initialize(config: Configuration):
"""Initialize the Foundry Local SDK with the given configuration.
- This method must be called before using any other part of the SDK.
+ This coroutine must be awaited before using any other part of the SDK::
+
+ await FoundryLocalManager.initialize(config)
+ manager = FoundryLocalManager.instance
Args:
config: Configuration object for the SDK.
"""
- # Delegate singleton creation to the constructor, which enforces
- # the singleton invariant under a lock and sets `instance`.
- FoundryLocalManager(config)
+ # Run the synchronous constructor in a thread to avoid blocking
+ # the event loop during native library initialization.
+ await asyncio.to_thread(FoundryLocalManager, config)
def __init__(self, config: Configuration):
# Enforce singleton creation under a class-level lock and ensure
@@ -66,6 +70,7 @@ def __init__(self, config: Configuration):
FoundryLocalManager.instance = self
self.urls = None
+ self._async_lock = asyncio.Lock()
def _initialize(self):
set_default_logger_severity(self.config.log_level)
@@ -76,7 +81,7 @@ def _initialize(self):
self._model_load_manager = ModelLoadManager(self._core_interop, external_service_url)
self.catalog = Catalog(self._model_load_manager, self._core_interop)
- def discover_eps(self) -> list[EpInfo]:
+ async def discover_eps(self) -> list[EpInfo]:
"""Discover available execution providers and their registration status.
Returns:
@@ -85,7 +90,7 @@ def discover_eps(self) -> list[EpInfo]:
Raises:
FoundryLocalException: If EP discovery fails or response JSON is invalid.
"""
- response = self._core_interop.execute_command("discover_eps")
+ response = await self._core_interop.execute_command("discover_eps")
if response.error is not None:
raise FoundryLocalException(f"Error discovering execution providers: {response.error}")
@@ -97,7 +102,7 @@ def discover_eps(self) -> list[EpInfo]:
f"Failed to decode JSON response from discover_eps: {e}. Response was: {response.data}"
) from e
- def download_and_register_eps(
+ async def download_and_register_eps(
self,
names: Optional[list[str]] = None,
progress_callback: Optional[Callable[[str, float], None]] = None,
@@ -121,21 +126,23 @@ def download_and_register_eps(
request = InteropRequest(params={"Names": ",".join(names)})
if progress_callback is not None:
+ loop = asyncio.get_running_loop()
+
def _on_chunk(chunk: str) -> None:
sep = chunk.find("|")
if sep >= 0:
ep_name = chunk[:sep] or ""
try:
percent = float(chunk[sep + 1:])
- progress_callback(ep_name, percent)
+ loop.call_soon_threadsafe(progress_callback, ep_name, percent)
except ValueError:
pass
- response = self._core_interop.execute_command_with_callback(
+ response = await self._core_interop.execute_command_with_callback(
"download_and_register_eps", request, _on_chunk
)
else:
- response = self._core_interop.execute_command("download_and_register_eps", request)
+ response = await self._core_interop.execute_command("download_and_register_eps", request)
if response.error is not None:
raise FoundryLocalException(f"Error downloading execution providers: {response.error}")
@@ -157,11 +164,11 @@ def _on_chunk(chunk: str) -> None:
# Invalidate the catalog cache if any EP was newly registered so the next access
# re-fetches models with the updated set of available EPs.
if ep_result.success or len(ep_result.registered_eps) > 0:
- self.catalog._invalidate_cache()
+ await self.catalog._invalidate_cache()
return ep_result
- def start_web_service(self):
+ async def start_web_service(self):
"""Start the optional web service.
If provided, the service will be bound to the value of Configuration.web.urls.
@@ -169,8 +176,8 @@ def start_web_service(self):
FoundryLocalManager.urls will be updated with the actual URL/s the service is listening on.
"""
- with FoundryLocalManager._lock:
- response = self._core_interop.execute_command("start_service")
+ async with self._async_lock:
+ response = await self._core_interop.execute_command("start_service")
if response.error is not None:
raise FoundryLocalException(f"Error starting web service: {response.error}")
@@ -181,14 +188,14 @@ def start_web_service(self):
self.urls = bound_urls
- def stop_web_service(self):
+ async def stop_web_service(self):
"""Stop the optional web service."""
- with FoundryLocalManager._lock:
+ async with self._async_lock:
if self.urls is None:
raise FoundryLocalException("Web service is not running.")
- response = self._core_interop.execute_command("stop_service")
+ response = await self._core_interop.execute_command("stop_service")
if response.error is not None:
raise FoundryLocalException(f"Error stopping web service: {response.error}")
diff --git a/sdk/python/src/imodel.py b/sdk/python/src/imodel.py
index 8237aeb4..e1e7bcf2 100644
--- a/sdk/python/src/imodel.py
+++ b/sdk/python/src/imodel.py
@@ -32,15 +32,13 @@ def info(self) -> ModelInfo:
"""Full model metadata."""
pass
- @property
@abstractmethod
- def is_cached(self) -> bool:
+ async def is_cached(self) -> bool:
"""True if the model is present in the local cache."""
pass
- @property
@abstractmethod
- def is_loaded(self) -> bool:
+ async def is_loaded(self) -> bool:
"""True if the model is loaded into memory."""
pass
@@ -75,7 +73,7 @@ def supports_tool_calling(self) -> Optional[bool]:
pass
@abstractmethod
- def download(self, progress_callback: Callable[[float], None] = None) -> None:
+ async def download(self, progress_callback: Optional[Callable[[float], None]] = None) -> None:
"""
Download the model to local cache if not already present.
:param progress_callback: Optional callback function for download progress as a percentage (0.0 to 100.0).
@@ -83,7 +81,7 @@ def download(self, progress_callback: Callable[[float], None] = None) -> None:
pass
@abstractmethod
- def get_path(self) -> str:
+ async def get_path(self) -> str:
"""
Gets the model path if cached.
:return: Path of model directory.
@@ -91,21 +89,21 @@ def get_path(self) -> str:
pass
@abstractmethod
- def load(self) -> None:
+ async def load(self) -> None:
"""
Load the model into memory if not already loaded.
"""
pass
@abstractmethod
- def remove_from_cache(self) -> None:
+ async def remove_from_cache(self) -> None:
"""
Remove the model from the local cache.
"""
pass
@abstractmethod
- def unload(self) -> None:
+ async def unload(self) -> None:
"""
Unload the model if loaded.
"""
diff --git a/sdk/python/src/openai/audio_client.py b/sdk/python/src/openai/audio_client.py
index 0858e4aa..ff8f133d 100644
--- a/sdk/python/src/openai/audio_client.py
+++ b/sdk/python/src/openai/audio_client.py
@@ -5,12 +5,12 @@
from __future__ import annotations
+import asyncio
import json
import logging
-import queue
import threading
from dataclasses import dataclass
-from typing import Generator, List, Optional
+from typing import AsyncGenerator, List, Optional
from ..detail.core_interop import CoreInterop, InteropRequest
from ..exception import FoundryLocalException
@@ -89,7 +89,7 @@ def _create_request_json(self, audio_file_path: str) -> str:
return json.dumps(request)
- def transcribe(self, audio_file_path: str) -> AudioTranscriptionResponse:
+ async def transcribe(self, audio_file_path: str) -> AudioTranscriptionResponse:
"""Transcribe an audio file (non-streaming).
Args:
@@ -107,7 +107,7 @@ def transcribe(self, audio_file_path: str) -> AudioTranscriptionResponse:
request_json = self._create_request_json(audio_file_path)
request = InteropRequest(params={"OpenAICreateRequest": request_json})
- response = self._core_interop.execute_command("audio_transcribe", request)
+ response = await self._core_interop.execute_command("audio_transcribe", request)
if response.error is not None:
raise FoundryLocalException(
f"Audio transcription failed for model '{self.model_id}': {response.error}"
@@ -116,19 +116,23 @@ def transcribe(self, audio_file_path: str) -> AudioTranscriptionResponse:
data = json.loads(response.data)
return AudioTranscriptionResponse(text=data.get("text", ""))
- def _stream_chunks(self, request_json: str) -> Generator[AudioTranscriptionResponse, None, None]:
- """Background-thread generator that yields parsed chunks from the native streaming call."""
- _SENTINEL = object()
- chunk_queue: queue.Queue = queue.Queue()
+ async def _stream_chunks(self, request_json: str) -> AsyncGenerator[AudioTranscriptionResponse, None]:
+ """Async generator that yields parsed chunks from the native streaming call."""
+ chunk_queue: asyncio.Queue = asyncio.Queue()
+ loop = asyncio.get_running_loop()
errors: List[Exception] = []
+ cancelled = threading.Event()
def _on_chunk(chunk_str: str) -> None:
+ if cancelled.is_set():
+ return
chunk_data = json.loads(chunk_str)
- chunk_queue.put(AudioTranscriptionResponse(text=chunk_data.get("text", "")))
+ chunk = AudioTranscriptionResponse(text=chunk_data.get("text", ""))
+ loop.call_soon_threadsafe(chunk_queue.put_nowait, chunk)
- def _run() -> None:
+ async def _execute() -> None:
try:
- resp = self._core_interop.execute_command_with_callback(
+ resp = await self._core_interop.execute_command_with_callback(
"audio_transcribe",
InteropRequest(params={"OpenAICreateRequest": request_json}),
_on_chunk,
@@ -142,30 +146,44 @@ def _run() -> None:
except Exception as exc:
errors.append(exc)
finally:
- chunk_queue.put(_SENTINEL)
-
- threading.Thread(target=_run, daemon=True).start()
- while (item := chunk_queue.get()) is not _SENTINEL:
- yield item
+ await chunk_queue.put(None)
+
+ task = asyncio.create_task(_execute())
+ try:
+ while True:
+ item = await chunk_queue.get()
+ if item is None:
+ break
+ yield item
+ finally:
+ # Signal the callback to drop further chunks, then cancel the task.
+ # The native call may continue on its worker thread, but _on_chunk
+ # will no-op so the queue stops growing.
+ cancelled.set()
+ task.cancel()
+ try:
+ await task
+ except asyncio.CancelledError:
+ pass
if errors:
raise errors[0]
- def transcribe_streaming(
+ async def transcribe_streaming(
self,
audio_file_path: str,
- ) -> Generator[AudioTranscriptionResponse, None, None]:
+ ) -> AsyncGenerator[AudioTranscriptionResponse, None]:
"""Transcribe an audio file with streaming chunks.
- Consume with a standard ``for`` loop::
+ Consume with a standard ``async for`` loop::
- for chunk in audio_client.transcribe_streaming("recording.mp3"):
+ async for chunk in audio_client.transcribe_streaming("recording.mp3"):
print(chunk.text, end="", flush=True)
Args:
audio_file_path: Path to the audio file to transcribe.
Returns:
- A generator of ``AudioTranscriptionResponse`` objects.
+ An async generator of ``AudioTranscriptionResponse`` objects.
Raises:
ValueError: If *audio_file_path* is not a non-empty string.
@@ -174,4 +192,5 @@ def transcribe_streaming(
self._validate_audio_file_path(audio_file_path)
request_json = self._create_request_json(audio_file_path)
- return self._stream_chunks(request_json)
\ No newline at end of file
+ async for chunk in self._stream_chunks(request_json):
+ yield chunk
\ No newline at end of file
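
Both streaming rewrites (this one and the matching one in chat_client.py below) use the same bridge: the native call invokes _on_chunk on a worker thread, each chunk is handed to the event loop with call_soon_threadsafe, and the async generator drains an asyncio.Queue until a None sentinel arrives. A stripped-down, standalone sketch of that pattern, with produce_on_thread standing in for the native streaming call::

    import asyncio
    import threading
    from typing import AsyncGenerator, Callable, List

    def produce_on_thread(on_chunk: Callable[[str], None]) -> None:
        # Stand-in for the native call: emits chunks from a worker thread.
        for part in ("Hello", ", ", "world"):
            on_chunk(part)

    async def stream_chunks() -> AsyncGenerator[str, None]:
        queue: asyncio.Queue = asyncio.Queue()
        loop = asyncio.get_running_loop()
        errors: List[Exception] = []
        cancelled = threading.Event()

        def _on_chunk(chunk: str) -> None:
            if cancelled.is_set():
                return  # consumer is gone; drop further chunks
            loop.call_soon_threadsafe(queue.put_nowait, chunk)

        async def _execute() -> None:
            try:
                # The SDK awaits execute_command_with_callback here instead.
                await asyncio.to_thread(produce_on_thread, _on_chunk)
            except Exception as exc:
                errors.append(exc)
            finally:
                await queue.put(None)  # sentinel: stream finished

        task = asyncio.create_task(_execute())
        try:
            while (item := await queue.get()) is not None:
                yield item
        finally:
            cancelled.set()
            task.cancel()
            try:
                await task
            except asyncio.CancelledError:
                pass
        if errors:
            raise errors[0]

    async def main() -> None:
        async for chunk in stream_chunks():
            print(chunk, end="", flush=True)

    asyncio.run(main())
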
diff --git a/sdk/python/src/openai/chat_client.py b/sdk/python/src/openai/chat_client.py
index 0b0d58bc..4b1b54c5 100644
--- a/sdk/python/src/openai/chat_client.py
+++ b/sdk/python/src/openai/chat_client.py
@@ -5,9 +5,9 @@
from __future__ import annotations
+import asyncio
import logging
import json
-import queue
import threading
from ..detail.core_interop import CoreInterop, InteropRequest
@@ -18,7 +18,7 @@
CompletionCreateParamsStreaming
from openai.types.chat import ChatCompletion
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
-from typing import Any, Dict, Generator, List, Optional
+from typing import Any, AsyncGenerator, Dict, List, Optional
logger = logging.getLogger(__name__)
@@ -192,7 +192,7 @@ def _create_request(
return json.dumps(chat_request)
- def complete_chat(self, messages: List[ChatCompletionMessageParam], tools: Optional[List[Dict[str, Any]]] = None):
+ async def complete_chat(self, messages: List[ChatCompletionMessageParam], tools: Optional[List[Dict[str, Any]]] = None):
"""Perform a non-streaming chat completion.
Args:
@@ -212,7 +212,7 @@ def complete_chat(self, messages: List[ChatCompletionMessageParam], tools: Optio
# Send the request to the chat API
request = InteropRequest(params={"OpenAICreateRequest": chat_request_json})
- response = self._core_interop.execute_command("chat_completions", request)
+ response = await self._core_interop.execute_command("chat_completions", request)
if response.error is not None:
raise FoundryLocalException(f"Error during chat completion: {response.error}")
@@ -220,13 +220,16 @@ def complete_chat(self, messages: List[ChatCompletionMessageParam], tools: Optio
return completion
- def _stream_chunks(self, chat_request_json: str) -> Generator[ChatCompletionChunk, None, None]:
- """Background-thread generator that yields parsed chunks from the native streaming call."""
- _SENTINEL = object()
- chunk_queue: queue.Queue = queue.Queue()
+ async def _stream_chunks(self, chat_request_json: str) -> AsyncGenerator[ChatCompletionChunk, None]:
+ """Async generator that yields parsed chunks from the native streaming call."""
+ chunk_queue: asyncio.Queue = asyncio.Queue()
+ loop = asyncio.get_running_loop()
errors: List[Exception] = []
+ cancelled = threading.Event()
def _on_chunk(response_str: str) -> None:
+ if cancelled.is_set():
+ return
raw = json.loads(response_str)
# Foundry Local returns tool call chunks with "message.tool_calls" instead
# of the standard streaming "delta.tool_calls". Normalize to delta format
@@ -238,11 +241,12 @@ def _on_chunk(response_str: str) -> None:
for i, tc in enumerate(msg.get("tool_calls", [])):
tc.setdefault("index", i)
choice["delta"] = msg
- chunk_queue.put(ChatCompletionChunk.model_validate(raw))
+ chunk = ChatCompletionChunk.model_validate(raw)
+ loop.call_soon_threadsafe(chunk_queue.put_nowait, chunk)
- def _run() -> None:
+ async def _execute() -> None:
try:
- resp = self._core_interop.execute_command_with_callback(
+ resp = await self._core_interop.execute_command_with_callback(
"chat_completions",
InteropRequest(params={"OpenAICreateRequest": chat_request_json}),
_on_chunk,
@@ -252,24 +256,38 @@ def _run() -> None:
except Exception as exc:
errors.append(exc)
finally:
- chunk_queue.put(_SENTINEL)
-
- threading.Thread(target=_run, daemon=True).start()
- while (item := chunk_queue.get()) is not _SENTINEL:
- yield item
+ await chunk_queue.put(None)
+
+ task = asyncio.create_task(_execute())
+ try:
+ while True:
+ item = await chunk_queue.get()
+ if item is None:
+ break
+ yield item
+ finally:
+ # Signal the callback to drop further chunks, then cancel the task.
+ # The native call may continue on its worker thread, but _on_chunk
+ # will no-op so the queue stops growing.
+ cancelled.set()
+ task.cancel()
+ try:
+ await task
+ except asyncio.CancelledError:
+ pass
if errors:
raise errors[0]
- def complete_streaming_chat(
+ async def complete_streaming_chat(
self,
messages: List[ChatCompletionMessageParam],
tools: Optional[List[Dict[str, Any]]] = None,
- ) -> Generator[ChatCompletionChunk, None, None]:
+ ) -> AsyncGenerator[ChatCompletionChunk, None]:
"""Perform a streaming chat completion, yielding chunks as they arrive.
- Consume with a standard ``for`` loop::
+ Consume with a standard ``async for`` loop::
- for chunk in client.complete_streaming_chat(messages):
+ async for chunk in client.complete_streaming_chat(messages):
if chunk.choices[0].delta.content:
print(chunk.choices[0].delta.content, end="", flush=True)
@@ -278,7 +296,7 @@ def complete_streaming_chat(
tools: Optional list of tool definitions for function calling.
Returns:
- A generator of ``ChatCompletionChunk`` objects.
+ An async generator of ``ChatCompletionChunk`` objects.
Raises:
ValueError: If messages or tools are malformed.
@@ -287,4 +305,5 @@ def complete_streaming_chat(
self._validate_messages(messages)
self._validate_tools(tools)
chat_request_json = self._create_request(messages, streaming=True, tools=tools)
- return self._stream_chunks(chat_request_json)
+ async for chunk in self._stream_chunks(chat_request_json):
+ yield chunk
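
A consumer-side note on both streaming rewrites: the cancellation logic sits in the generator's finally block, which only runs when the generator is finalized. A caller that breaks out of the async for early can close it deterministically with contextlib.aclosing (Python 3.10+); a hedged sketch, where first_sentence is illustrative only::

    from contextlib import aclosing

    async def first_sentence(client, messages) -> str:
        text = ""
        # aclosing() runs the generator's finally block (cancelled.set(), task.cancel())
        # as soon as the block is left, even on an early break.
        async with aclosing(client.complete_streaming_chat(messages)) as stream:
            async for chunk in stream:
                delta = chunk.choices[0].delta.content
                if delta:
                    text += delta
                if "." in text:
                    break
        return text
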
diff --git a/sdk/python/test/conftest.py b/sdk/python/test/conftest.py
index 1cb85704..53f12124 100644
--- a/sdk/python/test/conftest.py
+++ b/sdk/python/test/conftest.py
@@ -16,6 +16,7 @@
import logging
import pytest
+import pytest_asyncio
from pathlib import Path
@@ -105,14 +106,14 @@ def get_multiply_tool():
# Session-scoped fixtures
# ---------------------------------------------------------------------------
-@pytest.fixture(scope="session")
-def manager():
+@pytest_asyncio.fixture(scope="session")
+async def manager():
"""Initialize FoundryLocalManager once for the entire test session."""
# Reset singleton in case a previous run left state
FoundryLocalManager.instance = None
config = get_test_config()
- FoundryLocalManager.initialize(config)
+ await FoundryLocalManager.initialize(config)
mgr = FoundryLocalManager.instance
assert mgr is not None, "FoundryLocalManager.initialize did not set instance"
@@ -121,10 +122,10 @@ def manager():
# Teardown: unload all loaded models
try:
catalog = mgr.catalog
- loaded = catalog.get_loaded_models()
+ loaded = await catalog.get_loaded_models()
for model_variant in loaded:
try:
- model_variant.unload()
+ await model_variant.unload()
except Exception as e:
logger.warning("Failed to unload model %s during teardown: %s", model_variant.id, e)
except Exception as e:
@@ -134,19 +135,19 @@ def manager():
FoundryLocalManager.instance = None
-@pytest.fixture(scope="session")
-def catalog(manager):
+@pytest_asyncio.fixture(scope="session")
+async def catalog(manager):
"""Return the Catalog from the session-scoped manager."""
return manager.catalog
-@pytest.fixture(scope="session")
-def core_interop(manager):
+@pytest_asyncio.fixture(scope="session")
+async def core_interop(manager):
"""Return the CoreInterop from the session-scoped manager (internal, for component tests)."""
return manager._core_interop
-@pytest.fixture(scope="session")
-def model_load_manager(manager):
+@pytest_asyncio.fixture(scope="session")
+async def model_load_manager(manager):
"""Return the ModelLoadManager from the session-scoped manager (internal, for component tests)."""
return manager._model_load_manager
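
The suite keeps pytest-asyncio in its default strict mode, which is why every converted test below also gains an explicit @pytest.mark.asyncio marker. If the session-scoped async fixtures and the tests ever need to share a single event loop (for example, if the interop layer binds to the loop it was created on), pytest-asyncio's loop_scope option covers it; a hedged configuration sketch assuming pytest-asyncio 0.24+ semantics, not part of this change::

    import pytest
    import pytest_asyncio

    @pytest_asyncio.fixture(scope="session", loop_scope="session")
    async def manager_on_session_loop():
        ...  # initialize once, on the session-scoped event loop

    @pytest.mark.asyncio(loop_scope="session")
    async def test_uses_session_loop(manager_on_session_loop):
        ...
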
diff --git a/sdk/python/test/detail/test_model_load_manager.py b/sdk/python/test/detail/test_model_load_manager.py
index a5a231e3..a64b9e14 100644
--- a/sdk/python/test/detail/test_model_load_manager.py
+++ b/sdk/python/test/detail/test_model_load_manager.py
@@ -15,67 +15,71 @@
class TestModelLoadManagerCoreInterop:
"""ModelLoadManager tests using Core Interop (no external URL)."""
- def _get_model_id(self, catalog) -> str:
+ async def _get_model_id(self, catalog) -> str:
"""Resolve the variant ID for the test model alias."""
- cached = catalog.get_cached_models()
+ cached = await catalog.get_cached_models()
variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None)
assert variant is not None, f"{TEST_MODEL_ALIAS} should be cached"
return variant.id
- def test_should_load_model(self, catalog, core_interop):
+ @pytest.mark.asyncio
+ async def test_should_load_model(self, catalog, core_interop):
"""Load model via core interop and verify it appears in loaded list."""
- model_id = self._get_model_id(catalog)
+ model_id = await self._get_model_id(catalog)
mlm = ModelLoadManager(core_interop)
- mlm.load(model_id)
- loaded = mlm.list_loaded()
+ await mlm.load(model_id)
+ loaded = await mlm.list_loaded()
assert model_id in loaded
# Cleanup
- mlm.unload(model_id)
+ await mlm.unload(model_id)
- def test_should_unload_model(self, catalog, core_interop):
+ @pytest.mark.asyncio
+ async def test_should_unload_model(self, catalog, core_interop):
"""Load then unload model via core interop."""
- model_id = self._get_model_id(catalog)
+ model_id = await self._get_model_id(catalog)
mlm = ModelLoadManager(core_interop)
- mlm.load(model_id)
- loaded = mlm.list_loaded()
+ await mlm.load(model_id)
+ loaded = await mlm.list_loaded()
assert model_id in loaded
- mlm.unload(model_id)
- loaded = mlm.list_loaded()
+ await mlm.unload(model_id)
+ loaded = await mlm.list_loaded()
assert model_id not in loaded
- def test_should_list_loaded_models(self, catalog, core_interop):
+ @pytest.mark.asyncio
+ async def test_should_list_loaded_models(self, catalog, core_interop):
"""list_loaded() should return an array containing the loaded model."""
- model_id = self._get_model_id(catalog)
+ model_id = await self._get_model_id(catalog)
mlm = ModelLoadManager(core_interop)
- mlm.load(model_id)
- loaded = mlm.list_loaded()
+ await mlm.load(model_id)
+ loaded = await mlm.list_loaded()
assert isinstance(loaded, list)
assert model_id in loaded
# Cleanup
- mlm.unload(model_id)
+ await mlm.unload(model_id)
class TestModelLoadManagerExternalService:
"""ModelLoadManager tests using external web service URL (skipped in CI)."""
@skip_in_ci
- def test_should_load_and_unload_via_external_service(self, manager, catalog, core_interop):
+ @pytest.mark.asyncio
+ async def test_should_load_and_unload_via_external_service(self, manager, catalog, core_interop):
"""Load/unload model through the web service endpoint."""
- cached = catalog.get_cached_models()
+ cached = await catalog.get_cached_models()
variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None)
assert variant is not None
model_id = variant.id
# Start web service
try:
- manager.start_web_service()
+ await manager.start_web_service()
except Exception as e:
pytest.skip(f"Failed to start web service: {e}")
@@ -88,33 +92,34 @@ def test_should_load_and_unload_via_external_service(self, manager, catalog, cor
try:
# Setup: load via core interop
setup_mlm = ModelLoadManager(core_interop)
- setup_mlm.load(model_id)
- loaded = setup_mlm.list_loaded()
+ await setup_mlm.load(model_id)
+ loaded = await setup_mlm.list_loaded()
assert model_id in loaded
# Unload via external service
ext_mlm = ModelLoadManager(core_interop, service_url)
- ext_mlm.unload(model_id)
+ await ext_mlm.unload(model_id)
# Verify via core interop
- loaded = setup_mlm.list_loaded()
+ loaded = await setup_mlm.list_loaded()
assert model_id not in loaded
finally:
try:
- manager.stop_web_service()
+ await manager.stop_web_service()
except Exception:
pass
@skip_in_ci
- def test_should_list_loaded_via_external_service(self, manager, catalog, core_interop):
+ @pytest.mark.asyncio
+ async def test_should_list_loaded_via_external_service(self, manager, catalog, core_interop):
"""list_loaded() through the web service endpoint should match core interop."""
- cached = catalog.get_cached_models()
+ cached = await catalog.get_cached_models()
variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None)
assert variant is not None
model_id = variant.id
try:
- manager.start_web_service()
+ await manager.start_web_service()
except Exception as e:
pytest.skip(f"Failed to start web service: {e}")
@@ -127,18 +132,18 @@ def test_should_list_loaded_via_external_service(self, manager, catalog, core_in
try:
# Setup: load via core
setup_mlm = ModelLoadManager(core_interop)
- setup_mlm.load(model_id)
+ await setup_mlm.load(model_id)
# Verify via external service
ext_mlm = ModelLoadManager(core_interop, service_url)
- loaded = ext_mlm.list_loaded()
+ loaded = await ext_mlm.list_loaded()
assert isinstance(loaded, list)
assert model_id in loaded
# Cleanup
- setup_mlm.unload(model_id)
+ await setup_mlm.unload(model_id)
finally:
try:
- manager.stop_web_service()
+ await manager.stop_web_service()
except Exception:
pass
diff --git a/sdk/python/test/openai/test_audio_client.py b/sdk/python/test/openai/test_audio_client.py
index 0d365eef..160577be 100644
--- a/sdk/python/test/openai/test_audio_client.py
+++ b/sdk/python/test/openai/test_audio_client.py
@@ -19,28 +19,29 @@
)
-def _get_loaded_audio_model(catalog):
+async def _get_loaded_audio_model(catalog):
"""Helper: ensure the whisper model is selected, loaded, and return Model."""
- cached = catalog.get_cached_models()
+ cached = await catalog.get_cached_models()
assert len(cached) > 0
cached_variant = next((m for m in cached if m.alias == AUDIO_MODEL_ALIAS), None)
assert cached_variant is not None, f"{AUDIO_MODEL_ALIAS} should be cached"
- model = catalog.get_model(AUDIO_MODEL_ALIAS)
+ model = await catalog.get_model(AUDIO_MODEL_ALIAS)
assert model is not None
model.select_variant(cached_variant)
- model.load()
+ await model.load()
return model
class TestAudioClient:
"""Audio Client Tests."""
- def test_should_transcribe_audio(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_transcribe_audio(self, catalog):
"""Non-streaming transcription of Recording.mp3."""
- model = _get_loaded_audio_model(catalog)
+ model = await _get_loaded_audio_model(catalog)
try:
audio_client = model.get_audio_client()
assert audio_client is not None
@@ -48,7 +49,7 @@ def test_should_transcribe_audio(self, catalog):
audio_client.settings.language = "en"
audio_client.settings.temperature = 0.0
- response = audio_client.transcribe(AUDIO_FILE_PATH)
+ response = await audio_client.transcribe(AUDIO_FILE_PATH)
assert response is not None
assert hasattr(response, "text")
@@ -56,11 +57,12 @@ def test_should_transcribe_audio(self, catalog):
assert len(response.text) > 0
assert response.text == EXPECTED_TEXT
finally:
- model.unload()
+ await model.unload()
- def test_should_transcribe_audio_with_temperature(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_transcribe_audio_with_temperature(self, catalog):
"""Non-streaming transcription with explicit temperature."""
- model = _get_loaded_audio_model(catalog)
+ model = await _get_loaded_audio_model(catalog)
try:
audio_client = model.get_audio_client()
assert audio_client is not None
@@ -68,18 +70,19 @@ def test_should_transcribe_audio_with_temperature(self, catalog):
audio_client.settings.language = "en"
audio_client.settings.temperature = 0.0
- response = audio_client.transcribe(AUDIO_FILE_PATH)
+ response = await audio_client.transcribe(AUDIO_FILE_PATH)
assert response is not None
assert isinstance(response.text, str)
assert len(response.text) > 0
assert response.text == EXPECTED_TEXT
finally:
- model.unload()
+ await model.unload()
- def test_should_transcribe_audio_streaming(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_transcribe_audio_streaming(self, catalog):
"""Streaming transcription of Recording.mp3."""
- model = _get_loaded_audio_model(catalog)
+ model = await _get_loaded_audio_model(catalog)
try:
audio_client = model.get_audio_client()
assert audio_client is not None
@@ -88,7 +91,7 @@ def test_should_transcribe_audio_streaming(self, catalog):
audio_client.settings.temperature = 0.0
chunks = []
- for chunk in audio_client.transcribe_streaming(AUDIO_FILE_PATH):
+ async for chunk in audio_client.transcribe_streaming(AUDIO_FILE_PATH):
assert chunk is not None
assert hasattr(chunk, "text")
assert isinstance(chunk.text, str)
@@ -98,11 +101,12 @@ def test_should_transcribe_audio_streaming(self, catalog):
full_text = "".join(chunks)
assert full_text == EXPECTED_TEXT
finally:
- model.unload()
+ await model.unload()
- def test_should_transcribe_audio_streaming_with_temperature(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_transcribe_audio_streaming_with_temperature(self, catalog):
"""Streaming transcription with explicit temperature."""
- model = _get_loaded_audio_model(catalog)
+ model = await _get_loaded_audio_model(catalog)
try:
audio_client = model.get_audio_client()
assert audio_client is not None
@@ -111,7 +115,7 @@ def test_should_transcribe_audio_streaming_with_temperature(self, catalog):
audio_client.settings.temperature = 0.0
chunks = []
- for chunk in audio_client.transcribe_streaming(AUDIO_FILE_PATH):
+ async for chunk in audio_client.transcribe_streaming(AUDIO_FILE_PATH):
assert chunk is not None
assert isinstance(chunk.text, str)
chunks.append(chunk.text)
@@ -119,22 +123,25 @@ def test_should_transcribe_audio_streaming_with_temperature(self, catalog):
full_text = "".join(chunks)
assert full_text == EXPECTED_TEXT
finally:
- model.unload()
+ await model.unload()
- def test_should_raise_for_empty_audio_file_path(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_raise_for_empty_audio_file_path(self, catalog):
"""transcribe('') should raise."""
- model = catalog.get_model(AUDIO_MODEL_ALIAS)
+ model = await catalog.get_model(AUDIO_MODEL_ALIAS)
assert model is not None
audio_client = model.get_audio_client()
with pytest.raises(ValueError, match="Audio file path must be a non-empty string"):
- audio_client.transcribe("")
+ await audio_client.transcribe("")
- def test_should_raise_for_streaming_empty_audio_file_path(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_raise_for_streaming_empty_audio_file_path(self, catalog):
"""transcribe_streaming('') should raise."""
- model = catalog.get_model(AUDIO_MODEL_ALIAS)
+ model = await catalog.get_model(AUDIO_MODEL_ALIAS)
assert model is not None
audio_client = model.get_audio_client()
with pytest.raises(ValueError, match="Audio file path must be a non-empty string"):
- audio_client.transcribe_streaming("")
+ async for _ in audio_client.transcribe_streaming(""):
+ pass
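
Because transcribe_streaming is now an async generator function, its argument validation only runs on the first __anext__ rather than at call time, which is why the negative test above iterates inside pytest.raises. A tiny self-contained illustration of that behaviour::

    import asyncio

    async def numbers(limit: int):
        if limit < 0:
            raise ValueError("limit must be non-negative")
        for i in range(limit):
            yield i

    async def main() -> None:
        gen = numbers(-1)          # no error yet: the generator body has not started
        try:
            await gen.__anext__()  # validation fires on first iteration
        except ValueError as exc:
            print("raised:", exc)

    asyncio.run(main())
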
diff --git a/sdk/python/test/openai/test_chat_client.py b/sdk/python/test/openai/test_chat_client.py
index d96891b9..3580c38b 100644
--- a/sdk/python/test/openai/test_chat_client.py
+++ b/sdk/python/test/openai/test_chat_client.py
@@ -13,34 +13,35 @@
from ..conftest import TEST_MODEL_ALIAS, get_multiply_tool
-def _get_loaded_chat_model(catalog):
+async def _get_loaded_chat_model(catalog):
"""Helper: ensure the test model is selected, loaded, and return Model + ChatClient."""
- cached = catalog.get_cached_models()
+ cached = await catalog.get_cached_models()
assert len(cached) > 0
cached_variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None)
assert cached_variant is not None, f"{TEST_MODEL_ALIAS} should be cached"
- model = catalog.get_model(TEST_MODEL_ALIAS)
+ model = await catalog.get_model(TEST_MODEL_ALIAS)
assert model is not None
model.select_variant(cached_variant)
- model.load()
+ await model.load()
return model
class TestChatClient:
"""Chat Client Tests."""
- def test_should_perform_chat_completion(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_perform_chat_completion(self, catalog):
"""Non-streaming chat: 7 * 6 should include '42' in the response."""
- model = _get_loaded_chat_model(catalog)
+ model = await _get_loaded_chat_model(catalog)
try:
client = model.get_chat_client()
client.settings.max_tokens = 500
client.settings.temperature = 0.0 # deterministic
- result = client.complete_chat([
+ result = await client.complete_chat([
{"role": "user",
"content": "You are a calculator. Be precise. What is the answer to 7 multiplied by 6?"}
])
@@ -53,11 +54,12 @@ def test_should_perform_chat_completion(self, catalog):
assert isinstance(content, str)
assert "42" in content
finally:
- model.unload()
+ await model.unload()
- def test_should_perform_streaming_chat_completion(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_perform_streaming_chat_completion(self, catalog):
"""Streaming chat: 7 * 6 = 42, then follow-up +25 = 67."""
- model = _get_loaded_chat_model(catalog)
+ model = await _get_loaded_chat_model(catalog)
try:
client = model.get_chat_client()
client.settings.max_tokens = 500
@@ -69,7 +71,7 @@ def test_should_perform_streaming_chat_completion(self, catalog):
]
# ---- First question ----
- chunks = list(client.complete_streaming_chat(messages))
+ chunks = [c async for c in client.complete_streaming_chat(messages)]
assert len(chunks) > 0
first_response = "".join(
c.choices[0].delta.content
@@ -82,7 +84,7 @@ def test_should_perform_streaming_chat_completion(self, catalog):
messages.append({"role": "assistant", "content": first_response})
messages.append({"role": "user", "content": "Add 25 to the previous answer. Think hard to be sure of the answer."})
- chunks = list(client.complete_streaming_chat(messages))
+ chunks = [c async for c in client.complete_streaming_chat(messages)]
assert len(chunks) > 0
second_response = "".join(
c.choices[0].delta.content
@@ -91,47 +93,54 @@ def test_should_perform_streaming_chat_completion(self, catalog):
)
assert "67" in second_response
finally:
- model.unload()
+ await model.unload()
- def test_should_raise_for_empty_messages(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_raise_for_empty_messages(self, catalog):
"""complete_chat with empty list should raise."""
- model = catalog.get_model(TEST_MODEL_ALIAS)
+ model = await catalog.get_model(TEST_MODEL_ALIAS)
assert model is not None
client = model.get_chat_client()
with pytest.raises(ValueError):
- client.complete_chat([])
+ await client.complete_chat([])
- def test_should_raise_for_none_messages(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_raise_for_none_messages(self, catalog):
"""complete_chat with None should raise."""
- model = catalog.get_model(TEST_MODEL_ALIAS)
+ model = await catalog.get_model(TEST_MODEL_ALIAS)
assert model is not None
client = model.get_chat_client()
with pytest.raises(ValueError):
- client.complete_chat(None)
+ await client.complete_chat(None)
- def test_should_raise_for_streaming_empty_messages(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_raise_for_streaming_empty_messages(self, catalog):
"""complete_streaming_chat with empty list should raise."""
- model = catalog.get_model(TEST_MODEL_ALIAS)
+ model = await catalog.get_model(TEST_MODEL_ALIAS)
assert model is not None
client = model.get_chat_client()
with pytest.raises(ValueError):
- client.complete_streaming_chat([])
+ async for _ in client.complete_streaming_chat([]):
+ pass
- def test_should_raise_for_streaming_none_messages(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_raise_for_streaming_none_messages(self, catalog):
"""complete_streaming_chat with None should raise."""
- model = catalog.get_model(TEST_MODEL_ALIAS)
+ model = await catalog.get_model(TEST_MODEL_ALIAS)
assert model is not None
client = model.get_chat_client()
with pytest.raises(ValueError):
- client.complete_streaming_chat(None)
+ async for _ in client.complete_streaming_chat(None):
+ pass
- def test_should_perform_tool_calling_chat_completion(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_perform_tool_calling_chat_completion(self, catalog):
"""Tool calling (non-streaming): model uses multiply_numbers tool to answer 7 * 6."""
- model = _get_loaded_chat_model(catalog)
+ model = await _get_loaded_chat_model(catalog)
try:
client = model.get_chat_client()
client.settings.max_tokens = 500
@@ -145,7 +154,7 @@ def test_should_perform_tool_calling_chat_completion(self, catalog):
tools = [get_multiply_tool()]
# First turn: model should respond with a tool call
- response = client.complete_chat(messages, tools)
+ response = await client.complete_chat(messages, tools)
assert response is not None
assert response.choices is not None
@@ -168,16 +177,17 @@ def test_should_perform_tool_calling_chat_completion(self, catalog):
messages.append({"role": "system", "content": "Respond only with the answer generated by the tool."})
client.settings.tool_choice = {"type": "auto"}
- response = client.complete_chat(messages, tools)
+ response = await client.complete_chat(messages, tools)
assert response.choices[0].message.content is not None
assert "42" in response.choices[0].message.content
finally:
- model.unload()
+ await model.unload()
- def test_should_perform_tool_calling_streaming_chat_completion(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_perform_tool_calling_streaming_chat_completion(self, catalog):
"""Tool calling (streaming): model uses multiply_numbers tool, then continue conversation."""
- model = _get_loaded_chat_model(catalog)
+ model = await _get_loaded_chat_model(catalog)
try:
client = model.get_chat_client()
client.settings.max_tokens = 500
@@ -191,7 +201,7 @@ def test_should_perform_tool_calling_streaming_chat_completion(self, catalog):
tools = [get_multiply_tool()]
# First turn: collect chunks and find the tool call
- chunks = list(client.complete_streaming_chat(messages, tools))
+ chunks = [c async for c in client.complete_streaming_chat(messages, tools)]
last_tool_call_chunk = next(
(c for c in reversed(chunks)
if c.choices and c.choices[0].delta and c.choices[0].delta.tool_calls),
@@ -216,7 +226,7 @@ def test_should_perform_tool_calling_streaming_chat_completion(self, catalog):
client.settings.tool_choice = {"type": "auto"}
- chunks = list(client.complete_streaming_chat(messages, tools))
+ chunks = [c async for c in client.complete_streaming_chat(messages, tools)]
second_response = "".join(
c.choices[0].delta.content
for c in chunks
@@ -224,20 +234,18 @@ def test_should_perform_tool_calling_streaming_chat_completion(self, catalog):
)
assert "42" in second_response
finally:
- model.unload()
+ await model.unload()
- def test_should_return_generator(self, catalog):
- """complete_streaming_chat returns a generator that yields chunks."""
- model = _get_loaded_chat_model(catalog)
+ @pytest.mark.asyncio
+ async def test_should_return_async_generator(self, catalog):
+ """complete_streaming_chat returns an async generator that yields chunks."""
+ model = await _get_loaded_chat_model(catalog)
try:
client = model.get_chat_client()
client.settings.max_tokens = 50
client.settings.temperature = 0.0
- result = client.complete_streaming_chat([{"role": "user", "content": "Say hi."}])
-
- assert result is not None
- chunks = list(result)
+ chunks = [c async for c in client.complete_streaming_chat([{"role": "user", "content": "Say hi."}])]
assert len(chunks) > 0
finally:
- model.unload()
\ No newline at end of file
+ await model.unload()
\ No newline at end of file
diff --git a/sdk/python/test/test_catalog.py b/sdk/python/test/test_catalog.py
index 2e5968cc..5d13326c 100644
--- a/sdk/python/test/test_catalog.py
+++ b/sdk/python/test/test_catalog.py
@@ -7,6 +7,7 @@
from __future__ import annotations
import json
+import pytest
from foundry_local_sdk.catalog import Catalog
from foundry_local_sdk.detail.core_interop import Response
@@ -17,14 +18,16 @@
class TestCatalog:
"""Catalog Tests."""
- def test_should_initialize_with_catalog_name(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_initialize_with_catalog_name(self, catalog):
"""Catalog should expose a non-empty name string."""
assert isinstance(catalog.name, str)
assert len(catalog.name) > 0
- def test_should_list_models(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_list_models(self, catalog):
"""list_models() should return a non-empty list containing the test model."""
- models = catalog.list_models()
+ models = await catalog.list_models()
assert isinstance(models, list)
assert len(models) > 0
@@ -32,25 +35,29 @@ def test_should_list_models(self, catalog):
aliases = {m.alias for m in models}
assert TEST_MODEL_ALIAS in aliases
- def test_should_get_model_by_alias(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_get_model_by_alias(self, catalog):
"""get_model() should return a Model whose alias matches."""
- model = catalog.get_model(TEST_MODEL_ALIAS)
+ model = await catalog.get_model(TEST_MODEL_ALIAS)
assert model is not None
assert model.alias == TEST_MODEL_ALIAS
- def test_should_return_none_for_empty_alias(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_return_none_for_empty_alias(self, catalog):
"""get_model('') should return None (unknown alias)."""
- result = catalog.get_model("")
+ result = await catalog.get_model("")
assert result is None
- def test_should_return_none_for_unknown_alias(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_return_none_for_unknown_alias(self, catalog):
"""get_model() with a random alias should return None."""
- result = catalog.get_model("definitely-not-a-real-model-alias-12345")
+ result = await catalog.get_model("definitely-not-a-real-model-alias-12345")
assert result is None
- def test_should_get_cached_models(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_get_cached_models(self, catalog):
"""get_cached_models() should return a list with at least the test model."""
- cached = catalog.get_cached_models()
+ cached = await catalog.get_cached_models()
assert isinstance(cached, list)
assert len(cached) > 0
@@ -58,27 +65,31 @@ def test_should_get_cached_models(self, catalog):
aliases = {m.alias for m in cached}
assert TEST_MODEL_ALIAS in aliases
- def test_should_get_model_variant_by_id(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_get_model_variant_by_id(self, catalog):
"""get_model_variant() with a valid ID should return the variant."""
- cached = catalog.get_cached_models()
+ cached = await catalog.get_cached_models()
assert len(cached) > 0
variant = cached[0]
- result = catalog.get_model_variant(variant.id)
+ result = await catalog.get_model_variant(variant.id)
assert result is not None
assert result.id == variant.id
- def test_should_return_none_for_empty_variant_id(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_return_none_for_empty_variant_id(self, catalog):
"""get_model_variant('') should return None."""
- result = catalog.get_model_variant("")
+ result = await catalog.get_model_variant("")
assert result is None
- def test_should_return_none_for_unknown_variant_id(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_return_none_for_unknown_variant_id(self, catalog):
"""get_model_variant() with a random ID should return None."""
- result = catalog.get_model_variant("definitely-not-a-real-model-id-12345")
+ result = await catalog.get_model_variant("definitely-not-a-real-model-id-12345")
assert result is None
- def test_should_resolve_latest_version_for_model_and_variant_inputs(self):
+ @pytest.mark.asyncio
+ async def test_should_resolve_latest_version_for_model_and_variant_inputs(self):
"""get_latest_version() should resolve latest variant and preserve Model input when already latest."""
test_model_infos = [
@@ -124,9 +135,12 @@ def test_should_resolve_latest_version_for_model_and_variant_inputs(self):
]
class _MockCoreInterop:
- def execute_command(self, command_name, command_input=None):
+ def _execute_command(self, command_name, command_input=None):
if command_name == "get_catalog_name":
return Response(data="TestCatalog", error=None)
+ return Response(data=None, error=f"Unexpected command: {command_name}")
+
+ async def execute_command(self, command_name, command_input=None):
if command_name == "get_model_list":
return Response(data=json.dumps(test_model_infos), error=None)
if command_name == "get_cached_models":
@@ -134,12 +148,12 @@ def execute_command(self, command_name, command_input=None):
return Response(data=None, error=f"Unexpected command: {command_name}")
class _MockModelLoadManager:
- def list_loaded(self):
+ async def list_loaded(self):
return []
catalog = Catalog(_MockModelLoadManager(), _MockCoreInterop())
- model = catalog.get_model("test-alias")
+ model = await catalog.get_model("test-alias")
assert model is not None
variants = model.variants
@@ -153,15 +167,15 @@ def list_loaded(self):
assert middle_variant.id == "test-model:2"
assert oldest_variant.id == "test-model:1"
- result1 = catalog.get_latest_version(latest_variant)
+ result1 = await catalog.get_latest_version(latest_variant)
assert result1.id == "test-model:3"
- result2 = catalog.get_latest_version(middle_variant)
+ result2 = await catalog.get_latest_version(middle_variant)
assert result2.id == "test-model:3"
- result3 = catalog.get_latest_version(oldest_variant)
+ result3 = await catalog.get_latest_version(oldest_variant)
assert result3.id == "test-model:3"
model.select_variant(latest_variant)
- result4 = catalog.get_latest_version(model)
+ result4 = await catalog.get_latest_version(model)
assert result4 is model
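
The hand-rolled mock above simply declares execute_command as async def, which is all an awaited call needs. Where per-test return values or await assertions are wanted, unittest.mock.AsyncMock is the standard-library alternative; a hedged sketch reusing the Response type imported in this test::

    import asyncio
    from unittest.mock import AsyncMock

    from foundry_local_sdk.detail.core_interop import Response

    async def demo() -> None:
        core = AsyncMock()
        core.execute_command.return_value = Response(data="[]", error=None)

        resp = await core.execute_command("get_model_list")
        assert resp.error is None
        core.execute_command.assert_awaited_once_with("get_model_list")

    asyncio.run(demo())
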
diff --git a/sdk/python/test/test_foundry_local_manager.py b/sdk/python/test/test_foundry_local_manager.py
index 31528891..bf860931 100644
--- a/sdk/python/test/test_foundry_local_manager.py
+++ b/sdk/python/test/test_foundry_local_manager.py
@@ -6,6 +6,8 @@
from __future__ import annotations
+import pytest
+
class _Response:
def __init__(self, data=None, error=None):
@@ -18,7 +20,7 @@ def __init__(self, responses):
self._responses = responses
self.calls = []
- def execute_command(self, command_name, command_input=None):
+ async def execute_command(self, command_name, command_input=None):
self.calls.append((command_name, command_input))
return self._responses[command_name]
@@ -26,18 +28,21 @@ def execute_command(self, command_name, command_input=None):
class TestFoundryLocalManager:
"""Foundry Local Manager Tests."""
- def test_should_initialize_successfully(self, manager):
+ @pytest.mark.asyncio
+ async def test_should_initialize_successfully(self, manager):
"""Manager singleton should be non-None after initialize()."""
assert manager is not None
- def test_should_return_catalog(self, manager):
+ @pytest.mark.asyncio
+ async def test_should_return_catalog(self, manager):
"""Manager should expose a Catalog with a non-empty name."""
catalog = manager.catalog
assert catalog is not None
assert isinstance(catalog.name, str)
assert len(catalog.name) > 0
- def test_discover_eps_returns_ep_info(self, manager):
+ @pytest.mark.asyncio
+ async def test_discover_eps_returns_ep_info(self, manager):
original_core = manager._core_interop
manager._core_interop = _FakeCoreInterop(
{
@@ -49,7 +54,7 @@ def test_discover_eps_returns_ep_info(self, manager):
)
try:
- eps = manager.discover_eps()
+ eps = await manager.discover_eps()
finally:
manager._core_interop = original_core
@@ -58,7 +63,8 @@ def test_discover_eps_returns_ep_info(self, manager):
assert eps[0].name == "CUDAExecutionProvider"
assert eps[0].is_registered is True
- def test_download_and_register_eps_returns_result(self, manager):
+ @pytest.mark.asyncio
+ async def test_download_and_register_eps_returns_result(self, manager):
original_core = manager._core_interop
manager._core_interop = _FakeCoreInterop(
{
@@ -73,7 +79,7 @@ def test_download_and_register_eps_returns_result(self, manager):
)
try:
- result = manager.download_and_register_eps(["CUDAExecutionProvider"])
+ result = await manager.download_and_register_eps(["CUDAExecutionProvider"])
finally:
manager._core_interop = original_core
diff --git a/sdk/python/test/test_model.py b/sdk/python/test/test_model.py
index e2ea1509..b4e47114 100644
--- a/sdk/python/test/test_model.py
+++ b/sdk/python/test/test_model.py
@@ -6,83 +6,91 @@
from __future__ import annotations
+import pytest
+
from .conftest import TEST_MODEL_ALIAS, AUDIO_MODEL_ALIAS
class TestModel:
"""Model Tests."""
- def test_should_verify_cached_models(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_verify_cached_models(self, catalog):
"""Cached models from test-data-shared should include qwen and whisper."""
- cached = catalog.get_cached_models()
+ cached = await catalog.get_cached_models()
assert isinstance(cached, list)
assert len(cached) > 0
# Check qwen model is cached
qwen = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None)
assert qwen is not None, f"{TEST_MODEL_ALIAS} should be cached"
- assert qwen.is_cached is True
+ assert await qwen.is_cached() is True
# Check whisper model is cached
whisper = next((m for m in cached if m.alias == AUDIO_MODEL_ALIAS), None)
assert whisper is not None, f"{AUDIO_MODEL_ALIAS} should be cached"
- assert whisper.is_cached is True
+ assert await whisper.is_cached() is True
- def test_should_load_and_unload_model(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_load_and_unload_model(self, catalog):
"""Load/unload cycle should toggle is_loaded on the selected variant."""
- cached = catalog.get_cached_models()
+ cached = await catalog.get_cached_models()
assert len(cached) > 0
cached_variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None)
assert cached_variant is not None
- model = catalog.get_model(TEST_MODEL_ALIAS)
+ model = await catalog.get_model(TEST_MODEL_ALIAS)
assert model is not None
model.select_variant(cached_variant)
# Ensure it's not loaded initially (or unload if it is)
- if model.is_loaded:
- model.unload()
- assert model.is_loaded is False
+ if await model.is_loaded():
+ await model.unload()
+ assert await model.is_loaded() is False
try:
- model.load()
- assert model.is_loaded is True
+ await model.load()
+ assert await model.is_loaded() is True
- model.unload()
- assert model.is_loaded is False
+ await model.unload()
+ assert await model.is_loaded() is False
finally:
# Safety cleanup
- if model.is_loaded:
- model.unload()
+ if await model.is_loaded():
+ await model.unload()
- def test_should_expose_context_length(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_expose_context_length(self, catalog):
"""Model should expose context_length from ModelInfo metadata."""
- model = catalog.get_model(TEST_MODEL_ALIAS)
+ model = await catalog.get_model(TEST_MODEL_ALIAS)
assert model is not None
# context_length should be None or a positive integer
ctx = model.context_length
assert ctx is None or (isinstance(ctx, int) and ctx > 0)
- def test_should_expose_modalities(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_expose_modalities(self, catalog):
"""Model should expose input_modalities and output_modalities."""
- model = catalog.get_model(TEST_MODEL_ALIAS)
+ model = await catalog.get_model(TEST_MODEL_ALIAS)
assert model is not None
# Modalities should be None or non-empty strings
for val in (model.input_modalities, model.output_modalities):
assert val is None or (isinstance(val, str) and len(val) > 0)
- def test_should_expose_capabilities(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_expose_capabilities(self, catalog):
"""Model should expose capabilities metadata."""
- model = catalog.get_model(TEST_MODEL_ALIAS)
+ model = await catalog.get_model(TEST_MODEL_ALIAS)
assert model is not None
caps = model.capabilities
assert caps is None or (isinstance(caps, str) and len(caps) > 0)
- def test_should_expose_supports_tool_calling(self, catalog):
+ @pytest.mark.asyncio
+ async def test_should_expose_supports_tool_calling(self, catalog):
"""Model should expose supports_tool_calling metadata."""
- model = catalog.get_model(TEST_MODEL_ALIAS)
+ model = await catalog.get_model(TEST_MODEL_ALIAS)
assert model is not None
stc = model.supports_tool_calling
assert stc is None or isinstance(stc, bool)