Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pipelines/templates/test-python-steps.yml
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ steps:
if ($LASTEXITCODE -ne 0) { throw "Windows App SDK Runtime install failed" }
errorActionPreference: 'stop'

- script: pip install coverage pytest>=7.0.0 pytest-timeout>=2.1.0
- script: pip install coverage "pytest>=7.0.0" "pytest-timeout>=2.1.0" "pytest-asyncio>=1.3.0"
Comment thread
prathikr marked this conversation as resolved.
displayName: 'Install test dependencies'

- script: python -m pytest test/ -v
Expand Down
92 changes: 49 additions & 43 deletions samples/python/audio-transcription/src/app.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,59 @@
# <complete_code>
# <imports>
import asyncio
import sys
from foundry_local_sdk import Configuration, FoundryLocalManager
# </imports>


# <init>
# Initialize the Foundry Local SDK
config = Configuration(app_name="foundry_local_samples")
FoundryLocalManager.initialize(config)
manager = FoundryLocalManager.instance

# Download and register all execution providers.
current_ep = ""
def _ep_progress(ep_name: str, percent: float):
global current_ep
if ep_name != current_ep:
if current_ep:
print()
current_ep = ep_name
print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)

manager.download_and_register_eps(progress_callback=_ep_progress)
if current_ep:
print()
async def main():
# <init>
# Initialize the Foundry Local SDK
config = Configuration(app_name="foundry_local_samples")
await FoundryLocalManager.initialize(config)
manager = FoundryLocalManager.instance

# Download and register all execution providers.
current_ep = ""
def _ep_progress(ep_name: str, percent: float):
nonlocal current_ep
if ep_name != current_ep:
if current_ep:
print()
current_ep = ep_name
print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)

# Load the whisper model for speech-to-text
model = manager.catalog.get_model("whisper-tiny")
model.download(
lambda progress: print(
f"\rDownloading model: {progress:.2f}%",
end="",
flush=True,
await manager.download_and_register_eps(progress_callback=_ep_progress)
if current_ep:
print()

# Load the whisper model for speech-to-text
model = await manager.catalog.get_model("whisper-tiny")
await model.download(
lambda progress: print(
f"\rDownloading model: {progress:.2f}%",
end="",
flush=True,
)
)
)
print()
model.load()
print("Model loaded.")
# </init>

# <transcription>
# Get the audio client and transcribe
audio_client = model.get_audio_client()
audio_file = sys.argv[1] if len(sys.argv) > 1 else "Recording.mp3"
result = audio_client.transcribe(audio_file)
print("Transcription:")
print(result.text)
# </transcription>

# Clean up
model.unload()
print()
await model.load()
print("Model loaded.")
# </init>

# <transcription>
# Get the audio client and transcribe
audio_client = model.get_audio_client()
audio_file = sys.argv[1] if len(sys.argv) > 1 else "Recording.mp3"
result = await audio_client.transcribe(audio_file)
print("Transcription:")
print(result.text)
# </transcription>

# Clean up
await model.unload()


if __name__ == "__main__":
asyncio.run(main())
# </complete_code>
113 changes: 60 additions & 53 deletions samples/python/langchain-integration/src/app.py
Original file line number Diff line number Diff line change
@@ -1,73 +1,80 @@
# <complete_code>
# <imports>
import asyncio
from foundry_local_sdk import Configuration, FoundryLocalManager
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
# </imports>

# <init>
# Initialize the Foundry Local SDK
config = Configuration(app_name="foundry_local_samples")
FoundryLocalManager.initialize(config)
manager = FoundryLocalManager.instance

# Download and register all execution providers.
current_ep = ""
def _ep_progress(ep_name: str, percent: float):
global current_ep
if ep_name != current_ep:
if current_ep:
print()
current_ep = ep_name
print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
async def main():
# <init>
# Initialize the Foundry Local SDK
config = Configuration(app_name="foundry_local_samples")
await FoundryLocalManager.initialize(config)
manager = FoundryLocalManager.instance

manager.download_and_register_eps(progress_callback=_ep_progress)
if current_ep:
# Download and register all execution providers.
current_ep = ""
def _ep_progress(ep_name: str, percent: float):
nonlocal current_ep
if ep_name != current_ep:
if current_ep:
print()
current_ep = ep_name
print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)

await manager.download_and_register_eps(progress_callback=_ep_progress)
if current_ep:
print()

# Load a model
model = await manager.catalog.get_model("qwen2.5-0.5b")
await model.download(
lambda progress: print(
f"\rDownloading model: {progress:.2f}%",
end="",
flush=True,
)
)
print()
await model.load()
print("Model loaded.")

# Start the web service to expose an OpenAI-compatible endpoint
await manager.start_web_service()
base_url = f"{manager.urls[0]}/v1"
# </init>

# Load a model
model = manager.catalog.get_model("qwen2.5-0.5b")
model.download(
lambda progress: print(
f"\rDownloading model: {progress:.2f}%",
end="",
flush=True,
# <langchain_setup>
# Create a LangChain ChatOpenAI instance pointing to the local endpoint
llm = ChatOpenAI(
base_url=base_url,
api_key="none",
model=model.id,
)
)
print()
model.load()
print("Model loaded.")
# </langchain_setup>

# Start the web service to expose an OpenAI-compatible endpoint
manager.start_web_service()
base_url = f"{manager.urls[0]}/v1"
# </init>
# <chat_completion>
# Create a translation chain
prompt = ChatPromptTemplate.from_messages([
("system", "You are a translator. Translate the following text to {language}. Only output the translation, nothing else."),
("user", "{text}")
])

# <langchain_setup>
# Create a LangChain ChatOpenAI instance pointing to the local endpoint
llm = ChatOpenAI(
base_url=base_url,
api_key="none",
model=model.id,
)
# </langchain_setup>
chain = prompt | llm | StrOutputParser()

# <chat_completion>
# Create a translation chain
prompt = ChatPromptTemplate.from_messages([
("system", "You are a translator. Translate the following text to {language}. Only output the translation, nothing else."),
("user", "{text}")
])
# Run the chain
result = chain.invoke({"language": "Spanish", "text": "Hello, how are you today?"})
Comment thread
prathikr marked this conversation as resolved.
Outdated
print(f"Translation: {result}")
# </chat_completion>

chain = prompt | llm | StrOutputParser()
# Clean up
await model.unload()
await manager.stop_web_service()

# Run the chain
result = chain.invoke({"language": "Spanish", "text": "Hello, how are you today?"})
print(f"Translation: {result}")
# </chat_completion>

# Clean up
model.unload()
manager.stop_web_service()
if __name__ == "__main__":
asyncio.run(main())
# </complete_code>
19 changes: 10 additions & 9 deletions samples/python/native-chat-completions/src/app.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
# <complete_code>
# <imports>
import asyncio
from foundry_local_sdk import Configuration, FoundryLocalManager
# </imports>


def main():
async def main():
# <init>
# Initialize the Foundry Local SDK
config = Configuration(app_name="foundry_local_samples")
FoundryLocalManager.initialize(config)
await FoundryLocalManager.initialize(config)
manager = FoundryLocalManager.instance

# Download and register all execution providers.
Expand All @@ -21,21 +22,21 @@ def ep_progress(ep_name: str, percent: float):
current_ep = ep_name
print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)

manager.download_and_register_eps(progress_callback=ep_progress)
await manager.download_and_register_eps(progress_callback=ep_progress)
if current_ep:
print()

# Select and load a model from the catalog
model = manager.catalog.get_model("qwen2.5-0.5b")
model.download(
model = await manager.catalog.get_model("qwen2.5-0.5b")
await model.download(
lambda progress: print(
f"\rDownloading model: {progress:.2f}%",
end="",
flush=True,
)
)
print()
model.load()
await model.load()
print("Model loaded and ready.")

# Get a chat client
Expand All @@ -50,18 +51,18 @@ def ep_progress(ep_name: str, percent: float):

# Stream the response token by token
print("Assistant: ", end="", flush=True)
for chunk in client.complete_streaming_chat(messages):
async for chunk in client.complete_streaming_chat(messages):
content = chunk.choices[0].delta.content
if content:
print(content, end="", flush=True)
print()
# </streaming>

# Clean up
model.unload()
await model.unload()
print("Model unloaded.")


if __name__ == "__main__":
main()
asyncio.run(main())
# </complete_code>
25 changes: 13 additions & 12 deletions samples/python/tool-calling/src/app.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# <complete_code>
# <imports>
import asyncio
import json
from foundry_local_sdk import Configuration, FoundryLocalManager
# </imports>
Expand Down Expand Up @@ -83,7 +84,7 @@ def calculate(expression):


# <tool_loop>
def process_tool_calls(messages, response, client):
async def process_tool_calls(messages, response, client):
"""Handle tool calls in a loop until the model produces a final answer."""
choice = response.choices[0].message

Expand Down Expand Up @@ -121,18 +122,18 @@ def process_tool_calls(messages, response, client):
})

# Send the updated conversation back
response = client.complete_chat(messages, tools=tools)
response = await client.complete_chat(messages, tools=tools)
choice = response.choices[0].message

return choice.content
# </tool_loop>


# <init>
def main():
async def main():
# Initialize the Foundry Local SDK
config = Configuration(app_name="foundry_local_samples")
FoundryLocalManager.initialize(config)
await FoundryLocalManager.initialize(config)
manager = FoundryLocalManager.instance

# Download and register all execution providers.
Expand All @@ -145,21 +146,21 @@ def ep_progress(ep_name: str, percent: float):
current_ep = ep_name
print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)

manager.download_and_register_eps(progress_callback=ep_progress)
await manager.download_and_register_eps(progress_callback=ep_progress)
if current_ep:
print()

# Select and load a model
model = manager.catalog.get_model("qwen2.5-0.5b")
model.download(
model = await manager.catalog.get_model("qwen2.5-0.5b")
await model.download(
lambda progress: print(
f"\rDownloading model: {progress:.2f}%",
end="",
flush=True
)
)
print()
model.load()
await model.load()
print("Model loaded and ready.")

# Get a chat client
Expand All @@ -179,17 +180,17 @@ def ep_progress(ep_name: str, percent: float):
]

print("Sending request with tools...")
response = client.complete_chat(messages, tools=tools)
answer = process_tool_calls(messages, response, client)
response = await client.complete_chat(messages, tools=tools)
answer = await process_tool_calls(messages, response, client)

print(f"\nAssistant: {answer}")

# Clean up
model.unload()
await model.unload()
print("Model unloaded.")
# </init>


if __name__ == "__main__":
main()
asyncio.run(main())
# </complete_code>
Loading
Loading