diff --git a/.pipelines/templates/test-python-steps.yml b/.pipelines/templates/test-python-steps.yml
index 37bfa5a8..7bde34b3 100644
--- a/.pipelines/templates/test-python-steps.yml
+++ b/.pipelines/templates/test-python-steps.yml
@@ -128,7 +128,7 @@ steps:
       if ($LASTEXITCODE -ne 0) { throw "Windows App SDK Runtime install failed" }
     errorActionPreference: 'stop'
 
-- script: pip install coverage pytest>=7.0.0 pytest-timeout>=2.1.0
+- script: pip install coverage "pytest>=7.0.0" "pytest-timeout>=2.1.0" "pytest-asyncio>=1.3.0"
  displayName: 'Install test dependencies'
 
 - script: python -m pytest test/ -v
diff --git a/samples/python/audio-transcription/src/app.py b/samples/python/audio-transcription/src/app.py
index ca06fb28..d7e00c68 100644
--- a/samples/python/audio-transcription/src/app.py
+++ b/samples/python/audio-transcription/src/app.py
@@ -1,53 +1,59 @@
 #
 #
+import asyncio
 import sys
 from foundry_local_sdk import Configuration, FoundryLocalManager
 #
 
-#
-# Initialize the Foundry Local SDK
-config = Configuration(app_name="foundry_local_samples")
-FoundryLocalManager.initialize(config)
-manager = FoundryLocalManager.instance
-
-# Download and register all execution providers.
-current_ep = ""
-def _ep_progress(ep_name: str, percent: float):
-    global current_ep
-    if ep_name != current_ep:
-        if current_ep:
-            print()
-        current_ep = ep_name
-    print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
-
-manager.download_and_register_eps(progress_callback=_ep_progress)
-if current_ep:
-    print()
+async def main():
+    #
+    # Initialize the Foundry Local SDK
+    config = Configuration(app_name="foundry_local_samples")
+    await FoundryLocalManager.initialize(config)
+    manager = FoundryLocalManager.instance
+
+    # Download and register all execution providers.
+    current_ep = ""
+    def _ep_progress(ep_name: str, percent: float):
+        nonlocal current_ep
+        if ep_name != current_ep:
+            if current_ep:
+                print()
+            current_ep = ep_name
+        print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
 
-# Load the whisper model for speech-to-text
-model = manager.catalog.get_model("whisper-tiny")
-model.download(
-    lambda progress: print(
-        f"\rDownloading model: {progress:.2f}%",
-        end="",
-        flush=True,
+    await manager.download_and_register_eps(progress_callback=_ep_progress)
+    if current_ep:
+        print()
+
+    # Load the whisper model for speech-to-text
+    model = await manager.catalog.get_model("whisper-tiny")
+    await model.download(
+        lambda progress: print(
+            f"\rDownloading model: {progress:.2f}%",
+            end="",
+            flush=True,
+        )
     )
-)
-print()
-model.load()
-print("Model loaded.")
-#
-
-#
-# Get the audio client and transcribe
-audio_client = model.get_audio_client()
-audio_file = sys.argv[1] if len(sys.argv) > 1 else "Recording.mp3"
-result = audio_client.transcribe(audio_file)
-print("Transcription:")
-print(result.text)
-#
-
-# Clean up
-model.unload()
+    print()
+    await model.load()
+    print("Model loaded.")
+    #
+
+    #
+    # Get the audio client and transcribe
+    audio_client = model.get_audio_client()
+    audio_file = sys.argv[1] if len(sys.argv) > 1 else "Recording.mp3"
+    result = await audio_client.transcribe(audio_file)
+    print("Transcription:")
+    print(result.text)
+    #
+
+    # Clean up
+    await model.unload()
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
 #
diff --git a/samples/python/langchain-integration/src/app.py b/samples/python/langchain-integration/src/app.py
index 4f8661cd..1cfc8790 100644
--- a/samples/python/langchain-integration/src/app.py
+++ b/samples/python/langchain-integration/src/app.py
@@ -1,73 +1,80 @@
 #
 #
+import asyncio
 from foundry_local_sdk import Configuration, FoundryLocalManager
 from langchain_openai import ChatOpenAI
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.output_parsers import StrOutputParser
 #
 
-#
-# Initialize the Foundry Local SDK
-config = Configuration(app_name="foundry_local_samples")
-FoundryLocalManager.initialize(config)
-manager = FoundryLocalManager.instance
-
-# Download and register all execution providers.
-current_ep = ""
-def _ep_progress(ep_name: str, percent: float):
-    global current_ep
-    if ep_name != current_ep:
-        if current_ep:
-            print()
-        current_ep = ep_name
-    print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
+async def main():
+    #
+    # Initialize the Foundry Local SDK
+    config = Configuration(app_name="foundry_local_samples")
+    await FoundryLocalManager.initialize(config)
+    manager = FoundryLocalManager.instance
 
-manager.download_and_register_eps(progress_callback=_ep_progress)
-if current_ep:
+    # Download and register all execution providers.
+    current_ep = ""
+    def _ep_progress(ep_name: str, percent: float):
+        nonlocal current_ep
+        if ep_name != current_ep:
+            if current_ep:
+                print()
+            current_ep = ep_name
+        print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
+
+    await manager.download_and_register_eps(progress_callback=_ep_progress)
+    if current_ep:
+        print()
+
+    # Load a model
+    model = await manager.catalog.get_model("qwen2.5-0.5b")
+    await model.download(
+        lambda progress: print(
+            f"\rDownloading model: {progress:.2f}%",
+            end="",
+            flush=True,
+        )
+    )
     print()
+    await model.load()
+    print("Model loaded.")
+
+    # Start the web service to expose an OpenAI-compatible endpoint
+    await manager.start_web_service()
+    base_url = f"{manager.urls[0]}/v1"
+    #
 
-# Load a model
-model = manager.catalog.get_model("qwen2.5-0.5b")
-model.download(
-    lambda progress: print(
-        f"\rDownloading model: {progress:.2f}%",
-        end="",
-        flush=True,
+    #
+    # Create a LangChain ChatOpenAI instance pointing to the local endpoint
+    llm = ChatOpenAI(
+        base_url=base_url,
+        api_key="none",
+        model=model.id,
     )
-)
-print()
-model.load()
-print("Model loaded.")
+    #
 
-# Start the web service to expose an OpenAI-compatible endpoint
-manager.start_web_service()
-base_url = f"{manager.urls[0]}/v1"
-#
+    #
+    # Create a translation chain
+    prompt = ChatPromptTemplate.from_messages([
+        ("system", "You are a translator. Translate the following text to {language}. Only output the translation, nothing else."),
+        ("user", "{text}")
+    ])
 
-#
-# Create a LangChain ChatOpenAI instance pointing to the local endpoint
-llm = ChatOpenAI(
-    base_url=base_url,
-    api_key="none",
-    model=model.id,
-)
-#
+    chain = prompt | llm | StrOutputParser()
 
-#
-# Create a translation chain
-prompt = ChatPromptTemplate.from_messages([
-    ("system", "You are a translator. Translate the following text to {language}. Only output the translation, nothing else."),
-    ("user", "{text}")
-])
+    # Run the chain
+    result = await chain.ainvoke({"language": "Spanish", "text": "Hello, how are you today?"})
+    print(f"Translation: {result}")
+    #
 
-chain = prompt | llm | StrOutputParser()
+    # Clean up
+    await model.unload()
+    await manager.stop_web_service()
 
-# Run the chain
-result = chain.invoke({"language": "Spanish", "text": "Hello, how are you today?"})
-print(f"Translation: {result}")
-#
 
-# Clean up
-model.unload()
-manager.stop_web_service()
+if __name__ == "__main__":
+    asyncio.run(main())
 #
diff --git a/samples/python/native-chat-completions/src/app.py b/samples/python/native-chat-completions/src/app.py
index eba9df41..8fdb01f5 100644
--- a/samples/python/native-chat-completions/src/app.py
+++ b/samples/python/native-chat-completions/src/app.py
@@ -1,14 +1,15 @@
 #
 #
+import asyncio
 from foundry_local_sdk import Configuration, FoundryLocalManager
 #
 
 
-def main():
+async def main():
     #
     # Initialize the Foundry Local SDK
     config = Configuration(app_name="foundry_local_samples")
-    FoundryLocalManager.initialize(config)
+    await FoundryLocalManager.initialize(config)
     manager = FoundryLocalManager.instance
 
     # Download and register all execution providers.
@@ -21,13 +22,13 @@ def ep_progress(ep_name: str, percent: float):
             current_ep = ep_name
         print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
 
-    manager.download_and_register_eps(progress_callback=ep_progress)
+    await manager.download_and_register_eps(progress_callback=ep_progress)
     if current_ep:
         print()
 
     # Select and load a model from the catalog
-    model = manager.catalog.get_model("qwen2.5-0.5b")
-    model.download(
+    model = await manager.catalog.get_model("qwen2.5-0.5b")
+    await model.download(
         lambda progress: print(
             f"\rDownloading model: {progress:.2f}%",
             end="",
@@ -35,7 +36,7 @@ def ep_progress(ep_name: str, percent: float):
         )
     )
     print()
-    model.load()
+    await model.load()
     print("Model loaded and ready.")
 
     # Get a chat client
@@ -50,7 +51,7 @@ def ep_progress(ep_name: str, percent: float):
 
     # Stream the response token by token
     print("Assistant: ", end="", flush=True)
-    for chunk in client.complete_streaming_chat(messages):
+    async for chunk in client.complete_streaming_chat(messages):
         content = chunk.choices[0].delta.content
         if content:
             print(content, end="", flush=True)
@@ -58,10 +59,10 @@ def ep_progress(ep_name: str, percent: float):
     #
 
     # Clean up
-    model.unload()
+    await model.unload()
     print("Model unloaded.")
 
 
 if __name__ == "__main__":
-    main()
+    asyncio.run(main())
 #
diff --git a/samples/python/tool-calling/src/app.py b/samples/python/tool-calling/src/app.py
index db619550..22e89518 100644
--- a/samples/python/tool-calling/src/app.py
+++ b/samples/python/tool-calling/src/app.py
@@ -1,5 +1,6 @@
 #
 #
+import asyncio
 import json
 from foundry_local_sdk import Configuration, FoundryLocalManager
 #
@@ -83,7 +84,7 @@ def calculate(expression):
 #
 
 
-def process_tool_calls(messages, response, client):
+async def process_tool_calls(messages, response, client):
     """Handle tool calls in a loop until the model produces a final answer."""
     choice = response.choices[0].message
 
@@ -121,7 +122,7 @@ def process_tool_calls(messages, response, client):
         })
 
         # Send the updated conversation back
-        response = client.complete_chat(messages, tools=tools)
+        response = await client.complete_chat(messages, tools=tools)
         choice = response.choices[0].message
 
     return choice.content
@@ -129,10 +130,10 @@ def process_tool_calls(messages, response, client):
 #
 
 
-def main():
+async def main():
     # Initialize the Foundry Local SDK
     config = Configuration(app_name="foundry_local_samples")
-    FoundryLocalManager.initialize(config)
+    await FoundryLocalManager.initialize(config)
     manager = FoundryLocalManager.instance
 
     # Download and register all execution providers.
@@ -145,13 +146,13 @@ def ep_progress(ep_name: str, percent: float):
             current_ep = ep_name
         print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
 
-    manager.download_and_register_eps(progress_callback=ep_progress)
+    await manager.download_and_register_eps(progress_callback=ep_progress)
    if current_ep:
         print()
 
     # Select and load a model
-    model = manager.catalog.get_model("qwen2.5-0.5b")
-    model.download(
+    model = await manager.catalog.get_model("qwen2.5-0.5b")
+    await model.download(
         lambda progress: print(
             f"\rDownloading model: {progress:.2f}%",
             end="",
@@ -159,7 +160,7 @@ def ep_progress(ep_name: str, percent: float):
         )
     )
     print()
-    model.load()
+    await model.load()
     print("Model loaded and ready.")
 
     # Get a chat client
@@ -179,17 +180,17 @@ def ep_progress(ep_name: str, percent: float):
     ]
 
     print("Sending request with tools...")
-    response = client.complete_chat(messages, tools=tools)
-    answer = process_tool_calls(messages, response, client)
+    response = await client.complete_chat(messages, tools=tools)
+    answer = await process_tool_calls(messages, response, client)
 
     print(f"\nAssistant: {answer}")
 
     # Clean up
-    model.unload()
+    await model.unload()
     print("Model unloaded.")
     #
 
 
 if __name__ == "__main__":
-    main()
+    asyncio.run(main())
 #
diff --git a/samples/python/tutorial-chat-assistant/src/app.py b/samples/python/tutorial-chat-assistant/src/app.py
index 13f1c500..2650c0a2 100644
--- a/samples/python/tutorial-chat-assistant/src/app.py
+++ b/samples/python/tutorial-chat-assistant/src/app.py
@@ -1,14 +1,15 @@
 #
 #
+import asyncio
 from foundry_local_sdk import Configuration, FoundryLocalManager
 #
 
 
-def main():
+async def main():
     #
     # Initialize the Foundry Local SDK
     config = Configuration(app_name="foundry_local_samples")
-    FoundryLocalManager.initialize(config)
+    await FoundryLocalManager.initialize(config)
     manager = FoundryLocalManager.instance
 
     # Download and register all execution providers.
@@ -21,15 +22,15 @@ def ep_progress(ep_name: str, percent: float):
             current_ep = ep_name
         print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
 
-    manager.download_and_register_eps(progress_callback=ep_progress)
+    await manager.download_and_register_eps(progress_callback=ep_progress)
     if current_ep:
         print()
 
     # Select and load a model from the catalog
-    model = manager.catalog.get_model("qwen2.5-0.5b")
-    model.download(lambda progress: print(f"\rDownloading model: {progress:.2f}%", end="", flush=True))
+    model = await manager.catalog.get_model("qwen2.5-0.5b")
+    await model.download(lambda progress: print(f"\rDownloading model: {progress:.2f}%", end="", flush=True))
     print()
-    model.load()
+    await model.load()
     print("Model loaded and ready.")
 
     # Get a chat client
@@ -62,7 +63,7 @@ def ep_progress(ep_name: str, percent: float):
         # Stream the response token by token
         print("Assistant: ", end="", flush=True)
         full_response = ""
-        for chunk in client.complete_streaming_chat(messages):
+        async for chunk in client.complete_streaming_chat(messages):
             content = chunk.choices[0].delta.content
             if content:
                 print(content, end="", flush=True)
@@ -75,10 +76,10 @@ def ep_progress(ep_name: str, percent: float):
     #
 
     # Clean up - unload the model
-    model.unload()
+    await model.unload()
     print("Model unloaded. Goodbye!")
 
 
 if __name__ == "__main__":
-    main()
+    asyncio.run(main())
 #
diff --git a/samples/python/tutorial-document-summarizer/src/app.py b/samples/python/tutorial-document-summarizer/src/app.py
index 055bb992..fb50f6ad 100644
--- a/samples/python/tutorial-document-summarizer/src/app.py
+++ b/samples/python/tutorial-document-summarizer/src/app.py
@@ -1,23 +1,24 @@
 #
 #
+import asyncio
 import sys
 from pathlib import Path
 from foundry_local_sdk import Configuration, FoundryLocalManager
 #
 
 
-def summarize_file(client, file_path, system_prompt):
+async def summarize_file(client, file_path, system_prompt):
     """Summarize a single file and print the result."""
     content = Path(file_path).read_text(encoding="utf-8")
     messages = [
         {"role": "system", "content": system_prompt},
         {"role": "user", "content": content}
     ]
-    response = client.complete_chat(messages)
+    response = await client.complete_chat(messages)
     print(response.choices[0].message.content)
 
 
-def summarize_directory(client, directory, system_prompt):
+async def summarize_directory(client, directory, system_prompt):
     """Summarize all .txt files in a directory."""
     txt_files = sorted(Path(directory).glob("*.txt"))
 
@@ -27,15 +28,15 @@ def summarize_directory(client, directory, system_prompt):
 
     for txt_file in txt_files:
         print(f"--- {txt_file.name} ---")
-        summarize_file(client, txt_file, system_prompt)
+        await summarize_file(client, txt_file, system_prompt)
         print()
 
 
-def main():
+async def main():
     #
     # Initialize the Foundry Local SDK
     config = Configuration(app_name="foundry_local_samples")
-    FoundryLocalManager.initialize(config)
+    await FoundryLocalManager.initialize(config)
     manager = FoundryLocalManager.instance
 
     # Download and register all execution providers.
@@ -48,15 +49,15 @@ def ep_progress(ep_name: str, percent: float):
             current_ep = ep_name
         print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
 
-    manager.download_and_register_eps(progress_callback=ep_progress)
+    await manager.download_and_register_eps(progress_callback=ep_progress)
     if current_ep:
         print()
 
     # Select and load a model from the catalog
-    model = manager.catalog.get_model("qwen2.5-0.5b")
-    model.download(lambda p: print(f"\rDownloading model: {p:.2f}%", end="", flush=True))
+    model = await manager.catalog.get_model("qwen2.5-0.5b")
+    await model.download(lambda p: print(f"\rDownloading model: {p:.2f}%", end="", flush=True))
     print()
-    model.load()
+    await model.load()
     print("Model loaded and ready.\n")
 
     # Get a chat client
@@ -75,17 +76,17 @@ def ep_progress(ep_name: str, percent: float):
     #
 
     if target_path.is_dir():
-        summarize_directory(client, target_path, system_prompt)
+        await summarize_directory(client, target_path, system_prompt)
     else:
         print(f"--- {target_path.name} ---")
-        summarize_file(client, target_path, system_prompt)
+        await summarize_file(client, target_path, system_prompt)
     #
 
     # Clean up
-    model.unload()
+    await model.unload()
     print("\nModel unloaded. Done!")
 
 
 if __name__ == "__main__":
-    main()
+    asyncio.run(main())
 #
diff --git a/samples/python/tutorial-tool-calling/src/app.py b/samples/python/tutorial-tool-calling/src/app.py
index bb22bfe0..097432c2 100644
--- a/samples/python/tutorial-tool-calling/src/app.py
+++ b/samples/python/tutorial-tool-calling/src/app.py
@@ -1,5 +1,6 @@
 #
 #
+import asyncio
 import json
 from foundry_local_sdk import Configuration, FoundryLocalManager
 #
@@ -83,7 +84,7 @@ def calculate(expression):
 #
 
 
-def process_tool_calls(messages, response, client):
+async def process_tool_calls(messages, response, client):
     """Handle tool calls in a loop until the model produces a final answer."""
     choice = response.choices[0].message
 
@@ -121,7 +122,7 @@ def process_tool_calls(messages, response, client):
         })
 
         # Send the updated conversation back
-        response = client.complete_chat(messages, tools=tools)
+        response = await client.complete_chat(messages, tools=tools)
         choice = response.choices[0].message
 
     return choice.content
@@ -129,10 +130,10 @@ def process_tool_calls(messages, response, client):
 #
 
 
-def main():
+async def main():
     # Initialize the Foundry Local SDK
     config = Configuration(app_name="foundry_local_samples")
-    FoundryLocalManager.initialize(config)
+    await FoundryLocalManager.initialize(config)
     manager = FoundryLocalManager.instance
 
     # Download and register all execution providers.
@@ -145,13 +146,13 @@ def ep_progress(ep_name: str, percent: float):
             current_ep = ep_name
         print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
 
-    manager.download_and_register_eps(progress_callback=ep_progress)
+    await manager.download_and_register_eps(progress_callback=ep_progress)
     if current_ep:
         print()
 
     # Select and load a model
-    model = manager.catalog.get_model("qwen2.5-0.5b")
-    model.download(
+    model = await manager.catalog.get_model("qwen2.5-0.5b")
+    await model.download(
         lambda progress: print(
             f"\rDownloading model: {progress:.2f}%",
             end="",
@@ -159,7 +160,7 @@ def ep_progress(ep_name: str, percent: float):
         )
     )
     print()
-    model.load()
+    await model.load()
     print("Model loaded and ready.")
 
     # Get a chat client
@@ -183,18 +184,18 @@ def ep_progress(ep_name: str, percent: float):
 
         messages.append({"role": "user", "content": user_input})
 
-        response = client.complete_chat(messages, tools=tools)
-        answer = process_tool_calls(messages, response, client)
+        response = await client.complete_chat(messages, tools=tools)
+        answer = await process_tool_calls(messages, response, client)
 
         messages.append({"role": "assistant", "content": answer})
         print(f"Assistant: {answer}\n")
 
     # Clean up
-    model.unload()
+    await model.unload()
     print("Model unloaded. Goodbye!")
     #
 
 
 if __name__ == "__main__":
-    main()
+    asyncio.run(main())
 #
diff --git a/samples/python/tutorial-voice-to-text/src/app.py b/samples/python/tutorial-voice-to-text/src/app.py
index 8ebbba1b..9585f6f8 100644
--- a/samples/python/tutorial-voice-to-text/src/app.py
+++ b/samples/python/tutorial-voice-to-text/src/app.py
@@ -1,14 +1,15 @@
 #
 #
+import asyncio
 from foundry_local_sdk import Configuration, FoundryLocalManager
 #
 
 
-def main():
+async def main():
     #
     # Initialize the Foundry Local SDK
     config = Configuration(app_name="foundry_local_samples")
-    FoundryLocalManager.initialize(config)
+    await FoundryLocalManager.initialize(config)
     manager = FoundryLocalManager.instance
     #
 
@@ -22,14 +23,14 @@ def ep_progress(ep_name: str, percent: float):
             current_ep = ep_name
         print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
 
-    manager.download_and_register_eps(progress_callback=ep_progress)
+    await manager.download_and_register_eps(progress_callback=ep_progress)
     if current_ep:
         print()
     #
 
     #
     # Load the speech-to-text model
-    speech_model = manager.catalog.get_model("whisper-tiny")
-    speech_model.download(
+    speech_model = await manager.catalog.get_model("whisper-tiny")
+    await speech_model.download(
         lambda progress: print(
             f"\rDownloading speech model: {progress:.2f}%",
             end="",
@@ -37,22 +38,22 @@ def ep_progress(ep_name: str, percent: float):
         )
     )
     print()
-    speech_model.load()
+    await speech_model.load()
     print("Speech model loaded.")
 
     # Transcribe the audio file
     audio_client = speech_model.get_audio_client()
-    transcription = audio_client.transcribe("meeting-notes.wav")
+    transcription = await audio_client.transcribe("meeting-notes.wav")
     print(f"\nTranscription:\n{transcription.text}")
 
     # Unload the speech model to free memory
-    speech_model.unload()
+    await speech_model.unload()
     #
 
     #
     # Load the chat model for summarization
-    chat_model = manager.catalog.get_model("qwen2.5-0.5b")
-    chat_model.download(
+    chat_model = await manager.catalog.get_model("qwen2.5-0.5b")
+    await chat_model.download(
         lambda progress: print(
             f"\rDownloading chat model: {progress:.2f}%",
             end="",
@@ -60,7 +61,7 @@ def ep_progress(ep_name: str, percent: float):
         )
     )
     print()
-    chat_model.load()
+    await chat_model.load()
     print("Chat model loaded.")
 
     # Summarize the transcription into organized notes
@@ -76,16 +77,16 @@ def ep_progress(ep_name: str, percent: float):
         {"role": "user", "content": transcription.text},
     ]
 
-    response = client.complete_chat(messages)
+    response = await client.complete_chat(messages)
     summary = response.choices[0].message.content
     print(f"\nSummary:\n{summary}")
 
     # Clean up
-    chat_model.unload()
+    await chat_model.unload()
     print("\nDone. Models unloaded.")
     #
 
 
 if __name__ == "__main__":
-    main()
+    asyncio.run(main())
 #
diff --git a/samples/python/web-server/src/app.py b/samples/python/web-server/src/app.py
index 67117029..c4fe9c1e 100644
--- a/samples/python/web-server/src/app.py
+++ b/samples/python/web-server/src/app.py
@@ -1,73 +1,80 @@
 #
 #
+import asyncio
 import openai
 from foundry_local_sdk import Configuration, FoundryLocalManager
 #
 
-#
-# Initialize the Foundry Local SDK
-config = Configuration(app_name="foundry_local_samples")
-FoundryLocalManager.initialize(config)
-manager = FoundryLocalManager.instance
-
-# Download and register all execution providers.
-current_ep = ""
-def _ep_progress(ep_name: str, percent: float):
-    global current_ep
-    if ep_name != current_ep:
-        if current_ep:
-            print()
-        current_ep = ep_name
-    print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
+async def main():
+    #
+    # Initialize the Foundry Local SDK
+    config = Configuration(app_name="foundry_local_samples")
+    await FoundryLocalManager.initialize(config)
+    manager = FoundryLocalManager.instance
 
-manager.download_and_register_eps(progress_callback=_ep_progress)
-if current_ep:
+    # Download and register all execution providers.
+    current_ep = ""
+    def _ep_progress(ep_name: str, percent: float):
+        nonlocal current_ep
+        if ep_name != current_ep:
+            if current_ep:
+                print()
+            current_ep = ep_name
+        print(f"\r {ep_name:<30} {percent:5.1f}%", end="", flush=True)
+
+    await manager.download_and_register_eps(progress_callback=_ep_progress)
+    if current_ep:
+        print()
+
+    # Load a model
+    model = await manager.catalog.get_model("qwen2.5-0.5b")
+    await model.download(
+        lambda progress: print(
+            f"\rDownloading model: {progress:.2f}%",
+            end="",
+            flush=True,
+        )
+    )
     print()
+    await model.load()
+    print("Model loaded.")
 
-# Load a model
-model = manager.catalog.get_model("qwen2.5-0.5b")
-model.download(
-    lambda progress: print(
-        f"\rDownloading model: {progress:.2f}%",
-        end="",
-        flush=True,
+    # Start the web service to expose an OpenAI-compatible REST endpoint
+    await manager.start_web_service()
+    base_url = f"{manager.urls[0]}/v1"
+    #
+
+    #
+    # Use the OpenAI SDK to connect to the local REST endpoint
+    client = openai.OpenAI(
+        base_url=base_url,
+        api_key="none",
     )
-)
-print()
-model.load()
-print("Model loaded.")
+    #
 
-# Start the web service to expose an OpenAI-compatible REST endpoint
-manager.start_web_service()
-base_url = f"{manager.urls[0]}/v1"
-#
+    #
+    # Make a chat completion request via the REST API
+    response = client.chat.completions.create(
+        model=model.id,
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "What is the golden ratio?"}
+        ],
+        stream=True,
+    )
 
-#
-# Use the OpenAI SDK to connect to the local REST endpoint
-client = openai.OpenAI(
-    base_url=base_url,
-    api_key="none",
-)
-#
+    for chunk in response:
+        if chunk.choices[0].delta.content is not None:
+            print(chunk.choices[0].delta.content, end="", flush=True)
+    print()
+    #
 
-#
-# Make a chat completion request via the REST API
-response = client.chat.completions.create(
-    model=model.id,
-    messages=[
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": "What is the golden ratio?"}
-    ],
-    stream=True,
-)
+    # Clean up
+    await model.unload()
+    await manager.stop_web_service()
 
-for chunk in response:
-    if chunk.choices[0].delta.content is not None:
-        print(chunk.choices[0].delta.content, end="", flush=True)
-print()
-#
 
-# Clean up
-model.unload()
-manager.stop_web_service()
+if __name__ == "__main__":
+    asyncio.run(main())
 #
diff --git a/sdk/python/examples/chat_completion.py b/sdk/python/examples/chat_completion.py
index c0c58048..fabd89d0 100644
--- a/sdk/python/examples/chat_completion.py
+++ b/sdk/python/examples/chat_completion.py
@@ -10,31 +10,32 @@
 including model discovery, loading, and inference.
 """
 
+import asyncio
 from foundry_local_sdk import Configuration, FoundryLocalManager
 
 
-def main():
+async def main():
     # 1. Initialize the SDK
     config = Configuration(app_name="ChatCompletionExample")
     print("Initializing Foundry Local Manager")
-    FoundryLocalManager.initialize(config)
+    await FoundryLocalManager.initialize(config)
     manager = FoundryLocalManager.instance
 
     # Discover available EPs and register them explicitly when needed.
-    eps = manager.discover_eps()
+    eps = await manager.discover_eps()
     print("Available execution providers:")
     for ep in eps:
         print(f" - {ep.name} (registered: {ep.is_registered})")
 
-    ep_result = manager.download_and_register_eps()
+    ep_result = await manager.download_and_register_eps()
     print(f"EP registration success: {ep_result.success} ({ep_result.status})")
 
     # 2. Print available models in the catalog and cache
-    models = manager.catalog.list_models()
+    models = await manager.catalog.list_models()
     print("Available models in catalog:")
     for m in models:
         print(f" - {m.alias} ({m.id})")
 
-    cached_models = manager.catalog.get_cached_models()
+    cached_models = await manager.catalog.get_cached_models()
     print("\nCached models:")
     for m in cached_models:
         print(f" - {m.alias} ({m.id})")
@@ -42,22 +43,22 @@ def main():
     CACHED_MODEL_ALIAS = "qwen2.5-0.5b"
 
     # 3. Find a model from the cache (+ download if not cached)
-    model = manager.catalog.get_model(CACHED_MODEL_ALIAS)
+    model = await manager.catalog.get_model(CACHED_MODEL_ALIAS)
     if model is None:
         print(f"Model '{CACHED_MODEL_ALIAS}' not found in catalog.")
         print("Available models:")
-        for m in manager.catalog.list_models():
+        for m in await manager.catalog.list_models():
             print(f" - {m.alias} ({m.id})")
         return
 
-    if not model.is_cached:
+    if not await model.is_cached():
         print(f"Downloading {model.alias}...")
-        model.download(progress_callback=lambda pct: print(f" {pct:.1f}%", end="\r"))
+        await model.download(progress_callback=lambda pct: print(f" {pct:.1f}%", end="\r"))
         print()
 
     # 4. Load the model
     print(f"Loading {model.alias}...", end="")
-    model.load()
+    await model.load()
     print("loaded!")
 
     try:
@@ -65,14 +66,14 @@ def main():
         client = model.get_chat_client()
 
         print("\n--- Non-streaming ---")
-        response = client.complete_chat(
+        response = await client.complete_chat(
             messages=[{"role": "user", "content": "What is the capital of France? Reply briefly."}]
         )
         print(f"Response: {response.choices[0].message.content}")
 
         # 6. Streaming
         print("\n--- Streaming ---")
-        for chunk in client.complete_streaming_chat(
+        async for chunk in client.complete_streaming_chat(
             [{"role": "user", "content": "Tell me a short joke."}]
         ):
             if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
@@ -84,9 +85,9 @@ def main():
 
     finally:
         # 7. Cleanup
-        model.unload()
+        await model.unload()
         print("\nModel unloaded.")
 
 
 if __name__ == "__main__":
-    main()
+    asyncio.run(main())
diff --git a/sdk/python/pyproject.toml b/sdk/python/pyproject.toml
index ef93b6f7..851ea0a6 100644
--- a/sdk/python/pyproject.toml
+++ b/sdk/python/pyproject.toml
@@ -53,3 +53,5 @@ python_files = ["test_*.py"]
 python_classes = ["Test*"]
 python_functions = ["test_*"]
 timeout = 60
+asyncio_mode = "auto"
+asyncio_default_fixture_loop_scope = "session"
diff --git a/sdk/python/requirements-dev.txt b/sdk/python/requirements-dev.txt
index aea40875..c581a02b 100644
--- a/sdk/python/requirements-dev.txt
+++ b/sdk/python/requirements-dev.txt
@@ -2,4 +2,5 @@ build
 coverage
 pytest
+pytest-asyncio
 pytest-timeout
diff --git a/sdk/python/requirements.txt b/sdk/python/requirements.txt
index 666a3721..28daade2 100644
--- a/sdk/python/requirements.txt
+++ b/sdk/python/requirements.txt
@@ -1,8 +1,7 @@
 pydantic>=2.0.0
 requests>=2.32.4
 openai>=2.24.0
-# Standard native binary packages from the ORT-Nightly PyPI feed.
-foundry-local-core==1.0.0rc1
+foundry-local-core==1.0.0
 onnxruntime-core==1.24.4; sys_platform != "linux"
 onnxruntime-gpu==1.24.4; sys_platform == "linux"
 onnxruntime-genai-core==0.13.1; sys_platform != "linux"
diff --git a/sdk/python/src/catalog.py b/sdk/python/src/catalog.py
index 51f5bd8f..b79e2473 100644
--- a/sdk/python/src/catalog.py
+++ b/sdk/python/src/catalog.py
@@ -5,9 +5,9 @@
 
 from __future__ import annotations
 
+import asyncio
 import datetime
 import logging
-import threading
 from typing import List, Optional
 
 from pydantic import TypeAdapter
@@ -38,26 +38,26 @@ def __init__(self, model_load_manager: ModelLoadManager, core_interop: CoreInterop):
         """
         self._core_interop = core_interop
         self._model_load_manager = model_load_manager
-        self._lock = threading.Lock()
+        self._lock = asyncio.Lock()
 
         self._models: List[ModelInfo] = []
         self._model_alias_to_model = {}
         self._model_id_to_model_variant = {}
         self._last_fetch = datetime.datetime.min
 
-        response = core_interop.execute_command("get_catalog_name")
+        response = core_interop._execute_command("get_catalog_name")
 
         if response.error is not None:
             raise FoundryLocalException(f"Failed to get catalog name: {response.error}")
 
         self.name = response.data
 
-    def _update_models(self):
-        with self._lock:
+    async def _update_models(self):
+        async with self._lock:
             # refresh every 6 hours
             if (datetime.datetime.now() - self._last_fetch) < datetime.timedelta(hours=6):
                 return
 
-            response = self._core_interop.execute_command("get_model_list")
+            response = await self._core_interop.execute_command("get_model_list")
 
             if response.error is not None:
                 raise FoundryLocalException(f"Failed to get model list: {response.error}")
@@ -84,28 +84,28 @@ def _update_models(self):
             self._models = models
             self._last_fetch = datetime.datetime.now()
 
-    def _invalidate_cache(self):
-        with self._lock:
+    async def _invalidate_cache(self):
+        async with self._lock:
             self._last_fetch = datetime.datetime.min
 
-    def list_models(self) -> List[IModel]:
+    async def list_models(self) -> List[IModel]:
         """
         List the available models in the catalog.
 
         :return: List of IModel instances.
         """
-        self._update_models()
+        await self._update_models()
         return list(self._model_alias_to_model.values())
 
-    def get_model(self, model_alias: str) -> Optional[IModel]:
+    async def get_model(self, model_alias: str) -> Optional[IModel]:
         """
         Lookup a model by its alias.
 
         :param model_alias: Model alias.
         :return: IModel if found.
         """
-        self._update_models()
+        await self._update_models()
         return self._model_alias_to_model.get(model_alias)
 
-    def get_model_variant(self, model_id: str) -> Optional[IModel]:
+    async def get_model_variant(self, model_id: str) -> Optional[IModel]:
         """
         Lookup a model variant by its unique model id.
         NOTE: This will return an IModel with a single variant. Use get_model to get an IModel with all available
@@ -113,10 +113,10 @@ def get_model_variant(self, model_id: str) -> Optional[IModel]:
         :param model_id: Model id.
         :return: IModel if found.
         """
-        self._update_models()
+        await self._update_models()
         return self._model_id_to_model_variant.get(model_id)
 
-    def get_latest_version(self, model_or_model_variant: IModel) -> IModel:
+    async def get_latest_version(self, model_or_model_variant: IModel) -> IModel:
         """
         Resolve the latest catalog version for the provided model or variant.
 
@@ -124,7 +124,7 @@ def get_latest_version(self, model_or_model_variant: IModel) -> IModel:
         :return: Latest catalog version for the same model name.
         :raises FoundryLocalException: If the alias or name cannot be resolved.
         """
-        self._update_models()
+        await self._update_models()
 
         model = self._model_alias_to_model.get(model_or_model_variant.alias)
         if model is None:
@@ -144,14 +144,14 @@ def get_latest_version(self, model_or_model_variant: IModel) -> IModel:
 
         return model_or_model_variant if latest.id == model_or_model_variant.id else latest
 
-    def get_cached_models(self) -> List[IModel]:
+    async def get_cached_models(self) -> List[IModel]:
         """
         Get a list of currently downloaded models from the model cache.
 
         :return: List of IModel instances.
         """
-        self._update_models()
+        await self._update_models()
 
-        cached_model_ids = get_cached_model_ids(self._core_interop)
+        cached_model_ids = await get_cached_model_ids(self._core_interop)
 
         cached_models: List[IModel] = []
         for model_id in cached_model_ids:
@@ -161,14 +161,14 @@ def get_cached_models(self) -> List[IModel]:
 
         return cached_models
 
-    def get_loaded_models(self) -> List[IModel]:
+    async def get_loaded_models(self) -> List[IModel]:
         """
         Get a list of the currently loaded models.
 
         :return: List of IModel instances.
         """
-        self._update_models()
+        await self._update_models()
 
-        loaded_model_ids = self._model_load_manager.list_loaded()
+        loaded_model_ids = await self._model_load_manager.list_loaded()
 
         loaded_models: List[IModel] = []
         for model_id in loaded_model_ids:
diff --git a/sdk/python/src/detail/core_interop.py b/sdk/python/src/detail/core_interop.py
index 1cd53e33..aa520f03 100644
--- a/sdk/python/src/detail/core_interop.py
+++ b/sdk/python/src/detail/core_interop.py
@@ -5,11 +5,13 @@
 
 from __future__ import annotations
 
+import asyncio
 import ctypes
 import json
 import logging
 import os
 import sys
+import threading
 from dataclasses import dataclass
 from pathlib import Path
 
@@ -104,6 +106,9 @@ class CoreInterop:
 
     instance = None
 
+    # Serialize native calls — the underlying C library may not be thread-safe.
+    _native_lock = threading.Lock()
+
     # Callback function for native interop.
     # Returns c_int: 0 = continue, 1 = cancel.
     CALLBACK_TYPE = ctypes.CFUNCTYPE(ctypes.c_int, ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p)
@@ -218,7 +223,7 @@ def __init__(self, config: Configuration):
             config.additional_settings["Bootstrap"] = "true"
 
         request = InteropRequest(params=config.as_dictionary())
-        response = self.execute_command("initialize", request)
+        response = self._execute_command("initialize", request)
         if response.error is not None:
             raise FoundryLocalException(f"Failed to initialize Foundry.Local.Core: {response.error}")
 
@@ -226,41 +231,42 @@ def __init__(self, config: Configuration):
 
     def _execute_command(self, command: str, interop_request: InteropRequest = None,
                          callback: CoreInterop.CALLBACK_TYPE = None):
-        cmd_ptr, cmd_len, cmd_buf = CoreInterop._to_c_buffer(command)
-        data_ptr, data_len, data_buf = CoreInterop._to_c_buffer(interop_request.to_json() if interop_request else None)
+        with CoreInterop._native_lock:
+            cmd_ptr, cmd_len, cmd_buf = CoreInterop._to_c_buffer(command)
+            data_ptr, data_len, data_buf = CoreInterop._to_c_buffer(interop_request.to_json() if interop_request else None)
 
-        req = RequestBuffer(Command=cmd_ptr, CommandLength=cmd_len, Data=data_ptr, DataLength=data_len)
-        resp = ResponseBuffer()
-        lib = CoreInterop._flcore_library
+            req = RequestBuffer(Command=cmd_ptr, CommandLength=cmd_len, Data=data_ptr, DataLength=data_len)
+            resp = ResponseBuffer()
+            lib = CoreInterop._flcore_library
 
-        if (callback is not None):
-            # If a callback is provided, use the execute_command_with_callback method
-            # We need a helper to do the initial conversion from ctypes to Python and pass it through to the
-            # provided callback function
-            callback_helper = CallbackHelper(callback)
-            callback_py_obj = ctypes.py_object(callback_helper)
-            callback_helper_ptr = ctypes.cast(ctypes.pointer(callback_py_obj), ctypes.c_void_p)
-            callback_fn = CoreInterop.CALLBACK_TYPE(CallbackHelper.callback)
+            if (callback is not None):
+                # If a callback is provided, use the execute_command_with_callback method
+                # We need a helper to do the initial conversion from ctypes to Python and pass it through to the
+                # provided callback function
+                callback_helper = CallbackHelper(callback)
+                callback_py_obj = ctypes.py_object(callback_helper)
+                callback_helper_ptr = ctypes.cast(ctypes.pointer(callback_py_obj), ctypes.c_void_p)
+                callback_fn = CoreInterop.CALLBACK_TYPE(CallbackHelper.callback)
 
-            lib.execute_command_with_callback(ctypes.byref(req), ctypes.byref(resp), callback_fn, callback_helper_ptr)
+                lib.execute_command_with_callback(ctypes.byref(req), ctypes.byref(resp), callback_fn, callback_helper_ptr)
 
-            if callback_helper.exception is not None:
-                raise callback_helper.exception
-        else:
-            lib.execute_command(ctypes.byref(req), ctypes.byref(resp))
+                if callback_helper.exception is not None:
+                    raise callback_helper.exception
+            else:
+                lib.execute_command(ctypes.byref(req), ctypes.byref(resp))
 
-        req = None  # Free Python reference to request
+            req = None  # Free Python reference to request
 
-        response_str = ctypes.string_at(resp.Data, resp.DataLength).decode("utf-8") if resp.Data else None
-        error_str = ctypes.string_at(resp.Error, resp.ErrorLength).decode("utf-8") if resp.Error else None
+            response_str = ctypes.string_at(resp.Data, resp.DataLength).decode("utf-8") if resp.Data else None
+            error_str = ctypes.string_at(resp.Error, resp.ErrorLength).decode("utf-8") if resp.Error else None
 
-        # C# owns the memory in the response so we need to free it explicitly
-        lib.free_response(resp)
-
-        return Response(data=response_str, error=error_str)
+            # C# owns the memory in the response so we need to free it explicitly
+            lib.free_response(resp)
+
+            return Response(data=response_str, error=error_str)
 
-    def execute_command(self, command_name: str, command_input: Optional[InteropRequest] = None) -> Response:
-        """Execute a command synchronously.
+    async def execute_command(self, command_name: str, command_input: Optional[InteropRequest] = None) -> Response:
+        """Execute a command asynchronously.
 
         Args:
             command_name: The native command name (e.g. ``"get_model_list"``).
@@ -272,10 +278,9 @@ def execute_command(self, command_name: str, command_input: Optional[InteropRequ
 
         logger.debug("Executing command: %s Input: %s", command_name,
                      command_input.params if command_input else None)
-        response = self._execute_command(command_name, command_input)
-        return response
+        return await asyncio.to_thread(self._execute_command, command_name, command_input)
 
-    def execute_command_with_callback(self, command_name: str, command_input: Optional[InteropRequest],
+    async def execute_command_with_callback(self, command_name: str, command_input: Optional[InteropRequest],
                                       callback: Callable[[str], None]) -> Response:
         """Execute a command with a streaming callback.
 
@@ -292,14 +297,13 @@ def execute_command_with_callback(self, command_name: str, command_input: Option
         """
         logger.debug("Executing command with callback: %s Input: %s", command_name,
                      command_input.params if command_input else None)
-        response = self._execute_command(command_name, command_input, callback)
-        return response
+        return await asyncio.to_thread(self._execute_command, command_name, command_input, callback)
 
 
-def get_cached_model_ids(core_interop: CoreInterop) -> list[str]:
+async def get_cached_model_ids(core_interop: CoreInterop) -> list[str]:
     """Get the list of models that have been downloaded and are cached."""
 
-    response = core_interop.execute_command("get_cached_models")
+    response = await core_interop.execute_command("get_cached_models")
     if response.error is not None:
         raise FoundryLocalException(f"Failed to get cached models: {response.error}")
 
diff --git a/sdk/python/src/detail/model.py b/sdk/python/src/detail/model.py
index 189920b1..9f13d23e 100644
--- a/sdk/python/src/detail/model.py
+++ b/sdk/python/src/detail/model.py
@@ -104,35 +104,33 @@ def supports_tool_calling(self) -> Optional[bool]:
         """Whether the currently selected variant supports tool/function calling."""
         return self._selected_variant.supports_tool_calling
 
-    @property
-    def is_cached(self) -> bool:
+    async def is_cached(self) -> bool:
         """Is the currently selected variant cached locally?"""
-        return self._selected_variant.is_cached
+        return await self._selected_variant.is_cached()
 
-    @property
-    def is_loaded(self) -> bool:
+    async def is_loaded(self) -> bool:
         """Is the currently selected variant loaded in memory?"""
-        return self._selected_variant.is_loaded
+        return await self._selected_variant.is_loaded()
 
-    def download(self, progress_callback: Optional[Callable[[float], None]] = None) -> None:
+    async def download(self, progress_callback: Optional[Callable[[float], None]] = None) -> None:
         """Download the currently selected variant."""
-        self._selected_variant.download(progress_callback)
+        await self._selected_variant.download(progress_callback)
 
-    def get_path(self) -> str:
+    async def get_path(self) -> str:
         """Get the path to the currently selected variant."""
-        return self._selected_variant.get_path()
+        return await self._selected_variant.get_path()
 
-    def load(self) -> None:
+    async def load(self) -> None:
         """Load the currently selected variant into memory."""
-        self._selected_variant.load()
+        await self._selected_variant.load()
 
-    def unload(self) -> None:
+    async def unload(self) -> None:
         """Unload the currently selected variant from memory."""
-        self._selected_variant.unload()
+        await self._selected_variant.unload()
 
-    def remove_from_cache(self) -> None:
+    async def remove_from_cache(self) -> None:
         """Remove the currently selected variant from the local cache."""
-        self._selected_variant.remove_from_cache()
+        await self._selected_variant.remove_from_cache()
 
     def get_chat_client(self) -> ChatClient:
         """Get a chat client for the currently selected variant."""
diff --git a/sdk/python/src/detail/model_load_manager.py b/sdk/python/src/detail/model_load_manager.py
index 8ffd087a..2633d18c 100644
--- a/sdk/python/src/detail/model_load_manager.py
+++ b/sdk/python/src/detail/model_load_manager.py
@@ -4,6 +4,7 @@
 # --------------------------------------------------------------------------
 from __future__ import annotations
 
+import asyncio
 import json
 import logging
 import requests
@@ -32,7 +33,7 @@ def __init__(self, core_interop: CoreInterop, external_service_url: str = None):
         self._core_interop = core_interop
         self._external_service_url = external_service_url
 
-    def load(self, model_id: str) -> None:
+    async def load(self, model_id: str) -> None:
         """
         Load a model by its ID.
 
@@ -47,37 +48,37 @@ def load(self, model_id: str) -> None:
             communicating with the external service.
         """
         if self._external_service_url:
-            self._web_load_model(model_id)
+            await self._web_load_model(model_id)
             return
 
         request = InteropRequest({"Model": model_id})
-        response = self._core_interop.execute_command("load_model", request)
+        response = await self._core_interop.execute_command("load_model", request)
 
         if response.error is not None:
             raise FoundryLocalException(f"Failed to load model {model_id}: {response.error}")
 
-    def unload(self, model_id: str) -> None:
+    async def unload(self, model_id: str) -> None:
         """
         Unload a model by its ID.
 
         :param model_id: The ID of the model to unload.
         """
         if self._external_service_url:
-            self._web_unload_model(model_id)
+            await self._web_unload_model(model_id)
             return
 
         request = InteropRequest({"Model": model_id})
-        response = self._core_interop.execute_command("unload_model", request)
+        response = await self._core_interop.execute_command("unload_model", request)
 
         if response.error is not None:
             raise FoundryLocalException(f"Failed to unload model {model_id}: {response.error}")
 
-    def list_loaded(self) -> list[str]:
+    async def list_loaded(self) -> list[str]:
         """
         List loaded models.
 
         :return: List of loaded model IDs
         """
         if self._external_service_url:
-            return self._web_list_loaded_models()
+            return await self._web_list_loaded_models()
 
-        response = self._core_interop.execute_command("list_loaded_models")
+        response = await self._core_interop.execute_command("list_loaded_models")
 
         if response.error is not None:
             raise FoundryLocalException(f"Failed to list loaded models: {response.error}")
@@ -88,9 +89,12 @@ def list_loaded(self) -> list[str]:
 
         return model_ids
 
-    def _web_list_loaded_models(self) -> List[str]:
+    async def _web_list_loaded_models(self) -> List[str]:
         try:
-            response = requests.get(f"{self._external_service_url}/models/loaded", headers=self._headers, timeout=10)
+            response = await asyncio.to_thread(
+                requests.get, f"{self._external_service_url}/models/loaded",
+                headers=self._headers, timeout=10
+            )
 
             if not response.ok:
                 raise FoundryLocalException(
@@ -109,7 +113,7 @@ def _web_list_loaded_models(self) -> List[str]:
         except json.JSONDecodeError as e:
             raise FoundryLocalException(f"Failed to decode JSON response: Response was: {content}") from e
 
-    def _web_load_model(self, model_id: str) -> None:
+    async def _web_load_model(self, model_id: str) -> None:
         """
         Load a model via the external web service.
 
@@ -126,7 +130,7 @@ def _web_load_model(self, model_id: str) -> None:
         #     }
         # response = requests.get(url, params=query_params)
 
-        response = requests.get(url, headers=self._headers, timeout=10)
+        response = await asyncio.to_thread(requests.get, url, headers=self._headers, timeout=10)
 
         if not response.ok:
             raise FoundryLocalException(
@@ -143,12 +147,12 @@ def _web_load_model(self, model_id: str) -> None:
                 f"HTTP request failed when loading model {model_id} from {self._external_service_url}: {e}"
             ) from e
 
-    def _web_unload_model(self, model_id: str) -> None:
+    async def _web_unload_model(self, model_id: str) -> None:
         try:
             encoded_model_id = quote(model_id)
             url = f"{self._external_service_url}/models/unload/{encoded_model_id}"
 
-            response = requests.get(url, headers=self._headers, timeout=10)
+            response = await asyncio.to_thread(requests.get, url, headers=self._headers, timeout=10)
 
             if not response.ok:
                 raise FoundryLocalException(
diff --git a/sdk/python/src/detail/model_variant.py b/sdk/python/src/detail/model_variant.py
index a5ac02d4..e6fab104 100644
--- a/sdk/python/src/detail/model_variant.py
+++ b/sdk/python/src/detail/model_variant.py
@@ -4,6 +4,7 @@
 # --------------------------------------------------------------------------
 from __future__ import annotations
 
+import asyncio
 import logging
 from typing import Callable, List, Optional
 
@@ -99,19 +100,17 @@ def supports_tool_calling(self) -> Optional[bool]:
         """Whether this variant supports tool/function calling, or ``None`` if unknown."""
         return self._model_info.supports_tool_calling
 
-    @property
-    def is_cached(self) -> bool:
+    async def is_cached(self) -> bool:
         """``True`` if this variant is present in the local model cache."""
-        cached_model_ids = get_cached_model_ids(self._core_interop)
+        cached_model_ids = await get_cached_model_ids(self._core_interop)
         return self.id in cached_model_ids
 
-    @property
-    def is_loaded(self) -> bool:
+    async def is_loaded(self) -> bool:
         """``True`` if this variant is currently loaded into memory."""
-        loaded_model_ids = self._model_load_manager.list_loaded()
+        loaded_model_ids = await self._model_load_manager.list_loaded()
         return self.id in loaded_model_ids
 
-    def download(self, progress_callback: Callable[[float], None] = None):
+    async def download(self, progress_callback: Callable[[float], None] = None):
         """Download this variant to the local cache.
 
         Args:
@@ -120,18 +119,19 @@ def download(self, progress_callback: Callable[[float], None] = None):
         """
         request = InteropRequest(params={"Model": self.id})
         if progress_callback is None:
-            response = self._core_interop.execute_command("download_model", request)
+            response = await self._core_interop.execute_command("download_model", request)
         else:
-            response = self._core_interop.execute_command_with_callback(
+            loop = asyncio.get_running_loop()
+            response = await self._core_interop.execute_command_with_callback(
                 "download_model",
                 request,
-                lambda pct_str: progress_callback(float(pct_str))
+                lambda pct_str: loop.call_soon_threadsafe(progress_callback, float(pct_str))
             )
         logger.info("Download response: %s", response)
 
         if response.error is not None:
             raise FoundryLocalException(f"Failed to download model: {response.error}")
 
-    def get_path(self) -> str:
+    async def get_path(self) -> str:
         """Get the local file-system path to this variant if cached.
 
         Returns:
@@ -141,27 +141,27 @@ def get_path(self) -> str:
             FoundryLocalException: If the model path cannot be retrieved.
         """
         request = InteropRequest(params={"Model": self.id})
-        response = self._core_interop.execute_command("get_model_path", request)
+        response = await self._core_interop.execute_command("get_model_path", request)
 
         if response.error is not None:
             raise FoundryLocalException(f"Failed to get model path: {response.error}")
 
         return response.data
 
-    def load(self) -> None:
+    async def load(self) -> None:
         """Load this variant into memory for inference."""
-        self._model_load_manager.load(self.id)
+        await self._model_load_manager.load(self.id)
 
-    def remove_from_cache(self) -> None:
+    async def remove_from_cache(self) -> None:
         """Remove this variant from the local model cache."""
         request = InteropRequest(params={"Model": self.id})
-        response = self._core_interop.execute_command("remove_cached_model", request)
+        response = await self._core_interop.execute_command("remove_cached_model", request)
 
         if response.error is not None:
             raise FoundryLocalException(f"Failed to remove model from cache: {response.error}")
 
-    def unload(self) -> None:
+    async def unload(self) -> None:
         """Unload this variant from memory."""
-        self._model_load_manager.unload(self.id)
+        await self._model_load_manager.unload(self.id)
 
     def get_chat_client(self) -> ChatClient:
         """Create an OpenAI-compatible ``ChatClient`` for this variant."""
diff --git a/sdk/python/src/foundry_local_manager.py b/sdk/python/src/foundry_local_manager.py
index a649f8e5..b402c500 100644
--- a/sdk/python/src/foundry_local_manager.py
+++ b/sdk/python/src/foundry_local_manager.py
@@ -5,6 +5,7 @@
 
 from __future__ import annotations
 
+import asyncio
 import json
 import logging
 import threading
@@ -27,8 +28,8 @@ class FoundryLocalManager:
     """Singleton manager for Foundry Local SDK operations.
 
-    Call ``FoundryLocalManager.initialize(config)`` once at startup, then access
-    the singleton via ``FoundryLocalManager.instance``.
+    Call ``await FoundryLocalManager.initialize(config)`` once at startup, then
+    access the singleton via ``FoundryLocalManager.instance``.
 
     Attributes:
         instance: The singleton ``FoundryLocalManager`` instance (set after ``initialize``).
@@ -40,17 +41,20 @@ class FoundryLocalManager:
     instance: FoundryLocalManager = None
 
     @staticmethod
-    def initialize(config: Configuration):
+    async def initialize(config: Configuration):
         """Initialize the Foundry Local SDK with the given configuration.
 
-        This method must be called before using any other part of the SDK.
+        This coroutine must be awaited before using any other part of the SDK::
+
+            await FoundryLocalManager.initialize(config)
+            manager = FoundryLocalManager.instance
 
         Args:
            config: Configuration object for the SDK.
         """
-        # Delegate singleton creation to the constructor, which enforces
-        # the singleton invariant under a lock and sets `instance`.
-        FoundryLocalManager(config)
+        # Run the synchronous constructor in a thread to avoid blocking
+        # the event loop during native library initialization.
+        await asyncio.to_thread(FoundryLocalManager, config)
 
     def __init__(self, config: Configuration):
         # Enforce singleton creation under a class-level lock and ensure
@@ -66,6 +70,7 @@ def __init__(self, config: Configuration):
             FoundryLocalManager.instance = self
 
         self.urls = None
+        self._async_lock = asyncio.Lock()
 
     def _initialize(self):
         set_default_logger_severity(self.config.log_level)
@@ -76,7 +81,7 @@ def _initialize(self):
         self._model_load_manager = ModelLoadManager(self._core_interop, external_service_url)
         self.catalog = Catalog(self._model_load_manager, self._core_interop)
 
-    def discover_eps(self) -> list[EpInfo]:
+    async def discover_eps(self) -> list[EpInfo]:
         """Discover available execution providers and their registration status.
 
         Returns:
@@ -85,7 +90,7 @@ def discover_eps(self) -> list[EpInfo]:
         Raises:
             FoundryLocalException: If EP discovery fails or response JSON is invalid.
         """
-        response = self._core_interop.execute_command("discover_eps")
+        response = await self._core_interop.execute_command("discover_eps")
         if response.error is not None:
             raise FoundryLocalException(f"Error discovering execution providers: {response.error}")
 
@@ -97,7 +102,7 @@ def discover_eps(self) -> list[EpInfo]:
                 f"Failed to decode JSON response from discover_eps: {e}. Response was: {response.data}"
             ) from e
 
-    def download_and_register_eps(
+    async def download_and_register_eps(
         self,
         names: Optional[list[str]] = None,
         progress_callback: Optional[Callable[[str, float], None]] = None,
@@ -121,21 +126,23 @@ def download_and_register_eps(
         request = InteropRequest(params={"Names": ",".join(names)})
 
         if progress_callback is not None:
+            loop = asyncio.get_running_loop()
+
             def _on_chunk(chunk: str) -> None:
                 sep = chunk.find("|")
                 if sep >= 0:
                     ep_name = chunk[:sep] or ""
                     try:
                         percent = float(chunk[sep + 1:])
-                        progress_callback(ep_name, percent)
+                        loop.call_soon_threadsafe(progress_callback, ep_name, percent)
                     except ValueError:
                         pass
 
-            response = self._core_interop.execute_command_with_callback(
+            response = await self._core_interop.execute_command_with_callback(
                 "download_and_register_eps", request, _on_chunk
             )
         else:
-            response = self._core_interop.execute_command("download_and_register_eps", request)
+            response = await self._core_interop.execute_command("download_and_register_eps", request)
 
         if response.error is not None:
             raise FoundryLocalException(f"Error downloading execution providers: {response.error}")
@@ -157,11 +164,11 @@ def _on_chunk(chunk: str) -> None:
         # Invalidate the catalog cache if any EP was newly registered so the next access
         # re-fetches models with the updated set of available EPs.
         if ep_result.success or len(ep_result.registered_eps) > 0:
-            self.catalog._invalidate_cache()
+            await self.catalog._invalidate_cache()
 
         return ep_result
 
-    def start_web_service(self):
+    async def start_web_service(self):
         """Start the optional web service.
 
         If provided, the service will be bound to the value of Configuration.web.urls.
@@ -169,8 +176,8 @@ def start_web_service(self):
         FoundryLocalManager.urls will be updated with the actual URL/s the service is
         listening on.
         """
-        with FoundryLocalManager._lock:
-            response = self._core_interop.execute_command("start_service")
+        async with self._async_lock:
+            response = await self._core_interop.execute_command("start_service")
 
             if response.error is not None:
                 raise FoundryLocalException(f"Error starting web service: {response.error}")
@@ -181,14 +188,14 @@ def start_web_service(self):
 
             self.urls = bound_urls
 
-    def stop_web_service(self):
+    async def stop_web_service(self):
         """Stop the optional web service."""
-        with FoundryLocalManager._lock:
+        async with self._async_lock:
             if self.urls is None:
                 raise FoundryLocalException("Web service is not running.")
 
-            response = self._core_interop.execute_command("stop_service")
+            response = await self._core_interop.execute_command("stop_service")
 
            if response.error is not None:
                 raise FoundryLocalException(f"Error stopping web service: {response.error}")
diff --git a/sdk/python/src/imodel.py b/sdk/python/src/imodel.py
index 8237aeb4..e1e7bcf2 100644
--- a/sdk/python/src/imodel.py
+++ b/sdk/python/src/imodel.py
@@ -32,15 +32,13 @@ def info(self) -> ModelInfo:
         """Full model metadata."""
         pass
 
-    @property
     @abstractmethod
-    def is_cached(self) -> bool:
+    async def is_cached(self) -> bool:
         """True if the model is present in the local cache."""
         pass
 
-    @property
     @abstractmethod
-    def is_loaded(self) -> bool:
+    async def is_loaded(self) -> bool:
         """True if the model is loaded into memory."""
         pass
 
@@ -75,7 +73,7 @@ def supports_tool_calling(self) -> Optional[bool]:
         pass
 
     @abstractmethod
-    def download(self, progress_callback: Callable[[float], None] = None) -> None:
+    async def download(self, progress_callback: Callable[[float], None] = None) -> None:
         """
         Download the model to local cache if not already present.
         :param progress_callback: Optional callback function for download progress as a percentage (0.0 to 100.0).
@@ -83,7 +81,7 @@ def download(self, progress_callback: Callable[[float], None] = None) -> None:
         pass
 
     @abstractmethod
-    def get_path(self) -> str:
+    async def get_path(self) -> str:
         """
         Gets the model path if cached.
         :return: Path of model directory.
@@ -91,21 +89,21 @@ def get_path(self) -> str:
         pass
 
     @abstractmethod
-    def load(self) -> None:
+    async def load(self) -> None:
         """
         Load the model into memory if not already loaded.
         """
         pass
 
     @abstractmethod
-    def remove_from_cache(self) -> None:
+    async def remove_from_cache(self) -> None:
         """
         Remove the model from the local cache.
         """
         pass
 
     @abstractmethod
-    def unload(self) -> None:
+    async def unload(self) -> None:
         """
         Unload the model if loaded.
         """
diff --git a/sdk/python/src/openai/audio_client.py b/sdk/python/src/openai/audio_client.py
index 0858e4aa..ff8f133d 100644
--- a/sdk/python/src/openai/audio_client.py
+++ b/sdk/python/src/openai/audio_client.py
@@ -5,12 +5,12 @@
 
 from __future__ import annotations
 
+import asyncio
 import json
 import logging
-import queue
 import threading
 from dataclasses import dataclass
-from typing import Generator, List, Optional
+from typing import AsyncGenerator, List, Optional
 
 from ..detail.core_interop import CoreInterop, InteropRequest
 from ..exception import FoundryLocalException
@@ -89,7 +89,7 @@ def _create_request_json(self, audio_file_path: str) -> str:
 
         return json.dumps(request)
 
-    def transcribe(self, audio_file_path: str) -> AudioTranscriptionResponse:
+    async def transcribe(self, audio_file_path: str) -> AudioTranscriptionResponse:
         """Transcribe an audio file (non-streaming).
 
         Args:
@@ -107,7 +107,7 @@ def transcribe(self, audio_file_path: str) -> AudioTranscriptionResponse:
         request_json = self._create_request_json(audio_file_path)
 
         request = InteropRequest(params={"OpenAICreateRequest": request_json})
-        response = self._core_interop.execute_command("audio_transcribe", request)
+        response = await self._core_interop.execute_command("audio_transcribe", request)
         if response.error is not None:
             raise FoundryLocalException(
                 f"Audio transcription failed for model '{self.model_id}': {response.error}"
@@ -116,19 +116,23 @@ def transcribe(self, audio_file_path: str) -> AudioTranscriptionResponse:
         data = json.loads(response.data)
         return AudioTranscriptionResponse(text=data.get("text", ""))
 
-    def _stream_chunks(self, request_json: str) -> Generator[AudioTranscriptionResponse, None, None]:
-        """Background-thread generator that yields parsed chunks from the native streaming call."""
-        _SENTINEL = object()
-        chunk_queue: queue.Queue = queue.Queue()
+    async def _stream_chunks(self, request_json: str) -> AsyncGenerator[AudioTranscriptionResponse, None]:
+        """Async generator that yields parsed chunks from the native streaming call."""
+        chunk_queue: asyncio.Queue = asyncio.Queue()
+        loop = asyncio.get_running_loop()
         errors: List[Exception] = []
+        cancelled = threading.Event()
 
         def _on_chunk(chunk_str: str) -> None:
+            if cancelled.is_set():
+                return
             chunk_data = json.loads(chunk_str)
-            chunk_queue.put(AudioTranscriptionResponse(text=chunk_data.get("text", "")))
+            chunk = AudioTranscriptionResponse(text=chunk_data.get("text", ""))
+            loop.call_soon_threadsafe(chunk_queue.put_nowait, chunk)
 
-        def _run() -> None:
+        async def _execute() -> None:
             try:
-                resp = self._core_interop.execute_command_with_callback(
+                resp = await self._core_interop.execute_command_with_callback(
                     "audio_transcribe",
                     InteropRequest(params={"OpenAICreateRequest": request_json}),
                     _on_chunk,
@@ -142,30 +146,44 @@ def _run() -> None:
             except Exception as exc:
                 errors.append(exc)
             finally:
-                chunk_queue.put(_SENTINEL)
-
-        threading.Thread(target=_run, daemon=True).start()
-        while (item := chunk_queue.get()) is not _SENTINEL:
-            yield item
+                await chunk_queue.put(None)
+
+        task = asyncio.create_task(_execute())
+        try:
+            while True:
+                item = await chunk_queue.get()
+                if item is None:
+                    break
+                yield item
+        finally:
+            # Signal the callback to drop further chunks, then cancel the task.
+            # The native call may continue on its worker thread, but _on_chunk
+            # will no-op so the queue stops growing.
+            cancelled.set()
+            task.cancel()
+            try:
+                await task
+            except asyncio.CancelledError:
+                pass
 
         if errors:
             raise errors[0]
 
-    def transcribe_streaming(
+    async def transcribe_streaming(
         self,
         audio_file_path: str,
-    ) -> Generator[AudioTranscriptionResponse, None, None]:
+    ) -> AsyncGenerator[AudioTranscriptionResponse, None]:
         """Transcribe an audio file with streaming chunks.
 
-        Consume with a standard ``for`` loop::
+        Consume with a standard ``async for`` loop::
 
-            for chunk in audio_client.transcribe_streaming("recording.mp3"):
+            async for chunk in audio_client.transcribe_streaming("recording.mp3"):
                 print(chunk.text, end="", flush=True)
 
         Args:
            audio_file_path: Path to the audio file to transcribe.
 
         Returns:
-            A generator of ``AudioTranscriptionResponse`` objects.
+            An async generator of ``AudioTranscriptionResponse`` objects.
 
         Raises:
             ValueError: If *audio_file_path* is not a non-empty string.
@@ -174,4 +192,5 @@ def transcribe_streaming(
         self._validate_audio_file_path(audio_file_path)
 
         request_json = self._create_request_json(audio_file_path)
-        return self._stream_chunks(request_json)
\ No newline at end of file
+        async for chunk in self._stream_chunks(request_json):
+            yield chunk
\ No newline at end of file
diff --git a/sdk/python/src/openai/chat_client.py b/sdk/python/src/openai/chat_client.py
index 0b0d58bc..4b1b54c5 100644
--- a/sdk/python/src/openai/chat_client.py
+++ b/sdk/python/src/openai/chat_client.py
@@ -5,9 +5,9 @@
 
 from __future__ import annotations
 
+import asyncio
 import logging
 import json
-import queue
 import threading
 
 from ..detail.core_interop import CoreInterop, InteropRequest
@@ -18,7 +18,7 @@
     CompletionCreateParamsStreaming
 from openai.types.chat import ChatCompletion
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
-from typing import Any, Dict, Generator, List, Optional
+from typing import Any, AsyncGenerator, Dict, List, Optional
 
 logger = logging.getLogger(__name__)
 
@@ -192,7 +192,7 @@ def _create_request(
 
         return json.dumps(chat_request)
 
-    def complete_chat(self, messages: List[ChatCompletionMessageParam], tools: Optional[List[Dict[str, Any]]] = None):
+    async def complete_chat(self, messages: List[ChatCompletionMessageParam], tools: Optional[List[Dict[str, Any]]] = None):
        """Perform a non-streaming chat completion.
Args: @@ -212,7 +212,7 @@ def complete_chat(self, messages: List[ChatCompletionMessageParam], tools: Optio # Send the request to the chat API request = InteropRequest(params={"OpenAICreateRequest": chat_request_json}) - response = self._core_interop.execute_command("chat_completions", request) + response = await self._core_interop.execute_command("chat_completions", request) if response.error is not None: raise FoundryLocalException(f"Error during chat completion: {response.error}") @@ -220,13 +220,16 @@ def complete_chat(self, messages: List[ChatCompletionMessageParam], tools: Optio return completion - def _stream_chunks(self, chat_request_json: str) -> Generator[ChatCompletionChunk, None, None]: - """Background-thread generator that yields parsed chunks from the native streaming call.""" - _SENTINEL = object() - chunk_queue: queue.Queue = queue.Queue() + async def _stream_chunks(self, chat_request_json: str) -> AsyncGenerator[ChatCompletionChunk, None]: + """Async generator that yields parsed chunks from the native streaming call.""" + chunk_queue: asyncio.Queue = asyncio.Queue() + loop = asyncio.get_running_loop() errors: List[Exception] = [] + cancelled = threading.Event() def _on_chunk(response_str: str) -> None: + if cancelled.is_set(): + return raw = json.loads(response_str) # Foundry Local returns tool call chunks with "message.tool_calls" instead # of the standard streaming "delta.tool_calls". Normalize to delta format @@ -238,11 +241,12 @@ def _on_chunk(response_str: str) -> None: for i, tc in enumerate(msg.get("tool_calls", [])): tc.setdefault("index", i) choice["delta"] = msg - chunk_queue.put(ChatCompletionChunk.model_validate(raw)) + chunk = ChatCompletionChunk.model_validate(raw) + loop.call_soon_threadsafe(chunk_queue.put_nowait, chunk) - def _run() -> None: + async def _execute() -> None: try: - resp = self._core_interop.execute_command_with_callback( + resp = await self._core_interop.execute_command_with_callback( "chat_completions", InteropRequest(params={"OpenAICreateRequest": chat_request_json}), _on_chunk, @@ -252,24 +256,38 @@ def _run() -> None: except Exception as exc: errors.append(exc) finally: - chunk_queue.put(_SENTINEL) - - threading.Thread(target=_run, daemon=True).start() - while (item := chunk_queue.get()) is not _SENTINEL: - yield item + await chunk_queue.put(None) + + task = asyncio.create_task(_execute()) + try: + while True: + item = await chunk_queue.get() + if item is None: + break + yield item + finally: + # Signal the callback to drop further chunks, then cancel the task. + # The native call may continue on its worker thread, but _on_chunk + # will no-op so the queue stops growing. + cancelled.set() + task.cancel() + try: + await task + except asyncio.CancelledError: + pass if errors: raise errors[0] - def complete_streaming_chat( + async def complete_streaming_chat( self, messages: List[ChatCompletionMessageParam], tools: Optional[List[Dict[str, Any]]] = None, - ) -> Generator[ChatCompletionChunk, None, None]: + ) -> AsyncGenerator[ChatCompletionChunk, None]: """Perform a streaming chat completion, yielding chunks as they arrive. - Consume with a standard ``for`` loop:: + Consume with a standard ``async for`` loop:: - for chunk in client.complete_streaming_chat(messages): + async for chunk in client.complete_streaming_chat(messages): if chunk.choices[0].delta.content: print(chunk.choices[0].delta.content, end="", flush=True) @@ -278,7 +296,7 @@ def complete_streaming_chat( tools: Optional list of tool definitions for function calling. 
Returns: - A generator of ``ChatCompletionChunk`` objects. + An async generator of ``ChatCompletionChunk`` objects. Raises: ValueError: If messages or tools are malformed. @@ -287,4 +305,5 @@ def complete_streaming_chat( self._validate_messages(messages) self._validate_tools(tools) chat_request_json = self._create_request(messages, streaming=True, tools=tools) - return self._stream_chunks(chat_request_json) + async for chunk in self._stream_chunks(chat_request_json): + yield chunk diff --git a/sdk/python/test/conftest.py b/sdk/python/test/conftest.py index 1cb85704..53f12124 100644 --- a/sdk/python/test/conftest.py +++ b/sdk/python/test/conftest.py @@ -16,6 +16,7 @@ import logging import pytest +import pytest_asyncio from pathlib import Path @@ -105,14 +106,14 @@ def get_multiply_tool(): # Session-scoped fixtures # --------------------------------------------------------------------------- -@pytest.fixture(scope="session") -def manager(): +@pytest_asyncio.fixture(scope="session") +async def manager(): """Initialize FoundryLocalManager once for the entire test session.""" # Reset singleton in case a previous run left state FoundryLocalManager.instance = None config = get_test_config() - FoundryLocalManager.initialize(config) + await FoundryLocalManager.initialize(config) mgr = FoundryLocalManager.instance assert mgr is not None, "FoundryLocalManager.initialize did not set instance" @@ -121,10 +122,10 @@ def manager(): # Teardown: unload all loaded models try: catalog = mgr.catalog - loaded = catalog.get_loaded_models() + loaded = await catalog.get_loaded_models() for model_variant in loaded: try: - model_variant.unload() + await model_variant.unload() except Exception as e: logger.warning("Failed to unload model %s during teardown: %s", model_variant.id, e) except Exception as e: @@ -134,19 +135,19 @@ def manager(): FoundryLocalManager.instance = None -@pytest.fixture(scope="session") -def catalog(manager): +@pytest_asyncio.fixture(scope="session") +async def catalog(manager): """Return the Catalog from the session-scoped manager.""" return manager.catalog -@pytest.fixture(scope="session") -def core_interop(manager): +@pytest_asyncio.fixture(scope="session") +async def core_interop(manager): """Return the CoreInterop from the session-scoped manager (internal, for component tests).""" return manager._core_interop -@pytest.fixture(scope="session") -def model_load_manager(manager): +@pytest_asyncio.fixture(scope="session") +async def model_load_manager(manager): """Return the ModelLoadManager from the session-scoped manager (internal, for component tests).""" return manager._model_load_manager diff --git a/sdk/python/test/detail/test_model_load_manager.py b/sdk/python/test/detail/test_model_load_manager.py index a5a231e3..a64b9e14 100644 --- a/sdk/python/test/detail/test_model_load_manager.py +++ b/sdk/python/test/detail/test_model_load_manager.py @@ -15,67 +15,71 @@ class TestModelLoadManagerCoreInterop: """ModelLoadManager tests using Core Interop (no external URL).""" - def _get_model_id(self, catalog) -> str: + async def _get_model_id(self, catalog) -> str: """Resolve the variant ID for the test model alias.""" - cached = catalog.get_cached_models() + cached = await catalog.get_cached_models() variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) assert variant is not None, f"{TEST_MODEL_ALIAS} should be cached" return variant.id - def test_should_load_model(self, catalog, core_interop): + @pytest.mark.asyncio + async def test_should_load_model(self, catalog, 
core_interop): """Load model via core interop and verify it appears in loaded list.""" - model_id = self._get_model_id(catalog) + model_id = await self._get_model_id(catalog) mlm = ModelLoadManager(core_interop) - mlm.load(model_id) - loaded = mlm.list_loaded() + await mlm.load(model_id) + loaded = await mlm.list_loaded() assert model_id in loaded # Cleanup - mlm.unload(model_id) + await mlm.unload(model_id) - def test_should_unload_model(self, catalog, core_interop): + @pytest.mark.asyncio + async def test_should_unload_model(self, catalog, core_interop): """Load then unload model via core interop.""" - model_id = self._get_model_id(catalog) + model_id = await self._get_model_id(catalog) mlm = ModelLoadManager(core_interop) - mlm.load(model_id) - loaded = mlm.list_loaded() + await mlm.load(model_id) + loaded = await mlm.list_loaded() assert model_id in loaded - mlm.unload(model_id) - loaded = mlm.list_loaded() + await mlm.unload(model_id) + loaded = await mlm.list_loaded() assert model_id not in loaded - def test_should_list_loaded_models(self, catalog, core_interop): + @pytest.mark.asyncio + async def test_should_list_loaded_models(self, catalog, core_interop): """list_loaded() should return an array containing the loaded model.""" - model_id = self._get_model_id(catalog) + model_id = await self._get_model_id(catalog) mlm = ModelLoadManager(core_interop) - mlm.load(model_id) - loaded = mlm.list_loaded() + await mlm.load(model_id) + loaded = await mlm.list_loaded() assert isinstance(loaded, list) assert model_id in loaded # Cleanup - mlm.unload(model_id) + await mlm.unload(model_id) class TestModelLoadManagerExternalService: """ModelLoadManager tests using external web service URL (skipped in CI).""" @skip_in_ci - def test_should_load_and_unload_via_external_service(self, manager, catalog, core_interop): + @pytest.mark.asyncio + async def test_should_load_and_unload_via_external_service(self, manager, catalog, core_interop): """Load/unload model through the web service endpoint.""" - cached = catalog.get_cached_models() + cached = await catalog.get_cached_models() variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) assert variant is not None model_id = variant.id # Start web service try: - manager.start_web_service() + await manager.start_web_service() except Exception as e: pytest.skip(f"Failed to start web service: {e}") @@ -88,33 +92,34 @@ def test_should_load_and_unload_via_external_service(self, manager, catalog, cor try: # Setup: load via core interop setup_mlm = ModelLoadManager(core_interop) - setup_mlm.load(model_id) - loaded = setup_mlm.list_loaded() + await setup_mlm.load(model_id) + loaded = await setup_mlm.list_loaded() assert model_id in loaded # Unload via external service ext_mlm = ModelLoadManager(core_interop, service_url) - ext_mlm.unload(model_id) + await ext_mlm.unload(model_id) # Verify via core interop - loaded = setup_mlm.list_loaded() + loaded = await setup_mlm.list_loaded() assert model_id not in loaded finally: try: - manager.stop_web_service() + await manager.stop_web_service() except Exception: pass @skip_in_ci - def test_should_list_loaded_via_external_service(self, manager, catalog, core_interop): + @pytest.mark.asyncio + async def test_should_list_loaded_via_external_service(self, manager, catalog, core_interop): """list_loaded() through the web service endpoint should match core interop.""" - cached = catalog.get_cached_models() + cached = await catalog.get_cached_models() variant = next((m for m in cached if m.alias == 
TEST_MODEL_ALIAS), None) assert variant is not None model_id = variant.id try: - manager.start_web_service() + await manager.start_web_service() except Exception as e: pytest.skip(f"Failed to start web service: {e}") @@ -127,18 +132,18 @@ def test_should_list_loaded_via_external_service(self, manager, catalog, core_in try: # Setup: load via core setup_mlm = ModelLoadManager(core_interop) - setup_mlm.load(model_id) + await setup_mlm.load(model_id) # Verify via external service ext_mlm = ModelLoadManager(core_interop, service_url) - loaded = ext_mlm.list_loaded() + loaded = await ext_mlm.list_loaded() assert isinstance(loaded, list) assert model_id in loaded # Cleanup - setup_mlm.unload(model_id) + await setup_mlm.unload(model_id) finally: try: - manager.stop_web_service() + await manager.stop_web_service() except Exception: pass diff --git a/sdk/python/test/openai/test_audio_client.py b/sdk/python/test/openai/test_audio_client.py index 0d365eef..160577be 100644 --- a/sdk/python/test/openai/test_audio_client.py +++ b/sdk/python/test/openai/test_audio_client.py @@ -19,28 +19,29 @@ ) -def _get_loaded_audio_model(catalog): +async def _get_loaded_audio_model(catalog): """Helper: ensure the whisper model is selected, loaded, and return Model.""" - cached = catalog.get_cached_models() + cached = await catalog.get_cached_models() assert len(cached) > 0 cached_variant = next((m for m in cached if m.alias == AUDIO_MODEL_ALIAS), None) assert cached_variant is not None, f"{AUDIO_MODEL_ALIAS} should be cached" - model = catalog.get_model(AUDIO_MODEL_ALIAS) + model = await catalog.get_model(AUDIO_MODEL_ALIAS) assert model is not None model.select_variant(cached_variant) - model.load() + await model.load() return model class TestAudioClient: """Audio Client Tests.""" - def test_should_transcribe_audio(self, catalog): + @pytest.mark.asyncio + async def test_should_transcribe_audio(self, catalog): """Non-streaming transcription of Recording.mp3.""" - model = _get_loaded_audio_model(catalog) + model = await _get_loaded_audio_model(catalog) try: audio_client = model.get_audio_client() assert audio_client is not None @@ -48,7 +49,7 @@ def test_should_transcribe_audio(self, catalog): audio_client.settings.language = "en" audio_client.settings.temperature = 0.0 - response = audio_client.transcribe(AUDIO_FILE_PATH) + response = await audio_client.transcribe(AUDIO_FILE_PATH) assert response is not None assert hasattr(response, "text") @@ -56,11 +57,12 @@ def test_should_transcribe_audio(self, catalog): assert len(response.text) > 0 assert response.text == EXPECTED_TEXT finally: - model.unload() + await model.unload() - def test_should_transcribe_audio_with_temperature(self, catalog): + @pytest.mark.asyncio + async def test_should_transcribe_audio_with_temperature(self, catalog): """Non-streaming transcription with explicit temperature.""" - model = _get_loaded_audio_model(catalog) + model = await _get_loaded_audio_model(catalog) try: audio_client = model.get_audio_client() assert audio_client is not None @@ -68,18 +70,19 @@ def test_should_transcribe_audio_with_temperature(self, catalog): audio_client.settings.language = "en" audio_client.settings.temperature = 0.0 - response = audio_client.transcribe(AUDIO_FILE_PATH) + response = await audio_client.transcribe(AUDIO_FILE_PATH) assert response is not None assert isinstance(response.text, str) assert len(response.text) > 0 assert response.text == EXPECTED_TEXT finally: - model.unload() + await model.unload() - def test_should_transcribe_audio_streaming(self, 
catalog): + @pytest.mark.asyncio + async def test_should_transcribe_audio_streaming(self, catalog): """Streaming transcription of Recording.mp3.""" - model = _get_loaded_audio_model(catalog) + model = await _get_loaded_audio_model(catalog) try: audio_client = model.get_audio_client() assert audio_client is not None @@ -88,7 +91,7 @@ def test_should_transcribe_audio_streaming(self, catalog): audio_client.settings.temperature = 0.0 chunks = [] - for chunk in audio_client.transcribe_streaming(AUDIO_FILE_PATH): + async for chunk in audio_client.transcribe_streaming(AUDIO_FILE_PATH): assert chunk is not None assert hasattr(chunk, "text") assert isinstance(chunk.text, str) @@ -98,11 +101,12 @@ def test_should_transcribe_audio_streaming(self, catalog): full_text = "".join(chunks) assert full_text == EXPECTED_TEXT finally: - model.unload() + await model.unload() - def test_should_transcribe_audio_streaming_with_temperature(self, catalog): + @pytest.mark.asyncio + async def test_should_transcribe_audio_streaming_with_temperature(self, catalog): """Streaming transcription with explicit temperature.""" - model = _get_loaded_audio_model(catalog) + model = await _get_loaded_audio_model(catalog) try: audio_client = model.get_audio_client() assert audio_client is not None @@ -111,7 +115,7 @@ def test_should_transcribe_audio_streaming_with_temperature(self, catalog): audio_client.settings.temperature = 0.0 chunks = [] - for chunk in audio_client.transcribe_streaming(AUDIO_FILE_PATH): + async for chunk in audio_client.transcribe_streaming(AUDIO_FILE_PATH): assert chunk is not None assert isinstance(chunk.text, str) chunks.append(chunk.text) @@ -119,22 +123,25 @@ def test_should_transcribe_audio_streaming_with_temperature(self, catalog): full_text = "".join(chunks) assert full_text == EXPECTED_TEXT finally: - model.unload() + await model.unload() - def test_should_raise_for_empty_audio_file_path(self, catalog): + @pytest.mark.asyncio + async def test_should_raise_for_empty_audio_file_path(self, catalog): """transcribe('') should raise.""" - model = catalog.get_model(AUDIO_MODEL_ALIAS) + model = await catalog.get_model(AUDIO_MODEL_ALIAS) assert model is not None audio_client = model.get_audio_client() with pytest.raises(ValueError, match="Audio file path must be a non-empty string"): - audio_client.transcribe("") + await audio_client.transcribe("") - def test_should_raise_for_streaming_empty_audio_file_path(self, catalog): + @pytest.mark.asyncio + async def test_should_raise_for_streaming_empty_audio_file_path(self, catalog): """transcribe_streaming('') should raise.""" - model = catalog.get_model(AUDIO_MODEL_ALIAS) + model = await catalog.get_model(AUDIO_MODEL_ALIAS) assert model is not None audio_client = model.get_audio_client() with pytest.raises(ValueError, match="Audio file path must be a non-empty string"): - audio_client.transcribe_streaming("") + async for _ in audio_client.transcribe_streaming(""): + pass diff --git a/sdk/python/test/openai/test_chat_client.py b/sdk/python/test/openai/test_chat_client.py index d96891b9..3580c38b 100644 --- a/sdk/python/test/openai/test_chat_client.py +++ b/sdk/python/test/openai/test_chat_client.py @@ -13,34 +13,35 @@ from ..conftest import TEST_MODEL_ALIAS, get_multiply_tool -def _get_loaded_chat_model(catalog): +async def _get_loaded_chat_model(catalog): """Helper: ensure the test model is selected, loaded, and return Model + ChatClient.""" - cached = catalog.get_cached_models() + cached = await catalog.get_cached_models() assert len(cached) > 0 cached_variant = 
next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) assert cached_variant is not None, f"{TEST_MODEL_ALIAS} should be cached" - model = catalog.get_model(TEST_MODEL_ALIAS) + model = await catalog.get_model(TEST_MODEL_ALIAS) assert model is not None model.select_variant(cached_variant) - model.load() + await model.load() return model class TestChatClient: """Chat Client Tests.""" - def test_should_perform_chat_completion(self, catalog): + @pytest.mark.asyncio + async def test_should_perform_chat_completion(self, catalog): """Non-streaming chat: 7 * 6 should include '42' in the response.""" - model = _get_loaded_chat_model(catalog) + model = await _get_loaded_chat_model(catalog) try: client = model.get_chat_client() client.settings.max_tokens = 500 client.settings.temperature = 0.0 # deterministic - result = client.complete_chat([ + result = await client.complete_chat([ {"role": "user", "content": "You are a calculator. Be precise. What is the answer to 7 multiplied by 6?"} ]) @@ -53,11 +54,12 @@ def test_should_perform_chat_completion(self, catalog): assert isinstance(content, str) assert "42" in content finally: - model.unload() + await model.unload() - def test_should_perform_streaming_chat_completion(self, catalog): + @pytest.mark.asyncio + async def test_should_perform_streaming_chat_completion(self, catalog): """Streaming chat: 7 * 6 = 42, then follow-up +25 = 67.""" - model = _get_loaded_chat_model(catalog) + model = await _get_loaded_chat_model(catalog) try: client = model.get_chat_client() client.settings.max_tokens = 500 @@ -69,7 +71,7 @@ def test_should_perform_streaming_chat_completion(self, catalog): ] # ---- First question ---- - chunks = list(client.complete_streaming_chat(messages)) + chunks = [c async for c in client.complete_streaming_chat(messages)] assert len(chunks) > 0 first_response = "".join( c.choices[0].delta.content @@ -82,7 +84,7 @@ def test_should_perform_streaming_chat_completion(self, catalog): messages.append({"role": "assistant", "content": first_response}) messages.append({"role": "user", "content": "Add 25 to the previous answer. 
Think hard to be sure of the answer."}) - chunks = list(client.complete_streaming_chat(messages)) + chunks = [c async for c in client.complete_streaming_chat(messages)] assert len(chunks) > 0 second_response = "".join( c.choices[0].delta.content @@ -91,47 +93,54 @@ def test_should_perform_streaming_chat_completion(self, catalog): ) assert "67" in second_response finally: - model.unload() + await model.unload() - def test_should_raise_for_empty_messages(self, catalog): + @pytest.mark.asyncio + async def test_should_raise_for_empty_messages(self, catalog): """complete_chat with empty list should raise.""" - model = catalog.get_model(TEST_MODEL_ALIAS) + model = await catalog.get_model(TEST_MODEL_ALIAS) assert model is not None client = model.get_chat_client() with pytest.raises(ValueError): - client.complete_chat([]) + await client.complete_chat([]) - def test_should_raise_for_none_messages(self, catalog): + @pytest.mark.asyncio + async def test_should_raise_for_none_messages(self, catalog): """complete_chat with None should raise.""" - model = catalog.get_model(TEST_MODEL_ALIAS) + model = await catalog.get_model(TEST_MODEL_ALIAS) assert model is not None client = model.get_chat_client() with pytest.raises(ValueError): - client.complete_chat(None) + await client.complete_chat(None) - def test_should_raise_for_streaming_empty_messages(self, catalog): + @pytest.mark.asyncio + async def test_should_raise_for_streaming_empty_messages(self, catalog): """complete_streaming_chat with empty list should raise.""" - model = catalog.get_model(TEST_MODEL_ALIAS) + model = await catalog.get_model(TEST_MODEL_ALIAS) assert model is not None client = model.get_chat_client() with pytest.raises(ValueError): - client.complete_streaming_chat([]) + async for _ in client.complete_streaming_chat([]): + pass - def test_should_raise_for_streaming_none_messages(self, catalog): + @pytest.mark.asyncio + async def test_should_raise_for_streaming_none_messages(self, catalog): """complete_streaming_chat with None should raise.""" - model = catalog.get_model(TEST_MODEL_ALIAS) + model = await catalog.get_model(TEST_MODEL_ALIAS) assert model is not None client = model.get_chat_client() with pytest.raises(ValueError): - client.complete_streaming_chat(None) + async for _ in client.complete_streaming_chat(None): + pass - def test_should_perform_tool_calling_chat_completion(self, catalog): + @pytest.mark.asyncio + async def test_should_perform_tool_calling_chat_completion(self, catalog): """Tool calling (non-streaming): model uses multiply_numbers tool to answer 7 * 6.""" - model = _get_loaded_chat_model(catalog) + model = await _get_loaded_chat_model(catalog) try: client = model.get_chat_client() client.settings.max_tokens = 500 @@ -145,7 +154,7 @@ def test_should_perform_tool_calling_chat_completion(self, catalog): tools = [get_multiply_tool()] # First turn: model should respond with a tool call - response = client.complete_chat(messages, tools) + response = await client.complete_chat(messages, tools) assert response is not None assert response.choices is not None @@ -168,16 +177,17 @@ def test_should_perform_tool_calling_chat_completion(self, catalog): messages.append({"role": "system", "content": "Respond only with the answer generated by the tool."}) client.settings.tool_choice = {"type": "auto"} - response = client.complete_chat(messages, tools) + response = await client.complete_chat(messages, tools) assert response.choices[0].message.content is not None assert "42" in response.choices[0].message.content finally: - 
model.unload() + await model.unload() - def test_should_perform_tool_calling_streaming_chat_completion(self, catalog): + @pytest.mark.asyncio + async def test_should_perform_tool_calling_streaming_chat_completion(self, catalog): """Tool calling (streaming): model uses multiply_numbers tool, then continue conversation.""" - model = _get_loaded_chat_model(catalog) + model = await _get_loaded_chat_model(catalog) try: client = model.get_chat_client() client.settings.max_tokens = 500 @@ -191,7 +201,7 @@ def test_should_perform_tool_calling_streaming_chat_completion(self, catalog): tools = [get_multiply_tool()] # First turn: collect chunks and find the tool call - chunks = list(client.complete_streaming_chat(messages, tools)) + chunks = [c async for c in client.complete_streaming_chat(messages, tools)] last_tool_call_chunk = next( (c for c in reversed(chunks) if c.choices and c.choices[0].delta and c.choices[0].delta.tool_calls), @@ -216,7 +226,7 @@ def test_should_perform_tool_calling_streaming_chat_completion(self, catalog): client.settings.tool_choice = {"type": "auto"} - chunks = list(client.complete_streaming_chat(messages, tools)) + chunks = [c async for c in client.complete_streaming_chat(messages, tools)] second_response = "".join( c.choices[0].delta.content for c in chunks @@ -224,20 +234,18 @@ def test_should_perform_tool_calling_streaming_chat_completion(self, catalog): ) assert "42" in second_response finally: - model.unload() + await model.unload() - def test_should_return_generator(self, catalog): - """complete_streaming_chat returns a generator that yields chunks.""" - model = _get_loaded_chat_model(catalog) + @pytest.mark.asyncio + async def test_should_return_async_generator(self, catalog): + """complete_streaming_chat returns an async generator that yields chunks.""" + model = await _get_loaded_chat_model(catalog) try: client = model.get_chat_client() client.settings.max_tokens = 50 client.settings.temperature = 0.0 - result = client.complete_streaming_chat([{"role": "user", "content": "Say hi."}]) - - assert result is not None - chunks = list(result) + chunks = [c async for c in client.complete_streaming_chat([{"role": "user", "content": "Say hi."}])] assert len(chunks) > 0 finally: - model.unload() \ No newline at end of file + await model.unload() \ No newline at end of file diff --git a/sdk/python/test/test_catalog.py b/sdk/python/test/test_catalog.py index 2e5968cc..5d13326c 100644 --- a/sdk/python/test/test_catalog.py +++ b/sdk/python/test/test_catalog.py @@ -7,6 +7,7 @@ from __future__ import annotations import json +import pytest from foundry_local_sdk.catalog import Catalog from foundry_local_sdk.detail.core_interop import Response @@ -17,14 +18,16 @@ class TestCatalog: """Catalog Tests.""" - def test_should_initialize_with_catalog_name(self, catalog): + @pytest.mark.asyncio + async def test_should_initialize_with_catalog_name(self, catalog): """Catalog should expose a non-empty name string.""" assert isinstance(catalog.name, str) assert len(catalog.name) > 0 - def test_should_list_models(self, catalog): + @pytest.mark.asyncio + async def test_should_list_models(self, catalog): """list_models() should return a non-empty list containing the test model.""" - models = catalog.list_models() + models = await catalog.list_models() assert isinstance(models, list) assert len(models) > 0 @@ -32,25 +35,29 @@ def test_should_list_models(self, catalog): aliases = {m.alias for m in models} assert TEST_MODEL_ALIAS in aliases - def test_should_get_model_by_alias(self, catalog): + 
@pytest.mark.asyncio + async def test_should_get_model_by_alias(self, catalog): """get_model() should return a Model whose alias matches.""" - model = catalog.get_model(TEST_MODEL_ALIAS) + model = await catalog.get_model(TEST_MODEL_ALIAS) assert model is not None assert model.alias == TEST_MODEL_ALIAS - def test_should_return_none_for_empty_alias(self, catalog): + @pytest.mark.asyncio + async def test_should_return_none_for_empty_alias(self, catalog): """get_model('') should return None (unknown alias).""" - result = catalog.get_model("") + result = await catalog.get_model("") assert result is None - def test_should_return_none_for_unknown_alias(self, catalog): + @pytest.mark.asyncio + async def test_should_return_none_for_unknown_alias(self, catalog): """get_model() with a random alias should return None.""" - result = catalog.get_model("definitely-not-a-real-model-alias-12345") + result = await catalog.get_model("definitely-not-a-real-model-alias-12345") assert result is None - def test_should_get_cached_models(self, catalog): + @pytest.mark.asyncio + async def test_should_get_cached_models(self, catalog): """get_cached_models() should return a list with at least the test model.""" - cached = catalog.get_cached_models() + cached = await catalog.get_cached_models() assert isinstance(cached, list) assert len(cached) > 0 @@ -58,27 +65,31 @@ def test_should_get_cached_models(self, catalog): aliases = {m.alias for m in cached} assert TEST_MODEL_ALIAS in aliases - def test_should_get_model_variant_by_id(self, catalog): + @pytest.mark.asyncio + async def test_should_get_model_variant_by_id(self, catalog): """get_model_variant() with a valid ID should return the variant.""" - cached = catalog.get_cached_models() + cached = await catalog.get_cached_models() assert len(cached) > 0 variant = cached[0] - result = catalog.get_model_variant(variant.id) + result = await catalog.get_model_variant(variant.id) assert result is not None assert result.id == variant.id - def test_should_return_none_for_empty_variant_id(self, catalog): + @pytest.mark.asyncio + async def test_should_return_none_for_empty_variant_id(self, catalog): """get_model_variant('') should return None.""" - result = catalog.get_model_variant("") + result = await catalog.get_model_variant("") assert result is None - def test_should_return_none_for_unknown_variant_id(self, catalog): + @pytest.mark.asyncio + async def test_should_return_none_for_unknown_variant_id(self, catalog): """get_model_variant() with a random ID should return None.""" - result = catalog.get_model_variant("definitely-not-a-real-model-id-12345") + result = await catalog.get_model_variant("definitely-not-a-real-model-id-12345") assert result is None - def test_should_resolve_latest_version_for_model_and_variant_inputs(self): + @pytest.mark.asyncio + async def test_should_resolve_latest_version_for_model_and_variant_inputs(self): """get_latest_version() should resolve latest variant and preserve Model input when already latest.""" test_model_infos = [ @@ -124,9 +135,12 @@ def test_should_resolve_latest_version_for_model_and_variant_inputs(self): ] class _MockCoreInterop: - def execute_command(self, command_name, command_input=None): + def _execute_command(self, command_name, command_input=None): if command_name == "get_catalog_name": return Response(data="TestCatalog", error=None) + return Response(data=None, error=f"Unexpected command: {command_name}") + + async def execute_command(self, command_name, command_input=None): if command_name == "get_model_list": return 
Response(data=json.dumps(test_model_infos), error=None) if command_name == "get_cached_models": @@ -134,12 +148,12 @@ def execute_command(self, command_name, command_input=None): return Response(data=None, error=f"Unexpected command: {command_name}") class _MockModelLoadManager: - def list_loaded(self): + async def list_loaded(self): return [] catalog = Catalog(_MockModelLoadManager(), _MockCoreInterop()) - model = catalog.get_model("test-alias") + model = await catalog.get_model("test-alias") assert model is not None variants = model.variants @@ -153,15 +167,15 @@ def list_loaded(self): assert middle_variant.id == "test-model:2" assert oldest_variant.id == "test-model:1" - result1 = catalog.get_latest_version(latest_variant) + result1 = await catalog.get_latest_version(latest_variant) assert result1.id == "test-model:3" - result2 = catalog.get_latest_version(middle_variant) + result2 = await catalog.get_latest_version(middle_variant) assert result2.id == "test-model:3" - result3 = catalog.get_latest_version(oldest_variant) + result3 = await catalog.get_latest_version(oldest_variant) assert result3.id == "test-model:3" model.select_variant(latest_variant) - result4 = catalog.get_latest_version(model) + result4 = await catalog.get_latest_version(model) assert result4 is model diff --git a/sdk/python/test/test_foundry_local_manager.py b/sdk/python/test/test_foundry_local_manager.py index 31528891..bf860931 100644 --- a/sdk/python/test/test_foundry_local_manager.py +++ b/sdk/python/test/test_foundry_local_manager.py @@ -6,6 +6,8 @@ from __future__ import annotations +import pytest + class _Response: def __init__(self, data=None, error=None): @@ -18,7 +20,7 @@ def __init__(self, responses): self._responses = responses self.calls = [] - def execute_command(self, command_name, command_input=None): + async def execute_command(self, command_name, command_input=None): self.calls.append((command_name, command_input)) return self._responses[command_name] @@ -26,18 +28,21 @@ def execute_command(self, command_name, command_input=None): class TestFoundryLocalManager: """Foundry Local Manager Tests.""" - def test_should_initialize_successfully(self, manager): + @pytest.mark.asyncio + async def test_should_initialize_successfully(self, manager): """Manager singleton should be non-None after initialize().""" assert manager is not None - def test_should_return_catalog(self, manager): + @pytest.mark.asyncio + async def test_should_return_catalog(self, manager): """Manager should expose a Catalog with a non-empty name.""" catalog = manager.catalog assert catalog is not None assert isinstance(catalog.name, str) assert len(catalog.name) > 0 - def test_discover_eps_returns_ep_info(self, manager): + @pytest.mark.asyncio + async def test_discover_eps_returns_ep_info(self, manager): original_core = manager._core_interop manager._core_interop = _FakeCoreInterop( { @@ -49,7 +54,7 @@ def test_discover_eps_returns_ep_info(self, manager): ) try: - eps = manager.discover_eps() + eps = await manager.discover_eps() finally: manager._core_interop = original_core @@ -58,7 +63,8 @@ def test_discover_eps_returns_ep_info(self, manager): assert eps[0].name == "CUDAExecutionProvider" assert eps[0].is_registered is True - def test_download_and_register_eps_returns_result(self, manager): + @pytest.mark.asyncio + async def test_download_and_register_eps_returns_result(self, manager): original_core = manager._core_interop manager._core_interop = _FakeCoreInterop( { @@ -73,7 +79,7 @@ def 
test_download_and_register_eps_returns_result(self, manager): ) try: - result = manager.download_and_register_eps(["CUDAExecutionProvider"]) + result = await manager.download_and_register_eps(["CUDAExecutionProvider"]) finally: manager._core_interop = original_core diff --git a/sdk/python/test/test_model.py b/sdk/python/test/test_model.py index e2ea1509..b4e47114 100644 --- a/sdk/python/test/test_model.py +++ b/sdk/python/test/test_model.py @@ -6,83 +6,91 @@ from __future__ import annotations +import pytest + from .conftest import TEST_MODEL_ALIAS, AUDIO_MODEL_ALIAS class TestModel: """Model Tests.""" - def test_should_verify_cached_models(self, catalog): + @pytest.mark.asyncio + async def test_should_verify_cached_models(self, catalog): """Cached models from test-data-shared should include qwen and whisper.""" - cached = catalog.get_cached_models() + cached = await catalog.get_cached_models() assert isinstance(cached, list) assert len(cached) > 0 # Check qwen model is cached qwen = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) assert qwen is not None, f"{TEST_MODEL_ALIAS} should be cached" - assert qwen.is_cached is True + assert await qwen.is_cached() is True # Check whisper model is cached whisper = next((m for m in cached if m.alias == AUDIO_MODEL_ALIAS), None) assert whisper is not None, f"{AUDIO_MODEL_ALIAS} should be cached" - assert whisper.is_cached is True + assert await whisper.is_cached() is True - def test_should_load_and_unload_model(self, catalog): + @pytest.mark.asyncio + async def test_should_load_and_unload_model(self, catalog): """Load/unload cycle should toggle is_loaded on the selected variant.""" - cached = catalog.get_cached_models() + cached = await catalog.get_cached_models() assert len(cached) > 0 cached_variant = next((m for m in cached if m.alias == TEST_MODEL_ALIAS), None) assert cached_variant is not None - model = catalog.get_model(TEST_MODEL_ALIAS) + model = await catalog.get_model(TEST_MODEL_ALIAS) assert model is not None model.select_variant(cached_variant) # Ensure it's not loaded initially (or unload if it is) - if model.is_loaded: - model.unload() - assert model.is_loaded is False + if await model.is_loaded(): + await model.unload() + assert await model.is_loaded() is False try: - model.load() - assert model.is_loaded is True + await model.load() + assert await model.is_loaded() is True - model.unload() - assert model.is_loaded is False + await model.unload() + assert await model.is_loaded() is False finally: # Safety cleanup - if model.is_loaded: - model.unload() + if await model.is_loaded(): + await model.unload() - def test_should_expose_context_length(self, catalog): + @pytest.mark.asyncio + async def test_should_expose_context_length(self, catalog): """Model should expose context_length from ModelInfo metadata.""" - model = catalog.get_model(TEST_MODEL_ALIAS) + model = await catalog.get_model(TEST_MODEL_ALIAS) assert model is not None # context_length should be None or a positive integer ctx = model.context_length assert ctx is None or (isinstance(ctx, int) and ctx > 0) - def test_should_expose_modalities(self, catalog): + @pytest.mark.asyncio + async def test_should_expose_modalities(self, catalog): """Model should expose input_modalities and output_modalities.""" - model = catalog.get_model(TEST_MODEL_ALIAS) + model = await catalog.get_model(TEST_MODEL_ALIAS) assert model is not None # Modalities should be None or non-empty strings for val in (model.input_modalities, model.output_modalities): assert val is None or 
(isinstance(val, str) and len(val) > 0) - def test_should_expose_capabilities(self, catalog): + @pytest.mark.asyncio + async def test_should_expose_capabilities(self, catalog): """Model should expose capabilities metadata.""" - model = catalog.get_model(TEST_MODEL_ALIAS) + model = await catalog.get_model(TEST_MODEL_ALIAS) assert model is not None caps = model.capabilities assert caps is None or (isinstance(caps, str) and len(caps) > 0) - def test_should_expose_supports_tool_calling(self, catalog): + @pytest.mark.asyncio + async def test_should_expose_supports_tool_calling(self, catalog): """Model should expose supports_tool_calling metadata.""" - model = catalog.get_model(TEST_MODEL_ALIAS) + model = await catalog.get_model(TEST_MODEL_ALIAS) assert model is not None stc = model.supports_tool_calling assert stc is None or isinstance(stc, bool)
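Taken together, the converted surface is consumed end to end like this (a minimal sketch mirroring the updated tests; the ``qwen2.5-0.5b`` alias and ``app_name`` value are placeholders, and error handling is elided)::

    import asyncio

    from foundry_local_sdk import Configuration, FoundryLocalManager

    async def main() -> None:
        # initialize() is now a coroutine and must be awaited first.
        config = Configuration(app_name="readme_example")
        await FoundryLocalManager.initialize(config)
        manager = FoundryLocalManager.instance

        # Catalog lookup and the model lifecycle are awaited as well.
        # Assumes the alias resolves; get_model() returns None for
        # unknown aliases.
        model = await manager.catalog.get_model("qwen2.5-0.5b")
        await model.load()
        try:
            client = model.get_chat_client()  # client accessors stay synchronous
            # Streaming completions are now consumed with ``async for``.
            async for chunk in client.complete_streaming_chat(
                [{"role": "user", "content": "What is 7 multiplied by 6?"}]
            ):
                if chunk.choices and chunk.choices[0].delta.content:
                    print(chunk.choices[0].delta.content, end="", flush=True)
            print()
        finally:
            await model.unload()

    asyncio.run(main())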