Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
110bf3f
perf: replace mocks with real PTY calls in concurrent session scalabi…
ArmaanBawa Mar 28, 2026
4fbd77b
fix: allow bash command in concurrent session tests
ArmaanBawa Mar 28, 2026
54803b1
fix: remove bash from allowed commands, use openroad PTY directly
ArmaanBawa Mar 28, 2026
dfc76cb
fix: capture gather results and assert output content + session binding
ArmaanBawa Mar 28, 2026
7057e8b
fix: save and restore singleton max_sessions to prevent test leakage
ArmaanBawa Mar 28, 2026
4deb00b
fix: add missing os import in test_memory_monitoring.py
ArmaanBawa Mar 28, 2026
a8f124b
fix: add missing os import in test_benchmarks.py
ArmaanBawa Mar 28, 2026
d8ae872
fix: remove duplicate os import inside test_memory_usage_profiling
ArmaanBawa Mar 28, 2026
327362c
fix: use TCL puts hello instead of bash echo in concurrent session test
ArmaanBawa Mar 28, 2026
56459a8
fix: remove unused configure_allowed_commands fixture and os import
ArmaanBawa Mar 28, 2026
f1c6843
fix: change max_sessions override from 60 to 50 to match test concurr…
ArmaanBawa Mar 28, 2026
2f453b3
fix: drain startup banner before asserting command output in perf test
ArmaanBawa Mar 29, 2026
d881c52
Update tests/performance/test_benchmarks.py
luarss Mar 29, 2026
6505053
Update tests/performance/test_benchmarks.py
luarss Mar 29, 2026
cc1c5f4
fix: replace flaky sleep-based banner drain with sentinel command
ArmaanBawa Mar 29, 2026
2eb9289
fix: retry sentinel until __ready__ confirmed in output
ArmaanBawa Mar 29, 2026
c506523
fix: retry puts hello directly to drain stale banner/sentinel output
ArmaanBawa Apr 2, 2026
cc98397
fix: sync test_benchmarks.py with main (add _max_sessions override + …
ArmaanBawa Apr 2, 2026
353457d
refactor: remove redundant max_sessions=50 override in perf test
ArmaanBawa Apr 2, 2026
3973c85
fix: drain output buffer before sending command to fix stale output bug
ArmaanBawa Apr 3, 2026
fa3fcfb
fix: wait for startup banner before returning from create_session
ArmaanBawa Apr 3, 2026
0bb7642
fix: revert wait_for_data in _start_background_tasks
ArmaanBawa Apr 3, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions tests/interactive/test_session_manager.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Tests for SessionManager implementation."""

import asyncio
import os
from unittest.mock import AsyncMock, patch

import pytest
Expand All @@ -10,6 +11,16 @@
from openroad_mcp.interactive.models import SessionNotFoundError, SessionTerminatedError


@pytest.fixture(scope="session", autouse=True)
def configure_allowed_commands():
"""Configure allowed commands for interactive sessions."""
os.environ["OPENROAD_ALLOWED_COMMANDS"] = "openroad,bash"
yield
# Cleanup after tests
if "OPENROAD_ALLOWED_COMMANDS" in os.environ:
del os.environ["OPENROAD_ALLOWED_COMMANDS"]


Comment thread
luarss marked this conversation as resolved.
Outdated
@pytest.mark.asyncio
class TestSessionManager:
"""Test suite for SessionManager."""
Expand Down
46 changes: 22 additions & 24 deletions tests/performance/test_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,8 @@ async def test_output_streaming_throughput(self, benchmark_timeout):
assert duration < 5.0, f"Streaming took {duration:.3f}s (>5s timeout)"

async def test_concurrent_session_scalability(self, benchmark_timeout):
"""Test concurrent session scalability with 50+ sessions and p99/p95 latency metrics."""
session_manager = SessionManager()
"""Test concurrent session scalability with 50+ sessions using real PTY calls."""
session_manager = SessionManager(max_sessions=60)
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated

try:
# GSoC Phase 1 target: 50+ concurrent sessions
Expand All @@ -99,7 +99,7 @@ async def test_concurrent_session_scalability(self, benchmark_timeout):

start_time = time.perf_counter()

# Create sessions concurrently
# Create sessions concurrently using real openroad PTY calls
async def create_session_with_delay():
await asyncio.sleep(0.001) # Small delay to simulate real usage
return await session_manager.create_session()
Expand All @@ -114,33 +114,31 @@ async def create_session_with_delay():
print(f" Duration: {creation_time:.3f}s")
print(f" Rate: {len(session_ids) / creation_time:.1f} sessions/sec")

# Verify all sessions created successfully
# Verify all sessions created successfully with unique IDs (no cross-pollution)
assert len(session_ids) == concurrent_sessions
assert len(set(session_ids)) == concurrent_sessions # All unique IDs, no cross-pollution
assert len(set(session_ids)) == concurrent_sessions

# Performance assertions
assert creation_time < 10.0, f"Concurrent creation took {creation_time:.3f}s (>10s)"

# Test concurrent command execution with per-command latency tracking
# Test concurrent command execution via real PTY with per-command latency tracking
command_latencies = []

with (
patch("openroad_mcp.interactive.session.InteractiveSession.send_command"),
patch("openroad_mcp.interactive.session.InteractiveSession.read_output") as mock_read,
):
mock_read.return_value = AsyncMock()
mock_read.return_value.output = "test output"
mock_read.return_value.execution_time = 0.01
async def execute_with_latency(sid):
t0 = time.perf_counter()
result = await session_manager.execute_command(sid, "echo hello")
latency = time.perf_counter() - t0
command_latencies.append(latency)
return sid, result

async def execute_with_latency(session_id):
t0 = time.perf_counter()
result = await session_manager.execute_command(session_id, "test command")
latency = time.perf_counter() - t0
command_latencies.append(latency)
return result
tasks = [execute_with_latency(sid) for sid in session_ids]
results = await asyncio.gather(*tasks)

tasks = [execute_with_latency(sid) for sid in session_ids]
await asyncio.gather(*tasks)
# Verify output content and session binding (no cross-pollution)
for sid, result in results:
assert result is not None, f"Session {sid} returned no result"
output = result.output if hasattr(result, "output") else str(result)
assert "hello" in output, f"Session {sid} output missing 'hello': {output!r}"

# Calculate p99, p95, mean latency
if not command_latencies:
Expand All @@ -158,9 +156,9 @@ async def execute_with_latency(session_id):
print(f" p99 latency: {p99_latency * 1000:.2f}ms")

# Latency assertions under 50-session concurrency
assert mean_latency < 0.05, f"Mean latency {mean_latency * 1000:.2f}ms exceeds 50ms"
assert p95_latency < 0.10, f"p95 latency {p95_latency * 1000:.2f}ms exceeds 100ms"
assert p99_latency < 0.20, f"p99 latency {p99_latency * 1000:.2f}ms exceeds 200ms"
assert mean_latency < 1.0, f"Mean latency {mean_latency * 1000:.2f}ms exceeds 1000ms"
assert p95_latency < 2.0, f"p95 latency {p95_latency * 1000:.2f}ms exceeds 2000ms"
assert p99_latency < 3.0, f"p99 latency {p99_latency * 1000:.2f}ms exceeds 3000ms"

finally:
await session_manager.cleanup_all()
Expand Down
1 change: 0 additions & 1 deletion tests/performance/test_memory_monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import asyncio
import gc
import os
import time

import psutil
Expand Down