diff --git a/src/openroad_mcp/core/manager.py b/src/openroad_mcp/core/manager.py
index 18bd1af..d8b8dc3 100644
--- a/src/openroad_mcp/core/manager.py
+++ b/src/openroad_mcp/core/manager.py
@@ -94,6 +94,9 @@ async def execute_command(
         actual_timeout = timeout_ms or self._default_timeout_ms
 
         try:
+            # Discard any output buffered before this command (e.g. startup banner)
+            # so that read_output only captures the response to this specific command.
+            await session.output_buffer.drain_all()
             await session.send_command(command)
             result = await session.read_output(actual_timeout)
 
diff --git a/src/openroad_mcp/interactive/session.py b/src/openroad_mcp/interactive/session.py
index ad924cd..ca01b7f 100644
--- a/src/openroad_mcp/interactive/session.py
+++ b/src/openroad_mcp/interactive/session.py
@@ -163,9 +163,6 @@ async def _start_background_tasks(self) -> None:
         self._writer_task = asyncio.create_task(self._write_input())
         self._exit_monitor_task = asyncio.create_task(self._monitor_exit())
 
-        # Wait for background tasks to start and initial output to be available
-        # await self._wait_for_startup_ready()
-
     async def _wait_for_startup_ready(self, timeout: float = 2.0) -> None:
         """Wait for background tasks to be ready and initial output to be available."""
         logger.info(f"Session {self.session_id} waiting for startup readiness (timeout={timeout}s)")
diff --git a/tests/performance/test_benchmarks.py b/tests/performance/test_benchmarks.py
index df1823a..ed12060 100644
--- a/tests/performance/test_benchmarks.py
+++ b/tests/performance/test_benchmarks.py
@@ -2,6 +2,7 @@
 
 import asyncio
 import math
+import os
 import time
 from unittest.mock import AsyncMock, patch
 
@@ -89,7 +90,7 @@ async def test_output_streaming_throughput(self, benchmark_timeout):
         assert duration < 5.0, f"Streaming took {duration:.3f}s (>5s timeout)"
 
     async def test_concurrent_session_scalability(self, benchmark_timeout):
-        """Test concurrent session scalability with 50+ sessions and p99/p95 latency metrics."""
+        """Test concurrent session scalability with 50+ sessions using real PTY calls."""
         session_manager = SessionManager()
 
         try:
@@ -99,7 +100,7 @@ async def test_concurrent_session_scalability(self, benchmark_timeout):
 
             start_time = time.perf_counter()
 
-            # Create sessions concurrently
+            # Create sessions concurrently using real openroad PTY calls
             async def create_session_with_delay():
                 await asyncio.sleep(0.001)  # Small delay to simulate real usage
                 return await session_manager.create_session()
@@ -114,33 +115,31 @@ async def create_session_with_delay():
             print(f"  Duration: {creation_time:.3f}s")
             print(f"  Rate: {len(session_ids) / creation_time:.1f} sessions/sec")
 
-            # Verify all sessions created successfully
+            # Verify all sessions created successfully with unique IDs (no cross-pollution)
             assert len(session_ids) == concurrent_sessions
             assert len(set(session_ids)) == concurrent_sessions  # All unique IDs, no cross-pollution
 
             # Performance assertions
             assert creation_time < 10.0, f"Concurrent creation took {creation_time:.3f}s (>10s)"
 
-            # Test concurrent command execution with per-command latency tracking
+            # Test concurrent command execution via real PTY with per-command latency tracking.
             command_latencies = []
 
-            with (
-                patch("openroad_mcp.interactive.session.InteractiveSession.send_command"),
-                patch("openroad_mcp.interactive.session.InteractiveSession.read_output") as mock_read,
-            ):
-                mock_read.return_value = AsyncMock()
-                mock_read.return_value.output = "test output"
-                mock_read.return_value.execution_time = 0.01
+            async def execute_with_latency(sid):
+                t0 = time.perf_counter()
+                result = await session_manager.execute_command(sid, "puts hello")
+                latency = time.perf_counter() - t0
+                command_latencies.append(latency)
+                return sid, result
 
-                async def execute_with_latency(session_id):
-                    t0 = time.perf_counter()
-                    result = await session_manager.execute_command(session_id, "test command")
-                    latency = time.perf_counter() - t0
-                    command_latencies.append(latency)
-                    return result
+            tasks = [execute_with_latency(sid) for sid in session_ids]
+            results = await asyncio.gather(*tasks)
 
-                tasks = [execute_with_latency(sid) for sid in session_ids]
-                await asyncio.gather(*tasks)
+            # Verify output content and session binding (no cross-pollution)
+            for sid, result in results:
+                assert result is not None, f"Session {sid} returned no result"
+                output = result.output if hasattr(result, "output") else str(result)
+                assert "hello" in output, f"Session {sid} output missing 'hello': {output!r}"
 
             # Calculate p99, p95, mean latency
             if not command_latencies:
@@ -158,16 +157,15 @@ async def execute_with_latency(session_id):
             print(f"  p99 latency: {p99_latency * 1000:.2f}ms")
 
             # Latency assertions under 50-session concurrency
-            assert mean_latency < 0.05, f"Mean latency {mean_latency * 1000:.2f}ms exceeds 50ms"
-            assert p95_latency < 0.10, f"p95 latency {p95_latency * 1000:.2f}ms exceeds 100ms"
-            assert p99_latency < 0.20, f"p99 latency {p99_latency * 1000:.2f}ms exceeds 200ms"
+            assert mean_latency < 1.0, f"Mean latency {mean_latency * 1000:.2f}ms exceeds 1000ms"
+            assert p95_latency < 2.0, f"p95 latency {p95_latency * 1000:.2f}ms exceeds 2000ms"
+            assert p99_latency < 3.0, f"p99 latency {p99_latency * 1000:.2f}ms exceeds 3000ms"
 
         finally:
             await session_manager.cleanup_all()
 
     async def test_memory_usage_profiling(self, benchmark_timeout):
         """Test memory usage profiling."""
-        import os
         import psutil