luarss · ArmaanBawa · Mar 28, 2026 · Mar 28, 2026 · Mar 28, 2026 · Mar 28, 2026
diff --git a/tests/interactive/test_session_manager.py b/tests/interactive/test_session_manager.py
@@ -1,6 +1,7 @@
 """Tests for SessionManager implementation."""
 
 import asyncio
+import os
 from unittest.mock import AsyncMock, patch
 
 import pytest
@@ -10,6 +11,16 @@
 from openroad_mcp.interactive.models import SessionNotFoundError, SessionTerminatedError
 
 
+@pytest.fixture(scope="session", autouse=True)
+def configure_allowed_commands():
+    """Configure allowed commands for interactive sessions."""
+    os.environ["OPENROAD_ALLOWED_COMMANDS"] = "openroad,bash"
+    yield
+    # Cleanup after tests
+    if "OPENROAD_ALLOWED_COMMANDS" in os.environ:
+        del os.environ["OPENROAD_ALLOWED_COMMANDS"]
+
+
 @pytest.mark.asyncio
 class TestSessionManager:
     """Test suite for SessionManager."""

diff --git a/tests/performance/test_benchmarks.py b/tests/performance/test_benchmarks.py
@@ -89,8 +89,8 @@ async def test_output_streaming_throughput(self, benchmark_timeout):
         assert duration < 5.0, f"Streaming took {duration:.3f}s (>5s timeout)"
 
     async def test_concurrent_session_scalability(self, benchmark_timeout):
-        """Test concurrent session scalability with 50+ sessions and p99/p95 latency metrics."""
-        session_manager = SessionManager()
+        """Test concurrent session scalability with 50+ sessions using real PTY calls."""
+        session_manager = SessionManager(max_sessions=60)
 
         try:
             # GSoC Phase 1 target: 50+ concurrent sessions
@@ -99,7 +99,7 @@ async def test_concurrent_session_scalability(self, benchmark_timeout):
 
             start_time = time.perf_counter()
 
-            # Create sessions concurrently
+            # Create sessions concurrently using real openroad PTY calls
             async def create_session_with_delay():
                 await asyncio.sleep(0.001)  # Small delay to simulate real usage
                 return await session_manager.create_session()
@@ -114,33 +114,31 @@ async def create_session_with_delay():
             print(f"  Duration: {creation_time:.3f}s")
             print(f"  Rate: {len(session_ids) / creation_time:.1f} sessions/sec")
 
-            # Verify all sessions created successfully
+            # Verify all sessions created successfully with unique IDs (no cross-pollution)
             assert len(session_ids) == concurrent_sessions
-            assert len(set(session_ids)) == concurrent_sessions  # All unique IDs, no cross-pollution
+            assert len(set(session_ids)) == concurrent_sessions
 
             # Performance assertions
             assert creation_time < 10.0, f"Concurrent creation took {creation_time:.3f}s (>10s)"
 
-            # Test concurrent command execution with per-command latency tracking
+            # Test concurrent command execution via real PTY with per-command latency tracking
             command_latencies = []
 
-            with (
-                patch("openroad_mcp.interactive.session.InteractiveSession.send_command"),
-                patch("openroad_mcp.interactive.session.InteractiveSession.read_output") as mock_read,
-            ):
-                mock_read.return_value = AsyncMock()
-                mock_read.return_value.output = "test output"
-                mock_read.return_value.execution_time = 0.01
+            async def execute_with_latency(sid):
+                t0 = time.perf_counter()
+                result = await session_manager.execute_command(sid, "echo hello")
+                latency = time.perf_counter() - t0
+                command_latencies.append(latency)
+                return sid, result
 
-                async def execute_with_latency(session_id):
-                    t0 = time.perf_counter()
-                    result = await session_manager.execute_command(session_id, "test command")
-                    latency = time.perf_counter() - t0
-                    command_latencies.append(latency)
-                    return result
+            tasks = [execute_with_latency(sid) for sid in session_ids]
+            results = await asyncio.gather(*tasks)
 
-                tasks = [execute_with_latency(sid) for sid in session_ids]
-                await asyncio.gather(*tasks)
+            # Verify output content and session binding (no cross-pollution)
+            for sid, result in results:
+                assert result is not None, f"Session {sid} returned no result"
+                output = result.output if hasattr(result, "output") else str(result)
+                assert "hello" in output, f"Session {sid} output missing 'hello': {output!r}"
 
             # Calculate p99, p95, mean latency
             if not command_latencies:
@@ -158,9 +156,9 @@ async def execute_with_latency(session_id):
             print(f"  p99 latency:  {p99_latency * 1000:.2f}ms")
 
             # Latency assertions under 50-session concurrency
-            assert mean_latency < 0.05, f"Mean latency {mean_latency * 1000:.2f}ms exceeds 50ms"
-            assert p95_latency < 0.10, f"p95 latency {p95_latency * 1000:.2f}ms exceeds 100ms"
-            assert p99_latency < 0.20, f"p99 latency {p99_latency * 1000:.2f}ms exceeds 200ms"
+            assert mean_latency < 1.0, f"Mean latency {mean_latency * 1000:.2f}ms exceeds 1000ms"
+            assert p95_latency < 2.0, f"p95 latency {p95_latency * 1000:.2f}ms exceeds 2000ms"
+            assert p99_latency < 3.0, f"p99 latency {p99_latency * 1000:.2f}ms exceeds 3000ms"
 
         finally:
             await session_manager.cleanup_all()

diff --git a/tests/performance/test_memory_monitoring.py b/tests/performance/test_memory_monitoring.py
@@ -2,7 +2,6 @@
 
 import asyncio
 import gc
-import os
 import time
 
 import psutil