Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 126 additions & 0 deletions config/greeting_local_simplified.json5
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
{
// Configuration version
version: "v1.0.3",

// Mode system configuration for Unitree Go2
default_mode: "greeting",
allow_manual_switching: true,
mode_memory_enabled: true,

// Global settings
api_key: "${OM_API_KEY:-openmind_free}",
unitree_ethernet: "${UNITREE_ETHERNET:-enP2p1s0}",
system_governance: "",
cortex_llm: {
type: "QwenLLMSimplified",
config: {
agent_name: "Bits",
history_length: 2,
base_url: "${QWEN_BASE_URL:-http://omr2.local:8860}/v1",
model: "nvidia/nemotron-3-nano",
},
},

knowledge_base: {
knowledge_base_name: "demo",
base_url: "${KB_BASE_URL:-http://localhost:8100}",
},

modes: {
approaching: {
display_name: "Approaching Person Mode",
description: "Robot approaches detected humans autonomously.",
system_prompt_base: "You are Bits, a friendly and helpful robotic companion built on a Unitree Go2 platform.",
hertz: 0.001,
agent_inputs: [],
action_execution_mode: "concurrent",
agent_actions: [],
backgrounds: [
{
type: "ApproachingPerson",
},
],
lifecycle_hooks: [],
},
greeting: {
display_name: "Greeting Conversation Mode",
description: "Robot engages in greeting conversations with users upon approach.",
system_prompt_base: "You are Bits, a friendly robot dog at NVIDIA GTC made by OpenMind. Today is March 9, 2026. Answer in 1-2 short spoken sentences. If relevant info is provided, use it — rephrase in your own words.",
hertz: 0.001,
agent_inputs: [
{
type: "RivaASRRTSPInput",
config: {
base_url: "ws://localhost:6790",
enable_tts_interrupt: false,
},
},
],
action_execution_mode: "concurrent",
agent_actions: [
{
name: "greeting_conversation_simplified",
llm_label: "greeting_conversation",
connector: "greeting_conversation_kokoro_simplified",
config: {
model_id: "mlx-community/Kokoro-82M-bf16",
base_url: "${KOKORO_BASE_URL:-http://omr2.local:8880}/v1",
},
},
],
backgrounds: [],
lifecycle_hooks: [
{
hook_type: "on_startup",
handler_type: "message",
handler_config: {
tts_provider: "kokoro",
message: "Hello! I'm Bits, how can I help you today?",
model_id: "mlx-community/Kokoro-82M-bf16",
base_url: "${KOKORO_BASE_URL:-http://omr2.local:8880}/v1",
},
},
{
hook_type: "on_entry",
handler_type: "message",
handler_config: {
tts_provider: "kokoro",
message: "Hey there! What can I help you with?",
model_id: "mlx-community/Kokoro-82M-bf16",
base_url: "${KOKORO_BASE_URL:-http://omr2.local:8880}/v1",
},
},
{
hook_type: "on_exit",
handler_type: "function",
handler_config: {
module_name: "greeting_hook",
function: "geeting_end_hook", // NOTE(review): spelling looks like a typo for "greeting_end_hook" — value must match the function actually defined in the greeting_hook module; verify there before renaming
tts_provider: "kokoro",
model_id: "mlx-community/Kokoro-82M-bf16",
base_url: "${KOKORO_BASE_URL:-http://omr2.local:8880}/v1",
},
},
],
},
},

transition_rules: [
{
from_mode: "approaching",
to_mode: "greeting",
transition_type: "context_aware",
context_conditions: { approaching_detected: true },
priority: 0,
cooldown_seconds: 5.0,
},
{
from_mode: "greeting",
to_mode: "approaching",
transition_type: "context_aware",
context_conditions: { greeting_conversation_finished: true },
priority: 0,
cooldown_seconds: 5.0,
},
],
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,262 @@
import asyncio
import json
import logging
import time
from uuid import uuid4

from pydantic import Field

from actions.base import ActionConfig, ActionConnector
from actions.greeting_conversation_simplified.interface import (
GreetingConversationSimplifiedInput,
)
from providers.context_provider import ContextProvider
from providers.greeting_conversation_state_provider import (
ConversationState,
GreetingConversationStateMachineProvider,
)
from providers.kokoro_tts_provider import KokoroTTSProvider
from providers.tts_text_utils import normalize_tts_text
from zenoh_msgs import (
PersonGreetingStatus,
String,
open_zenoh_session,
prepare_header,
)


class SpeakKokoroTTSConfig(ActionConfig):
    """
    Configuration for Kokoro TTS connector.

    Values are typically supplied from the mode config's action
    ``config`` block (e.g. ``base_url`` and ``model_id``); unset fields
    fall back to the defaults declared below.

    Parameters
    ----------
    base_url : str
        Base URL for Kokoro TTS API.
    voice_id : str
        Kokoro voice ID.
    model_id : str
        Kokoro model ID.
    output_format : str
        Kokoro output format.
    rate : int
        Audio sample rate in Hz.
    enable_tts_interrupt : bool
        Enable TTS interrupt when ASR detects speech during playback.
    silence_rate : int
        Number of responses to skip before speaking.
    """

    # OpenAI-compatible endpoint root; defaults to a local Kokoro server.
    base_url: str = Field(
        default="http://127.0.0.1:8880/v1",
        description="Base URL for Kokoro TTS API",
    )
    voice_id: str = Field(
        default="af_bella",
        description="Kokoro voice ID",
    )
    model_id: str = Field(
        default="kokoro",
        description="Kokoro model ID",
    )
    # Raw PCM by default so the audio pipeline can play it directly.
    output_format: str = Field(
        default="pcm",
        description="Kokoro output format",
    )
    rate: int = Field(
        default=24000,
        description="Audio sample rate in Hz",
    )
    enable_tts_interrupt: bool = Field(
        default=False,
        description="Enable TTS interrupt when ASR detects speech during playback",
    )
    # NOTE(review): silence_rate is declared but not read by
    # GreetingConversationConnector in this file — presumably consumed
    # elsewhere; verify before removing.
    silence_rate: int = Field(
        default=0,
        description="Number of responses to skip before speaking",
    )


class GreetingConversationConnector(
    ActionConnector[SpeakKokoroTTSConfig, GreetingConversationSimplifiedInput]
):
    """
    Simplified greeting conversation connector with Kokoro TTS.

    Uses a single 'response' field from the LLM and hardcodes
    conversation state values for the state machine.
    Applies TTS text normalization (e.g. month abbreviation expansion).

    Lifecycle: ``__init__`` starts the TTS provider and opens a Zenoh
    session; ``connect`` handles each LLM response; ``tick`` advances the
    state machine during silence; ``stop`` tears everything down.
    """

    def __init__(self, config: SpeakKokoroTTSConfig):
        super().__init__(config)

        # Conversation state machine (CONVERSING / CONCLUDING / FINISHED).
        self.greeting_state_provider = GreetingConversationStateMachineProvider()
        self.greeting_state_provider.start_conversation()

        # Shared context used to signal mode transitions
        # (greeting_conversation_finished is read by the transition rules).
        self.context_provider = ContextProvider()

        # api_key is not declared on SpeakKokoroTTSConfig; getattr keeps
        # configs without the field working (falls back to None).
        api_key = getattr(self.config, "api_key", None)
        logging.info("Creating Kokoro TTS provider")
        self.tts = KokoroTTSProvider(
            url=self.config.base_url,
            api_key=api_key,
            voice_id=self.config.voice_id,
            model_id=self.config.model_id,
            output_format=self.config.output_format,
            rate=self.config.rate,
            enable_tts_interrupt=self.config.enable_tts_interrupt,
        )
        self.tts.start()

        # Estimated window during which TTS audio is still playing;
        # tick() skips state updates inside this window.
        self.tts_triggered_time = time.time()
        self.tts_duration = 0.0
        # Guards so the "conversation finished" context update fires only once.
        self.conversation_finished_sent = False
        self.pending_finished_update = False
        self.delayed_update_task = None

        self.person_greeting_topic = "om/person_greeting"
        try:
            self.session = open_zenoh_session()
            logging.info("Zenoh session opened for PersonGreetingStatus publishing")
        except Exception as e:
            # Best-effort: publishing is optional; run without a session.
            logging.error(f"Error opening Zenoh session: {e}")
            self.session = None

        self.greeting_status = ConversationState.CONVERSING.value

    async def connect(
        self, output_interface: GreetingConversationSimplifiedInput
    ) -> None:
        """
        Process the greeting conversation response.

        Only reads 'response' from the LLM output and hardcodes
        conversation state values for the state machine. Queues the
        normalized text for TTS and, when the state machine reports
        FINISHED, schedules a delayed context update so the mode switch
        happens after the speech has (estimatedly) finished playing.
        """
        logging.info(f"Greeting Response: {output_interface.response}")

        # Hardcoded state/confidence values: the simplified LLM schema
        # only produces 'response'.
        llm_output = {
            "conversation_state": ConversationState.CONVERSING.value,
            "response": output_interface.response,
            "confidence": 0.85,
            "speech_clarity": 0.85,
        }

        tts_text = normalize_tts_text(output_interface.response)
        self.tts.add_pending_message(tts_text)

        # Estimate TTS duration based on text length (~100 words per minute speech rate)
        word_count = len(output_interface.response.split())
        self.tts_duration = (
            word_count / 100.0
        ) * 60.0 + 5  # Convert to seconds and add buffer time
        self.tts_triggered_time = time.time()

        state_update = self.greeting_state_provider.process_conversation(llm_output)
        current_state = state_update.get("current_state", self.greeting_status)
        self.greeting_status = current_state
        self.publish_countdown_status(self.greeting_status)

        logging.info(f"Greeting Conversation Response: {state_update}")

        if (
            self.greeting_status == ConversationState.FINISHED.value
            and not self.conversation_finished_sent
        ):
            logging.info(
                f"Greeting conversation state is FINISHED. "
                f"Scheduling context update after TTS completes ({self.tts_duration:.1f}s)."
            )
            self.pending_finished_update = True
            self.conversation_finished_sent = True
            # NOTE(review): the wait uses 150 wpm with no buffer while
            # tts_duration above uses 100 wpm + 5s — presumably intentional
            # (transition slightly before/independent of the tick guard);
            # confirm and unify if not.
            self.delayed_update_task = asyncio.create_task(
                self._delayed_context_update((word_count / 150.0) * 60.0)
            )

    async def _delayed_context_update(self, wait_duration: float) -> None:
        """Wait for TTS to finish, then update context to indicate conversation finished."""
        try:
            logging.info(
                f"Waiting {wait_duration:.1f}s for TTS to complete before updating context..."
            )
            await asyncio.sleep(wait_duration)

            if self.pending_finished_update:
                logging.info(
                    "TTS completed. Updating context: greeting_conversation_finished = True"
                )
                self.context_provider.update_context(
                    {"greeting_conversation_finished": True}
                )
                self.pending_finished_update = False
        except asyncio.CancelledError:
            # Normal during shutdown (stop() cancels this task); re-raise
            # so cancellation propagates correctly.
            raise
        except Exception as e:
            logging.error(f"Error in delayed context update: {e}")

    def tick(self) -> None:
        """Periodically update conversation state even without LLM input."""
        logging.info("GreetingConversationConnector tick called")
        # self.sleep is presumably provided by ActionConnector — paces the
        # tick loop at ~10s intervals; TODO confirm against the base class.
        self.sleep(10)

        # Don't advance the silence-based state machine while the robot is
        # (estimatedly) still speaking.
        if time.time() - self.tts_triggered_time < self.tts_duration:
            logging.info(
                f"Skipping tick update due to recent TTS activity "
                f"(remaining: {self.tts_duration - (time.time() - self.tts_triggered_time):.1f}s)."
            )
            return

        state_update = self.greeting_state_provider.update_state_without_llm()
        current_state = state_update.get("current_state", self.greeting_status)
        self.greeting_status = current_state
        self.publish_countdown_status(self.greeting_status)

        if (
            current_state == ConversationState.FINISHED.value
            and not self.conversation_finished_sent
        ):
            # Silence timeout path: no TTS pending, update context immediately.
            logging.info("Greeting conversation has finished (detected in tick).")
            self.context_provider.update_context(
                {"greeting_conversation_finished": True}
            )
            self.conversation_finished_sent = True

        logging.info(
            f"State: {current_state}, "
            f"Confidence: {state_update.get('confidence', {}).get('overall', 0):.2f}, "
            f"Silence: {state_update.get('silence_duration', 0):.1f}s"
        )

    def publish_countdown_status(self, current_state: str) -> None:
        """Publish countdown status to Zenoh based on current conversation state."""
        # Fixed countdown estimates per state; anything other than
        # CONVERSING/CONCLUDING (i.e. FINISHED) reports 0.
        if current_state == ConversationState.CONVERSING.value:
            seconds_until_finished = 20
        elif current_state == ConversationState.CONCLUDING.value:
            seconds_until_finished = 10
        else:
            seconds_until_finished = 0

        if self.session:
            request_id = str(uuid4())
            message_text = json.dumps(
                {"seconds_until_finished": seconds_until_finished}
            )
            try:
                self.session.put(
                    self.person_greeting_topic,
                    PersonGreetingStatus(
                        header=prepare_header(request_id),
                        request_id=String(data=request_id),
                        status=PersonGreetingStatus.STATUS.CONVERSATION.value,
                        message=String(data=message_text),
                    ).serialize(),
                )
                logging.info(f"Published PersonGreetingStatus: {message_text}")
            except Exception as e:
                logging.error(f"Error publishing PersonGreetingStatus: {e}")

    def stop(self):
        """Stop the connector and clean up resources.

        Cancels any pending delayed context update, stops the TTS
        provider started in __init__, and closes the Zenoh session.
        """
        logging.info("Stopping Greeting Conversation action...")

        # Fix: cancel the delayed context-update task so it cannot fire
        # (and mutate shared context) after the connector is torn down.
        if self.delayed_update_task is not None and not self.delayed_update_task.done():
            self.delayed_update_task.cancel()

        # Fix: the TTS provider was started in __init__ but never stopped.
        # Guarded via getattr — TODO confirm KokoroTTSProvider's shutdown API.
        stop_tts = getattr(self.tts, "stop", None)
        if callable(stop_tts):
            try:
                stop_tts()
            except Exception as e:
                logging.error(f"Error stopping Kokoro TTS provider: {e}")

        if self.session:
            try:
                self.session.close()
            except Exception as e:
                # Shutdown must not raise; log and continue.
                logging.error(f"Error closing Zenoh session: {e}")
            finally:
                self.session = None
Loading
Loading