From 78dc42fd2911ccecae02d590913b8151e8765ae9 Mon Sep 17 00:00:00 2001
From: MaxAdmk <maksimkeny@gmail.com>
Date: Mon, 19 Jan 2026 15:33:46 +0200
Subject: [PATCH 1/2] Refactor LLM usage to Groq (llama-3.3-70b-versatile) and
 harden research flow

- Migrate all LLM nodes (query generation, reflection, final answer) from Gemini to Groq using llama-3.3-70b-versatile
- Keep Gemini exclusively for Google Search grounding to preserve citation metadata
- Add graceful handling for Google Search API quota exhaustion (429 / RESOURCE_EXHAUSTED)
  - Return safe fallback state instead of crashing the graph
- Guard against missing or partial API responses
  - Handle empty candidates and absent grounding metadata
  - Fall back to plain-text extraction when citations cannot be generated
- Improve robustness and documentation
  - Defensive access to graph state keys
  - Fix docstring inconsistencies and remove redundant comments
---
 backend/examples/cli_research.py   |   2 +-
 backend/pyproject.toml             |   1 +
 backend/src/agent/configuration.py |  19 +++--
 backend/src/agent/graph.py         | 123 ++++++++++++++++++-----------
 4 files changed, 93 insertions(+), 52 deletions(-)

diff --git a/backend/examples/cli_research.py b/backend/examples/cli_research.py
index a086496be..eda7ad79b 100644
--- a/backend/examples/cli_research.py
+++ b/backend/examples/cli_research.py
@@ -21,7 +21,7 @@ def main() -> None:
     )
     parser.add_argument(
         "--reasoning-model",
-        default="gemini-2.5-pro-preview-05-06",
+        default="llama-3.3-70b-versatile",
         help="Model for the final answer",
     )
     args = parser.parse_args()
diff --git a/backend/pyproject.toml b/backend/pyproject.toml
index 09eb59885..d681f1679 100644
--- a/backend/pyproject.toml
+++ b/backend/pyproject.toml
@@ -12,6 +12,7 @@ dependencies = [
     "langgraph>=0.2.6",
     "langchain>=0.3.19",
     "langchain-google-genai",
+    "langchain-groq",
     "python-dotenv>=1.0.1",
     "langgraph-sdk>=0.1.57",
     "langgraph-cli",
diff --git a/backend/src/agent/configuration.py b/backend/src/agent/configuration.py
index e57122d23..75a24a579 100644
--- a/backend/src/agent/configuration.py
+++ b/backend/src/agent/configuration.py
@@ -9,23 +9,30 @@ class Configuration(BaseModel):
     """The configuration for the agent."""
 
     query_generator_model: str = Field(
-        default="gemini-2.0-flash",
+        default="llama-3.3-70b-versatile",
         metadata={
-            "description": "The name of the language model to use for the agent's query generation."
+            "description": "Groq model name to use for the agent's query generation."
         },
     )
 
     reflection_model: str = Field(
-        default="gemini-2.5-flash",
+        default="llama-3.3-70b-versatile",
         metadata={
-            "description": "The name of the language model to use for the agent's reflection."
+            "description": "Groq model name to use for the agent's reflection."
         },
     )
 
     answer_model: str = Field(
-        default="gemini-2.5-pro",
+        default="llama-3.3-70b-versatile",
+        metadata={
+            "description": "Groq model name to use for the agent's answer."
+        },
+    )
+
+    web_search_model: str = Field(
+        default="gemini-2.0-flash",
         metadata={
-            "description": "The name of the language model to use for the agent's answer."
+            "description": "Gemini model name to use for Google Search grounding."
         },
     )
 
diff --git a/backend/src/agent/graph.py b/backend/src/agent/graph.py
index 0f19c3f2f..d336e4aa7 100644
--- a/backend/src/agent/graph.py
+++ b/backend/src/agent/graph.py
@@ -8,6 +8,7 @@
 from langgraph.graph import START, END
 from langchain_core.runnables import RunnableConfig
 from google.genai import Client
+from google.genai.errors import ClientError
 
 from agent.state import (
     OverallState,
@@ -23,7 +24,7 @@
     reflection_instructions,
     answer_instructions,
 )
-from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_groq import ChatGroq
 from agent.utils import (
     get_citations,
     get_research_topic,
@@ -36,6 +37,9 @@
 if os.getenv("GEMINI_API_KEY") is None:
     raise ValueError("GEMINI_API_KEY is not set")
 
+if os.getenv("GROQ_API_KEY") is None:
+    raise ValueError("GROQ_API_KEY is not set")
+
 # Used for Google Search API
 genai_client = Client(api_key=os.getenv("GEMINI_API_KEY"))
 
@@ -44,8 +48,8 @@
 def generate_query(state: OverallState, config: RunnableConfig) -> QueryGenerationState:
     """LangGraph node that generates search queries based on the User's question.
 
-    Uses Gemini 2.0 Flash to create an optimized search queries for web research based on
-    the User's question.
+    Uses Groq (llama-3.3-70b-versatile) to create optimized search queries for web
+    research based on the User's question.
 
     Args:
         state: Current graph state containing the User's question
@@ -60,16 +64,15 @@ def generate_query(state: OverallState, config: RunnableConfig) -> QueryGenerati
     if state.get("initial_search_query_count") is None:
         state["initial_search_query_count"] = configurable.number_of_initial_queries
 
-    # init Gemini 2.0 Flash
-    llm = ChatGoogleGenerativeAI(
+    # init Groq LLM
+    llm = ChatGroq(
         model=configurable.query_generator_model,
         temperature=1.0,
         max_retries=2,
-        api_key=os.getenv("GEMINI_API_KEY"),
+        api_key=os.getenv("GROQ_API_KEY"),
     )
     structured_llm = llm.with_structured_output(SearchQueryList)
 
-    # Format the prompt
     current_date = get_current_date()
     formatted_prompt = query_writer_instructions.format(
         current_date=current_date,
@@ -95,7 +98,7 @@ def continue_to_web_research(state: QueryGenerationState):
 def web_research(state: WebSearchState, config: RunnableConfig) -> OverallState:
     """LangGraph node that performs web research using the native Google Search API tool.
 
-    Executes a web search using the native Google Search API tool in combination with Gemini 2.0 Flash.
+    Executes a web search using the native Google Search API tool.
 
     Args:
         state: Current graph state containing the search query and research loop count
@@ -104,34 +107,56 @@ def web_research(state: WebSearchState, config: RunnableConfig) -> OverallState:
     Returns:
         Dictionary with state update, including sources_gathered, research_loop_count, and web_research_results
     """
-    # Configure
     configurable = Configuration.from_runnable_config(config)
     formatted_prompt = web_searcher_instructions.format(
         current_date=get_current_date(),
         research_topic=state["search_query"],
     )
 
-    # Uses the google genai client as the langchain client doesn't return grounding metadata
-    response = genai_client.models.generate_content(
-        model=configurable.query_generator_model,
-        contents=formatted_prompt,
-        config={
-            "tools": [{"google_search": {}}],
-            "temperature": 0,
-        },
-    )
-    # resolve the urls to short urls for saving tokens and time
-    resolved_urls = resolve_urls(
-        response.candidates[0].grounding_metadata.grounding_chunks, state["id"]
+    try:
+        response = genai_client.models.generate_content(
+            model=configurable.web_search_model,
+            contents=formatted_prompt,
+            config={
+                "tools": [{"google_search": {}}],
+                "temperature": 0,
+            },
+        )
+    except ClientError as e:
+        if getattr(e, 'status_code', None) == 429 or (
+            "RESOURCE_EXHAUSTED" in str(e)
+        ):
+            return {
+                "sources_gathered": [],
+                "web_research_result": [
+                    "Web search skipped: Google Search API quota exceeded (RESOURCE_EXHAUSTED)."
+                ],
+            }
+        else:
+            raise
+    # If the response does not contain grounding metadata, fall back to plain text
+    candidates = getattr(response, "candidates", None) or []
+    grounding_metadata = (
+        getattr(candidates[0], "grounding_metadata", None) if candidates else None
     )
-    # Gets the citations and adds them to the generated text
-    citations = get_citations(response, resolved_urls)
-    modified_text = insert_citation_markers(response.text, citations)
-    sources_gathered = [item for citation in citations for item in citation["segments"]]
+    if not grounding_metadata or not getattr(grounding_metadata, "grounding_chunks", None):
+        modified_text = getattr(response, "text", str(response))
+        sources_gathered = []
+    else:
+        # resolve the urls to short urls for saving tokens and time
+        resolved_urls = resolve_urls(
+            grounding_metadata.grounding_chunks,
+            state["id"],
+        )
+        # Gets the citations and adds them to the generated text
+        citations = get_citations(response, resolved_urls)
+        modified_text = insert_citation_markers(response.text, citations)
+        sources_gathered = [
+            item for citation in citations for item in citation["segments"]
+        ]
 
     return {
         "sources_gathered": sources_gathered,
-        "search_query": [state["search_query"]],
         "web_research_result": [modified_text],
     }
 
@@ -148,26 +173,24 @@ def reflection(state: OverallState, config: RunnableConfig) -> ReflectionState:
         config: Configuration for the runnable, including LLM provider settings
 
     Returns:
-        Dictionary with state update, including search_query key containing the generated follow-up query
+        Dictionary with state update including is_sufficient, knowledge_gap, follow_up_queries, research_loop_count, and number_of_ran_queries
     """
     configurable = Configuration.from_runnable_config(config)
     # Increment the research loop count and get the reasoning model
     state["research_loop_count"] = state.get("research_loop_count", 0) + 1
     reasoning_model = state.get("reasoning_model", configurable.reflection_model)
 
-    # Format the prompt
     current_date = get_current_date()
     formatted_prompt = reflection_instructions.format(
         current_date=current_date,
         research_topic=get_research_topic(state["messages"]),
-        summaries="\n\n---\n\n".join(state["web_research_result"]),
+            summaries="\n\n---\n\n".join(state["web_research_result"]),
     )
-    # init Reasoning Model
-    llm = ChatGoogleGenerativeAI(
+    llm = ChatGroq(
         model=reasoning_model,
         temperature=1.0,
         max_retries=2,
-        api_key=os.getenv("GEMINI_API_KEY"),
+        api_key=os.getenv("GROQ_API_KEY"),
     )
     result = llm.with_structured_output(Reflection).invoke(formatted_prompt)
 
@@ -176,7 +199,7 @@ def reflection(state: OverallState, config: RunnableConfig) -> ReflectionState:
         "knowledge_gap": result.knowledge_gap,
         "follow_up_queries": result.follow_up_queries,
         "research_loop_count": state["research_loop_count"],
-        "number_of_ran_queries": len(state["search_query"]),
+        "number_of_ran_queries": len(state.get("search_query", [])),
     }
 
 
@@ -194,7 +217,7 @@ def evaluate_research(
         config: Configuration for the runnable, including max_research_loops setting
 
     Returns:
-        String literal indicating the next node to visit ("web_research" or "finalize_summary")
+        String "finalize_answer" or list of Send objects for "web_research" nodes
     """
     configurable = Configuration.from_runnable_config(config)
     max_research_loops = (
@@ -233,26 +256,36 @@ def finalize_answer(state: OverallState, config: RunnableConfig):
     configurable = Configuration.from_runnable_config(config)
     reasoning_model = state.get("reasoning_model") or configurable.answer_model
 
-    # Format the prompt
     current_date = get_current_date()
-    formatted_prompt = answer_instructions.format(
-        current_date=current_date,
-        research_topic=get_research_topic(state["messages"]),
-        summaries="\n---\n\n".join(state["web_research_result"]),
-    )
-
-    # init Reasoning Model, default to Gemini 2.5 Flash
-    llm = ChatGoogleGenerativeAI(
+    if any("Web search skipped" in r for r in state.get("web_research_result", [])):
+        # Fallback prompt: short, explicit about quota, forbids citations/links/outlets
+        formatted_prompt = (
+            "Web search was skipped due to quota limits.\n"
+            "Do NOT include any links, citations, markdown links, or name, suggest, or mention any sources or news outlets—including 'apnews', 'vertexaisearch', or any examples.\n"
+            "Do NOT recommend that the user check sources or news outlets.\n"
+            "\n"
+            "Based only on general knowledge, provide a brief best-effort answer to the following question:\n"
+            f"Question: {get_research_topic(state['messages'])}"
+        )
+    else:
+        # Normal prompt with citations if sources exist
+        formatted_prompt = answer_instructions.format(
+            current_date=current_date,
+            research_topic=get_research_topic(state["messages"]),
+            summaries="\n---\n\n".join(state["web_research_result"]),
+        )
+
+    llm = ChatGroq(
         model=reasoning_model,
         temperature=0,
         max_retries=2,
-        api_key=os.getenv("GEMINI_API_KEY"),
+        api_key=os.getenv("GROQ_API_KEY"),
     )
     result = llm.invoke(formatted_prompt)
 
     # Replace the short urls with the original urls and add all used urls to the sources_gathered
     unique_sources = []
-    for source in state["sources_gathered"]:
+    for source in state.get("sources_gathered", []):
         if source["short_url"] in result.content:
             result.content = result.content.replace(
                 source["short_url"], source["value"]

From 98f16137d2eccfc634ea6d8539d9c1d3074c642a Mon Sep 17 00:00:00 2001
From: MaxAdmk <maksimkeny@gmail.com>
Date: Mon, 19 Jan 2026 18:27:26 +0200
Subject: [PATCH 2/2] Replace external web search with a local markdown
 directory search

The search component operates on a user-provided directory and retrieves
relevant documentation snippets directly from markdown files.

Architecture decision:
I chose an extended-context, file-based search approach instead of a
broad snippet search or vector database. Technical documentation often
contains long code examples that are easily truncated by short RAG-style
snippets.

To preserve full examples while respecting Groq token limits, the search
retrieves a small number of highly relevant results (top_k=2 per query)
with a larger context window (up to 120 lines: 20 before the matched
line and 100 from it onward). Relevance is determined using a
deterministic keyword and phrase scoring mechanism without external
dependencies.

This design keeps the agent lightweight, reproducible, and suitable for
offline evaluation on local documentation archives.
---
 backend/examples/cli_research.py |   6 +
 backend/src/agent/graph.py       | 303 +++++++++++++++++++++----------
 backend/src/agent/prompts.py     |  27 +--
 backend/src/agent/state.py       |   7 +-
 4 files changed, 228 insertions(+), 115 deletions(-)

diff --git a/backend/examples/cli_research.py b/backend/examples/cli_research.py
index eda7ad79b..6df006b0a 100644
--- a/backend/examples/cli_research.py
+++ b/backend/examples/cli_research.py
@@ -7,6 +7,11 @@ def main() -> None:
     """Run the research agent from the command line."""
     parser = argparse.ArgumentParser(description="Run the LangGraph research agent")
     parser.add_argument("question", help="Research question")
+    parser.add_argument(
+        "--dir",
+        required=True,
+        help="Path to a local directory containing .md files for the agent to reference",
+    )
     parser.add_argument(
         "--initial-queries",
         type=int,
@@ -28,6 +33,7 @@ def main() -> None:
 
     state = {
         "messages": [HumanMessage(content=args.question)],
+        "docs_dir": args.dir,
         "initial_search_query_count": args.initial_queries,
         "max_research_loops": args.max_loops,
         "reasoning_model": args.reasoning_model,
diff --git a/backend/src/agent/graph.py b/backend/src/agent/graph.py
index d336e4aa7..46416626e 100644
--- a/backend/src/agent/graph.py
+++ b/backend/src/agent/graph.py
@@ -7,8 +7,6 @@
 from langgraph.graph import StateGraph
 from langgraph.graph import START, END
 from langchain_core.runnables import RunnableConfig
-from google.genai import Client
-from google.genai.errors import ClientError
 
 from agent.state import (
     OverallState,
@@ -20,29 +18,20 @@
 from agent.prompts import (
     get_current_date,
     query_writer_instructions,
-    web_searcher_instructions,
     reflection_instructions,
     answer_instructions,
 )
 from langchain_groq import ChatGroq
-from agent.utils import (
-    get_citations,
-    get_research_topic,
-    insert_citation_markers,
-    resolve_urls,
-)
+from agent.utils import get_research_topic
+import glob
+import re
+from pathlib import Path
 
 load_dotenv()
 
-if os.getenv("GEMINI_API_KEY") is None:
-    raise ValueError("GEMINI_API_KEY is not set")
-
 if os.getenv("GROQ_API_KEY") is None:
     raise ValueError("GROQ_API_KEY is not set")
 
-# Used for Google Search API
-genai_client = Client(api_key=os.getenv("GEMINI_API_KEY"))
-
 
 # Nodes
 def generate_query(state: OverallState, config: RunnableConfig) -> QueryGenerationState:
@@ -60,14 +49,13 @@ def generate_query(state: OverallState, config: RunnableConfig) -> QueryGenerati
     """
     configurable = Configuration.from_runnable_config(config)
 
-    # check for custom initial search query count
-    if state.get("initial_search_query_count") is None:
-        state["initial_search_query_count"] = configurable.number_of_initial_queries
+    # Limit query count to reduce token usage
+    state["initial_search_query_count"] = 2
 
     # init Groq LLM
     llm = ChatGroq(
         model=configurable.query_generator_model,
-        temperature=1.0,
+        temperature=0,
         max_retries=2,
         api_key=os.getenv("GROQ_API_KEY"),
     )
@@ -90,74 +78,53 @@ def continue_to_web_research(state: QueryGenerationState):
     This is used to spawn n number of web research nodes, one for each search query.
     """
     return [
-        Send("web_research", {"search_query": search_query, "id": int(idx)})
+        Send(
+            "web_research",
+            {
+                "search_query": search_query,
+                "id": idx,
+                "docs_dir": state.get("docs_dir"),
+            },
+        )
         for idx, search_query in enumerate(state["search_query"])
     ]
 
 
 def web_research(state: WebSearchState, config: RunnableConfig) -> OverallState:
-    """LangGraph node that performs web research using the native Google Search API tool.
-
-    Executes a web search using the native Google Search API tool.
-
-    Args:
-        state: Current graph state containing the search query and research loop count
-        config: Configuration for the runnable, including search API settings
-
-    Returns:
-        Dictionary with state update, including sources_gathered, research_loop_count, and web_research_results
-    """
-    configurable = Configuration.from_runnable_config(config)
-    formatted_prompt = web_searcher_instructions.format(
-        current_date=get_current_date(),
-        research_topic=state["search_query"],
-    )
-
-    try:
-        response = genai_client.models.generate_content(
-            model=configurable.web_search_model,
-            contents=formatted_prompt,
-            config={
-                "tools": [{"google_search": {}}],
-                "temperature": 0,
-            },
-        )
-    except ClientError as e:
-        if getattr(e, 'status_code', None) == 429 or (
-            "RESOURCE_EXHAUSTED" in str(e)
-        ):
-            return {
-                "sources_gathered": [],
-                "web_research_result": [
-                    "Web search skipped: Google Search API quota exceeded (RESOURCE_EXHAUSTED)."
-                ],
-            }
-        else:
-            raise
-    # If the response does not contain grounding metadata, fall back to plain text
-    candidates = getattr(response, "candidates", None) or []
-    grounding_metadata = (
-        getattr(candidates[0], "grounding_metadata", None) if candidates else None
-    )
-    if not grounding_metadata or not getattr(grounding_metadata, "grounding_chunks", None):
-        modified_text = getattr(response, "text", str(response))
-        sources_gathered = []
-    else:
-        # resolve the urls to short urls for saving tokens and time
-        resolved_urls = resolve_urls(
-            grounding_metadata.grounding_chunks,
-            state["id"],
-        )
-        # Gets the citations and adds them to the generated text
-        citations = get_citations(response, resolved_urls)
-        modified_text = insert_citation_markers(response.text, citations)
-        sources_gathered = [
-            item for citation in citations for item in citation["segments"]
-        ]
+    """LangGraph node that performs local markdown search instead of Google Search."""
+    docs_dir = state.get("docs_dir")
+    if not docs_dir:
+        return {
+            "sources_gathered": [],
+            "web_research_result": [
+                "Web search skipped: no docs_dir provided for local markdown search."
+            ],
+            "search_query": [state.get("search_query", "")],
+        }
+
+    search_query = state["search_query"]
+    snippets = _search_markdown_directory(docs_dir, search_query, top_k=2)
+
+    if not snippets:
+        return {
+            "sources_gathered": [],
+            "web_research_result": [
+                f"No local markdown results found for query: {search_query}"
+            ],
+            "search_query": [search_query],
+        }
+
+    sources = []
+    result_chunks = []
+    for idx, (rel_path, snippet) in enumerate(snippets):
+        marker = f"[S{idx}]"
+        sources.append({"short_url": marker, "value": rel_path})
+        result_chunks.append(f"{marker} {rel_path}\n{snippet}")
 
     return {
-        "sources_gathered": sources_gathered,
-        "web_research_result": [modified_text],
+        "sources_gathered": sources,
+        "web_research_result": ["\n\n---\n\n".join(result_chunks)],
+        "search_query": [search_query],
     }
 
 
@@ -233,7 +200,8 @@ def evaluate_research(
                 "web_research",
                 {
                     "search_query": follow_up_query,
-                    "id": state["number_of_ran_queries"] + int(idx),
+                    "id": state["number_of_ran_queries"] + idx,
+                    "docs_dir": state.get("docs_dir"),
                 },
             )
             for idx, follow_up_query in enumerate(state["follow_up_queries"])
@@ -257,22 +225,25 @@ def finalize_answer(state: OverallState, config: RunnableConfig):
     reasoning_model = state.get("reasoning_model") or configurable.answer_model
 
     current_date = get_current_date()
-    if any("Web search skipped" in r for r in state.get("web_research_result", [])):
-        # Fallback prompt: short, explicit about quota, forbids citations/links/outlets
+
+    # Filter out error messages and keep only valid content chunks
+    all_results = state.get("web_research_result", [])
+    valid_chunks = [
+        r for r in all_results
+        if not ("No local markdown results found" in r or "no docs_dir provided" in r or "Web search skipped" in r)
+        and r.strip()
+    ]
+
+    if not valid_chunks:
         formatted_prompt = (
-            "Web search was skipped due to quota limits.\n"
-            "Do NOT include any links, citations, markdown links, or name, suggest, or mention any sources or news outlets—including 'apnews', 'vertexaisearch', or any examples.\n"
-            "Do NOT recommend that the user check sources or news outlets.\n"
-            "\n"
-            "Based only on general knowledge, provide a brief best-effort answer to the following question:\n"
-            f"Question: {get_research_topic(state['messages'])}"
+            "I could not find sufficient information in the provided documentation to answer this question."
         )
     else:
         # Normal prompt with citations if sources exist
         formatted_prompt = answer_instructions.format(
             current_date=current_date,
             research_topic=get_research_topic(state["messages"]),
-            summaries="\n---\n\n".join(state["web_research_result"]),
+            summaries="\n---\n\n".join(valid_chunks),
         )
 
     llm = ChatGroq(
@@ -283,20 +254,152 @@ def finalize_answer(state: OverallState, config: RunnableConfig):
     )
     result = llm.invoke(formatted_prompt)
 
-    # Replace the short urls with the original urls and add all used urls to the sources_gathered
-    unique_sources = []
+    fallback_msg = "I could not find sufficient information in the provided documentation to answer this question."
+    content = result.content if hasattr(result, "content") else ""
+    content = content.strip() if isinstance(content, str) else ""
+
+    if content != fallback_msg and not re.search(r"\[S\d+\]", content):
+        content = fallback_msg
+
+    if content == fallback_msg:
+        return {"messages": [AIMessage(content=fallback_msg)], "sources_gathered": []}
+    if not content:
+        return {"messages": [AIMessage(content=fallback_msg)], "sources_gathered": []}
+
+    source_map: dict[str, str] = {}
     for source in state.get("sources_gathered", []):
-        if source["short_url"] in result.content:
-            result.content = result.content.replace(
-                source["short_url"], source["value"]
-            )
-            unique_sources.append(source)
+        if not isinstance(source, dict):
+            continue
+        key = source.get("short_url")
+        val = source.get("value")
+        if isinstance(key, str) and isinstance(val, str) and key not in source_map:
+            source_map[key] = val
+    used_markers = []
+    for marker in re.findall(r"\[S\d+\]", content):
+        if marker in source_map and marker not in used_markers:
+            used_markers.append(marker)
+
+    if not used_markers:
+        return {"messages": [AIMessage(content=fallback_msg)], "sources_gathered": []}
+
+    sources_lines = [
+        f"- {m} -> [{source_map[m]}]({source_map[m]})"
+        for m in used_markers
+    ]
+    content = f"{content}\n\nSources:\n" + "\n".join(sources_lines)
 
-    return {
-        "messages": [AIMessage(content=result.content)],
-        "sources_gathered": unique_sources,
+    return {"messages": [AIMessage(content=content)], "sources_gathered": state.get("sources_gathered", [])}
+
+
+def _search_markdown_directory(base_dir: str, query: str, top_k: int = 5):
+    """Search recursively for markdown files and return top-k relevant snippets.
+
+    Scores files based on keyword and phrase matching in both file paths and content.
+    Uses a weighted scoring system: path term (3x), path phrase (5x), content phrase (3x), content term (1x).
+    Requires a total weighted score of at least 2 (any path or content-phrase match suffices) to include results.
+    Extracts line-based snippets around the best match position for deterministic grounding.
+
+    Args:
+        base_dir: Base directory path (resolved to absolute) to search recursively for .md files.
+        query: Search query string to match against file content and paths.
+        top_k: Maximum number of results to return (default: 5).
+
+    Returns:
+        List of tuples (relative_path, snippet) sorted by relevance score descending.
+    """
+    base_path = Path(base_dir).resolve()
+    if not base_path.exists():
+        return []
+
+    # Collect files
+    md_files = [Path(p) for p in glob.glob(str(base_path / "**" / "*.md"), recursive=True)]
+    if not md_files:
+        return []
+
+    # Small set of common English stopwords for filtering
+    STOPWORDS = {
+        "the", "a", "an", "in", "on", "of", "for", "to", "and", "or", "is", "are", "was", "were", "with", "by",
+        "at", "it", "as", "that", "from", "be", "this", "which"
     }
 
+    query_lower = query.lower()
+    # Extract individual terms, filter stopwords
+    terms = [t.lower() for t in re.findall(r"\w+", query) if t]
+    terms = [t for t in terms if t not in STOPWORDS]
+
+    # Extract multi-word phrases (2-3 words)
+    phrases = []
+    words = [w for w in re.findall(r"\w+", query_lower) if w not in STOPWORDS]
+    for i in range(len(words) - 1):
+        phrases.append(f"{words[i]} {words[i+1]}")
+    for i in range(len(words) - 2):
+        phrases.append(f"{words[i]} {words[i+1]} {words[i+2]}")
+
+    def score_and_snippet(path: Path):
+        try:
+            text = path.read_text(encoding="utf-8")
+        except Exception:
+            return 0, ""
+
+        rel_path = str(path.relative_to(base_path))
+        path_lower = rel_path.lower()
+        text_lower = text.lower()
+
+        # Score file path matches (boost)
+        path_score = sum(path_lower.count(t) * 3 for t in terms) + sum(path_lower.count(p) * 5 for p in phrases)
+
+        # Score content matches
+        term_score = sum(text_lower.count(t) for t in terms) if terms else 0
+        phrase_score = sum(text_lower.count(p) * 3 for p in phrases)
+
+        # Require a total weighted score of at least 2 (one path or content-phrase match is enough)
+        total_score = path_score + term_score + phrase_score
+        if total_score < 2 and phrase_score == 0:
+            return 0, ""
+
+        # Find best match position (prefer phrase matches, then term matches)
+        best_idx = len(text)
+        best_token = None
+        for p in phrases:
+            idx = text_lower.find(p)
+            if idx != -1:
+                best_idx = min(best_idx, idx)
+                if best_token is None or idx == best_idx:
+                    best_token = p
+        if best_idx == len(text):
+            for t in terms:
+                idx = text_lower.find(t)
+                if idx != -1:
+                    best_idx = min(best_idx, idx)
+                    if best_token is None or idx == best_idx:
+                        best_token = t
+
+        # Extract line-based snippet around the best match for more deterministic grounding
+        lines = text.splitlines()
+        if best_token:
+            match_line = 0
+            for i, line in enumerate(lines):
+                if best_token in line.lower():
+                    match_line = i
+                    break
+            # Capture 20 lines before and 100 lines after to ensure we get the full code example
+            start_line = max(0, match_line - 20)
+            end_line = min(len(lines), match_line + 100)
+        else:
+            start_line, end_line = 0, min(len(lines), 120)
+        snippet = "\n".join(lines[start_line:end_line]).strip()
+        return total_score, snippet
+
+    scored = []
+    for path in md_files:
+        score, snippet = score_and_snippet(path)
+        if score > 0 and snippet:
+            rel_path = str(path.relative_to(base_path))
+            scored.append((score, rel_path, snippet))
+
+    scored.sort(key=lambda x: x[0], reverse=True)
+    return [(rel_path, snippet) for _, rel_path, snippet in scored[:top_k]]
+
 
 # Create our Agent Graph
 builder = StateGraph(OverallState, config_schema=Configuration)
diff --git a/backend/src/agent/prompts.py b/backend/src/agent/prompts.py
index 8963f6a6e..90539de2e 100644
--- a/backend/src/agent/prompts.py
+++ b/backend/src/agent/prompts.py
@@ -79,18 +79,19 @@ def get_current_date():
 {summaries}
 """
 
-answer_instructions = """Generate a high-quality answer to the user's question based on the provided summaries.
-
-Instructions:
-- The current date is {current_date}.
-- You are the final step of a multi-step research process, don't mention that you are the final step. 
-- You have access to all the information gathered from the previous steps.
-- You have access to the user's question.
-- Generate a high-quality answer to the user's question based on the provided summaries and the user's question.
-- Include the sources you used from the Summaries in the answer correctly, use markdown format (e.g. [apnews](https://vertexaisearch.cloud.google.com/id/1-0)). THIS IS A MUST.
-
-User Context:
-- {research_topic}
+answer_instructions = """Answer the user's question using ONLY the provided documentation snippets.
+
+HARD RULES:
+- Use ONLY information from the snippets below.
+- If the snippets do not contain enough information, output EXACTLY:
+I could not find sufficient information in the provided documentation to answer this question.
+- Every paragraph MUST include at least one citation marker exactly as shown in the snippets (e.g. [S0]).
+- Do NOT invent examples or code.
+- Do NOT mention web sources.
+- Output ONLY the answer (no preamble, no questions).
+
+User Question:
+{research_topic}
 
-Summaries:
+Documentation Snippets:
 {summaries}"""
diff --git a/backend/src/agent/state.py b/backend/src/agent/state.py
index d5ad4dcd8..c4c54b0dd 100644
--- a/backend/src/agent/state.py
+++ b/backend/src/agent/state.py
@@ -15,6 +15,7 @@ class OverallState(TypedDict):
     search_query: Annotated[list, operator.add]
     web_research_result: Annotated[list, operator.add]
     sources_gathered: Annotated[list, operator.add]
+    docs_dir: str
     initial_search_query_count: int
     max_research_loops: int
     research_loop_count: int
@@ -35,12 +36,14 @@ class Query(TypedDict):
 
 
 class QueryGenerationState(TypedDict):
-    search_query: list[Query]
+    search_query: list[str]
+    docs_dir: str
 
 
 class WebSearchState(TypedDict):
     search_query: str
-    id: str
+    id: int
+    docs_dir: str
 
 
 @dataclass(kw_only=True)