From 78dc42fd2911ccecae02d590913b8151e8765ae9 Mon Sep 17 00:00:00 2001 From: MaxAdmk Date: Mon, 19 Jan 2026 15:33:46 +0200 Subject: [PATCH 1/2] Refactor LLM usage to Groq (llama-3.3-70b-versatile) and harden research flow - Migrate all LLM nodes (query generation, reflection, final answer) from Gemini to Groq using llama-3.3-70b-versatile - Keep Gemini exclusively for Google Search grounding to preserve citation metadata - Add graceful handling for Google Search API quota exhaustion (429 / RESOURCE_EXHAUSTED) - Return safe fallback state instead of crashing the graph - Guard against missing or partial API responses - Handle empty candidates and absent grounding metadata - Fall back to plain-text extraction when citations cannot be generated - Improve robustness and documentation - Defensive access to graph state keys - Fix docstring inconsistencies and remove redundant comments --- backend/examples/cli_research.py | 2 +- backend/pyproject.toml | 1 + backend/src/agent/configuration.py | 19 +++-- backend/src/agent/graph.py | 123 ++++++++++++++++++----------- 4 files changed, 93 insertions(+), 52 deletions(-) diff --git a/backend/examples/cli_research.py b/backend/examples/cli_research.py index a086496be..eda7ad79b 100644 --- a/backend/examples/cli_research.py +++ b/backend/examples/cli_research.py @@ -21,7 +21,7 @@ def main() -> None: ) parser.add_argument( "--reasoning-model", - default="gemini-2.5-pro-preview-05-06", + default="llama-3.3-70b-versatile", help="Model for the final answer", ) args = parser.parse_args() diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 09eb59885..d681f1679 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -12,6 +12,7 @@ dependencies = [ "langgraph>=0.2.6", "langchain>=0.3.19", "langchain-google-genai", + "langchain-groq", "python-dotenv>=1.0.1", "langgraph-sdk>=0.1.57", "langgraph-cli", diff --git a/backend/src/agent/configuration.py b/backend/src/agent/configuration.py index e57122d23..75a24a579 100644 --- a/backend/src/agent/configuration.py +++ b/backend/src/agent/configuration.py @@ -9,23 +9,30 @@ class Configuration(BaseModel): """The configuration for the agent.""" query_generator_model: str = Field( - default="gemini-2.0-flash", + default="llama-3.3-70b-versatile", metadata={ - "description": "The name of the language model to use for the agent's query generation." + "description": "Groq model name to use for the agent's query generation." }, ) reflection_model: str = Field( - default="gemini-2.5-flash", + default="llama-3.3-70b-versatile", metadata={ - "description": "The name of the language model to use for the agent's reflection." + "description": "Groq model name to use for the agent's reflection." }, ) answer_model: str = Field( - default="gemini-2.5-pro", + default="llama-3.3-70b-versatile", + metadata={ + "description": "Groq model name to use for the agent's answer." + }, + ) + + web_search_model: str = Field( + default="gemini-2.0-flash", metadata={ - "description": "The name of the language model to use for the agent's answer." + "description": "Gemini model name to use for Google Search grounding." }, ) diff --git a/backend/src/agent/graph.py b/backend/src/agent/graph.py index 0f19c3f2f..d336e4aa7 100644 --- a/backend/src/agent/graph.py +++ b/backend/src/agent/graph.py @@ -8,6 +8,7 @@ from langgraph.graph import START, END from langchain_core.runnables import RunnableConfig from google.genai import Client +from google.genai.errors import ClientError from agent.state import ( OverallState, @@ -23,7 +24,7 @@ reflection_instructions, answer_instructions, ) -from langchain_google_genai import ChatGoogleGenerativeAI +from langchain_groq import ChatGroq from agent.utils import ( get_citations, get_research_topic, @@ -36,6 +37,9 @@ if os.getenv("GEMINI_API_KEY") is None: raise ValueError("GEMINI_API_KEY is not set") +if os.getenv("GROQ_API_KEY") is None: + raise ValueError("GROQ_API_KEY is not set") + # Used for Google Search API genai_client = Client(api_key=os.getenv("GEMINI_API_KEY")) @@ -44,8 +48,8 @@ def generate_query(state: OverallState, config: RunnableConfig) -> QueryGenerationState: """LangGraph node that generates search queries based on the User's question. - Uses Gemini 2.0 Flash to create an optimized search queries for web research based on - the User's question. + Uses Groq (llama-3.3-70b-versatile) to create optimized search queries for web + research based on the User's question. Args: state: Current graph state containing the User's question @@ -60,16 +64,15 @@ def generate_query(state: OverallState, config: RunnableConfig) -> QueryGenerati if state.get("initial_search_query_count") is None: state["initial_search_query_count"] = configurable.number_of_initial_queries - # init Gemini 2.0 Flash - llm = ChatGoogleGenerativeAI( + # init Groq LLM + llm = ChatGroq( model=configurable.query_generator_model, temperature=1.0, max_retries=2, - api_key=os.getenv("GEMINI_API_KEY"), + api_key=os.getenv("GROQ_API_KEY"), ) structured_llm = llm.with_structured_output(SearchQueryList) - # Format the prompt current_date = get_current_date() formatted_prompt = query_writer_instructions.format( current_date=current_date, @@ -95,7 +98,7 @@ def continue_to_web_research(state: QueryGenerationState): def web_research(state: WebSearchState, config: RunnableConfig) -> OverallState: """LangGraph node that performs web research using the native Google Search API tool. - Executes a web search using the native Google Search API tool in combination with Gemini 2.0 Flash. + Executes a web search using the native Google Search API tool. Args: state: Current graph state containing the search query and research loop count @@ -104,34 +107,56 @@ def web_research(state: WebSearchState, config: RunnableConfig) -> OverallState: Returns: Dictionary with state update, including sources_gathered, research_loop_count, and web_research_results """ - # Configure configurable = Configuration.from_runnable_config(config) formatted_prompt = web_searcher_instructions.format( current_date=get_current_date(), research_topic=state["search_query"], ) - # Uses the google genai client as the langchain client doesn't return grounding metadata - response = genai_client.models.generate_content( - model=configurable.query_generator_model, - contents=formatted_prompt, - config={ - "tools": [{"google_search": {}}], - "temperature": 0, - }, - ) - # resolve the urls to short urls for saving tokens and time - resolved_urls = resolve_urls( - response.candidates[0].grounding_metadata.grounding_chunks, state["id"] + try: + response = genai_client.models.generate_content( + model=configurable.web_search_model, + contents=formatted_prompt, + config={ + "tools": [{"google_search": {}}], + "temperature": 0, + }, + ) + except ClientError as e: + if getattr(e, 'status_code', None) == 429 or ( + "RESOURCE_EXHAUSTED" in str(e) + ): + return { + "sources_gathered": [], + "web_research_result": [ + "Web search skipped: Google Search API quota exceeded (RESOURCE_EXHAUSTED)." + ], + } + else: + raise + # If the response does not contain grounding metadata, fall back to plain text + candidates = getattr(response, "candidates", None) or [] + grounding_metadata = ( + getattr(candidates[0], "grounding_metadata", None) if candidates else None ) - # Gets the citations and adds them to the generated text - citations = get_citations(response, resolved_urls) - modified_text = insert_citation_markers(response.text, citations) - sources_gathered = [item for citation in citations for item in citation["segments"]] + if not grounding_metadata or not getattr(grounding_metadata, "grounding_chunks", None): + modified_text = getattr(response, "text", str(response)) + sources_gathered = [] + else: + # resolve the urls to short urls for saving tokens and time + resolved_urls = resolve_urls( + grounding_metadata.grounding_chunks, + state["id"], + ) + # Gets the citations and adds them to the generated text + citations = get_citations(response, resolved_urls) + modified_text = insert_citation_markers(response.text, citations) + sources_gathered = [ + item for citation in citations for item in citation["segments"] + ] return { "sources_gathered": sources_gathered, - "search_query": [state["search_query"]], "web_research_result": [modified_text], } @@ -148,26 +173,24 @@ def reflection(state: OverallState, config: RunnableConfig) -> ReflectionState: config: Configuration for the runnable, including LLM provider settings Returns: - Dictionary with state update, including search_query key containing the generated follow-up query + Dictionary with state update including is_sufficient, knowledge_gap, follow_up_queries, research_loop_count, and number_of_ran_queries """ configurable = Configuration.from_runnable_config(config) # Increment the research loop count and get the reasoning model state["research_loop_count"] = state.get("research_loop_count", 0) + 1 reasoning_model = state.get("reasoning_model", configurable.reflection_model) - # Format the prompt current_date = get_current_date() formatted_prompt = reflection_instructions.format( current_date=current_date, research_topic=get_research_topic(state["messages"]), - summaries="\n\n---\n\n".join(state["web_research_result"]), + summaries="\n\n---\n\n".join(state["web_research_result"]), ) - # init Reasoning Model - llm = ChatGoogleGenerativeAI( + llm = ChatGroq( model=reasoning_model, temperature=1.0, max_retries=2, - api_key=os.getenv("GEMINI_API_KEY"), + api_key=os.getenv("GROQ_API_KEY"), ) result = llm.with_structured_output(Reflection).invoke(formatted_prompt) @@ -176,7 +199,7 @@ def reflection(state: OverallState, config: RunnableConfig) -> ReflectionState: "knowledge_gap": result.knowledge_gap, "follow_up_queries": result.follow_up_queries, "research_loop_count": state["research_loop_count"], - "number_of_ran_queries": len(state["search_query"]), + "number_of_ran_queries": len(state.get("search_query", [])), } @@ -194,7 +217,7 @@ def evaluate_research( config: Configuration for the runnable, including max_research_loops setting Returns: - String literal indicating the next node to visit ("web_research" or "finalize_summary") + String "finalize_answer" or list of Send objects for "web_research" nodes """ configurable = Configuration.from_runnable_config(config) max_research_loops = ( @@ -233,26 +256,36 @@ def finalize_answer(state: OverallState, config: RunnableConfig): configurable = Configuration.from_runnable_config(config) reasoning_model = state.get("reasoning_model") or configurable.answer_model - # Format the prompt current_date = get_current_date() - formatted_prompt = answer_instructions.format( - current_date=current_date, - research_topic=get_research_topic(state["messages"]), - summaries="\n---\n\n".join(state["web_research_result"]), - ) - - # init Reasoning Model, default to Gemini 2.5 Flash - llm = ChatGoogleGenerativeAI( + if any("Web search skipped" in r for r in state.get("web_research_result", [])): + # Fallback prompt: short, explicit about quota, forbids citations/links/outlets + formatted_prompt = ( + "Web search was skipped due to quota limits.\n" + "Do NOT include any links, citations, markdown links, or name, suggest, or mention any sources or news outlets—including 'apnews', 'vertexaisearch', or any examples.\n" + "Do NOT recommend that the user check sources or news outlets.\n" + "\n" + "Based only on general knowledge, provide a brief best-effort answer to the following question:\n" + f"Question: {get_research_topic(state['messages'])}" + ) + else: + # Normal prompt with citations if sources exist + formatted_prompt = answer_instructions.format( + current_date=current_date, + research_topic=get_research_topic(state["messages"]), + summaries="\n---\n\n".join(state["web_research_result"]), + ) + + llm = ChatGroq( model=reasoning_model, temperature=0, max_retries=2, - api_key=os.getenv("GEMINI_API_KEY"), + api_key=os.getenv("GROQ_API_KEY"), ) result = llm.invoke(formatted_prompt) # Replace the short urls with the original urls and add all used urls to the sources_gathered unique_sources = [] - for source in state["sources_gathered"]: + for source in state.get("sources_gathered", []): if source["short_url"] in result.content: result.content = result.content.replace( source["short_url"], source["value"] From 98f16137d2eccfc634ea6d8539d9c1d3074c642a Mon Sep 17 00:00:00 2001 From: MaxAdmk Date: Mon, 19 Jan 2026 18:27:26 +0200 Subject: [PATCH 2/2] Replaced external web search with a local markdown directory search. The search component operates on a user-provided directory and retrieves relevant documentation snippets directly from markdown files. Architecture decision: I chose an extended-context, file-based search approach instead of a broad snippet search or vector database. Technical documentation often contains long code examples that are easily truncated by short RAG-style snippets. To preserve full examples while respecting Groq token limits, the search retrieves a small number of highly relevant results (top_k=2) with a larger context window (~100 lines). Relevance is determined using a deterministic keyword and phrase scoring mechanism without external dependencies. This design keeps the agent lightweight, reproducible, and suitable for offline evaluation on local documentation archives. --- backend/examples/cli_research.py | 6 + backend/src/agent/graph.py | 303 +++++++++++++++++++++---------- backend/src/agent/prompts.py | 27 +-- backend/src/agent/state.py | 7 +- 4 files changed, 228 insertions(+), 115 deletions(-) diff --git a/backend/examples/cli_research.py b/backend/examples/cli_research.py index eda7ad79b..6df006b0a 100644 --- a/backend/examples/cli_research.py +++ b/backend/examples/cli_research.py @@ -7,6 +7,11 @@ def main() -> None: """Run the research agent from the command line.""" parser = argparse.ArgumentParser(description="Run the LangGraph research agent") parser.add_argument("question", help="Research question") + parser.add_argument( + "--dir", + required=True, + help="Path to a local directory containing .md files for the agent to reference", + ) parser.add_argument( "--initial-queries", type=int, @@ -28,6 +33,7 @@ def main() -> None: state = { "messages": [HumanMessage(content=args.question)], + "docs_dir": args.dir, "initial_search_query_count": args.initial_queries, "max_research_loops": args.max_loops, "reasoning_model": args.reasoning_model, diff --git a/backend/src/agent/graph.py b/backend/src/agent/graph.py index d336e4aa7..46416626e 100644 --- a/backend/src/agent/graph.py +++ b/backend/src/agent/graph.py @@ -7,8 +7,6 @@ from langgraph.graph import StateGraph from langgraph.graph import START, END from langchain_core.runnables import RunnableConfig -from google.genai import Client -from google.genai.errors import ClientError from agent.state import ( OverallState, @@ -20,29 +18,20 @@ from agent.prompts import ( get_current_date, query_writer_instructions, - web_searcher_instructions, reflection_instructions, answer_instructions, ) from langchain_groq import ChatGroq -from agent.utils import ( - get_citations, - get_research_topic, - insert_citation_markers, - resolve_urls, -) +from agent.utils import get_research_topic +import glob +import re +from pathlib import Path load_dotenv() -if os.getenv("GEMINI_API_KEY") is None: - raise ValueError("GEMINI_API_KEY is not set") - if os.getenv("GROQ_API_KEY") is None: raise ValueError("GROQ_API_KEY is not set") -# Used for Google Search API -genai_client = Client(api_key=os.getenv("GEMINI_API_KEY")) - # Nodes def generate_query(state: OverallState, config: RunnableConfig) -> QueryGenerationState: @@ -60,14 +49,13 @@ def generate_query(state: OverallState, config: RunnableConfig) -> QueryGenerati """ configurable = Configuration.from_runnable_config(config) - # check for custom initial search query count - if state.get("initial_search_query_count") is None: - state["initial_search_query_count"] = configurable.number_of_initial_queries + # Limit query count to reduce token usage + state["initial_search_query_count"] = 2 # init Groq LLM llm = ChatGroq( model=configurable.query_generator_model, - temperature=1.0, + temperature=0, max_retries=2, api_key=os.getenv("GROQ_API_KEY"), ) @@ -90,74 +78,53 @@ def continue_to_web_research(state: QueryGenerationState): This is used to spawn n number of web research nodes, one for each search query. """ return [ - Send("web_research", {"search_query": search_query, "id": int(idx)}) + Send( + "web_research", + { + "search_query": search_query, + "id": idx, + "docs_dir": state.get("docs_dir"), + }, + ) for idx, search_query in enumerate(state["search_query"]) ] def web_research(state: WebSearchState, config: RunnableConfig) -> OverallState: - """LangGraph node that performs web research using the native Google Search API tool. - - Executes a web search using the native Google Search API tool. - - Args: - state: Current graph state containing the search query and research loop count - config: Configuration for the runnable, including search API settings - - Returns: - Dictionary with state update, including sources_gathered, research_loop_count, and web_research_results - """ - configurable = Configuration.from_runnable_config(config) - formatted_prompt = web_searcher_instructions.format( - current_date=get_current_date(), - research_topic=state["search_query"], - ) - - try: - response = genai_client.models.generate_content( - model=configurable.web_search_model, - contents=formatted_prompt, - config={ - "tools": [{"google_search": {}}], - "temperature": 0, - }, - ) - except ClientError as e: - if getattr(e, 'status_code', None) == 429 or ( - "RESOURCE_EXHAUSTED" in str(e) - ): - return { - "sources_gathered": [], - "web_research_result": [ - "Web search skipped: Google Search API quota exceeded (RESOURCE_EXHAUSTED)." - ], - } - else: - raise - # If the response does not contain grounding metadata, fall back to plain text - candidates = getattr(response, "candidates", None) or [] - grounding_metadata = ( - getattr(candidates[0], "grounding_metadata", None) if candidates else None - ) - if not grounding_metadata or not getattr(grounding_metadata, "grounding_chunks", None): - modified_text = getattr(response, "text", str(response)) - sources_gathered = [] - else: - # resolve the urls to short urls for saving tokens and time - resolved_urls = resolve_urls( - grounding_metadata.grounding_chunks, - state["id"], - ) - # Gets the citations and adds them to the generated text - citations = get_citations(response, resolved_urls) - modified_text = insert_citation_markers(response.text, citations) - sources_gathered = [ - item for citation in citations for item in citation["segments"] - ] + """LangGraph node that performs local markdown search instead of Google Search.""" + docs_dir = state.get("docs_dir") + if not docs_dir: + return { + "sources_gathered": [], + "web_research_result": [ + "Web search skipped: no docs_dir provided for local markdown search." + ], + "search_query": [state.get("search_query", "")], + } + + search_query = state["search_query"] + snippets = _search_markdown_directory(docs_dir, search_query, top_k=2) + + if not snippets: + return { + "sources_gathered": [], + "web_research_result": [ + f"No local markdown results found for query: {search_query}" + ], + "search_query": [search_query], + } + + sources = [] + result_chunks = [] + for idx, (rel_path, snippet) in enumerate(snippets): + marker = f"[S{idx}]" + sources.append({"short_url": marker, "value": rel_path}) + result_chunks.append(f"{marker} {rel_path}\n{snippet}") return { - "sources_gathered": sources_gathered, - "web_research_result": [modified_text], + "sources_gathered": sources, + "web_research_result": ["\n\n---\n\n".join(result_chunks)], + "search_query": [search_query], } @@ -233,7 +200,8 @@ def evaluate_research( "web_research", { "search_query": follow_up_query, - "id": state["number_of_ran_queries"] + int(idx), + "id": state["number_of_ran_queries"] + idx, + "docs_dir": state.get("docs_dir"), }, ) for idx, follow_up_query in enumerate(state["follow_up_queries"]) @@ -257,22 +225,25 @@ def finalize_answer(state: OverallState, config: RunnableConfig): reasoning_model = state.get("reasoning_model") or configurable.answer_model current_date = get_current_date() - if any("Web search skipped" in r for r in state.get("web_research_result", [])): - # Fallback prompt: short, explicit about quota, forbids citations/links/outlets + + # Filter out error messages and keep only valid content chunks + all_results = state.get("web_research_result", []) + valid_chunks = [ + r for r in all_results + if not ("No local markdown results found" in r or "no docs_dir provided" in r or "Web search skipped" in r) + and r.strip() + ] + + if not valid_chunks: formatted_prompt = ( - "Web search was skipped due to quota limits.\n" - "Do NOT include any links, citations, markdown links, or name, suggest, or mention any sources or news outlets—including 'apnews', 'vertexaisearch', or any examples.\n" - "Do NOT recommend that the user check sources or news outlets.\n" - "\n" - "Based only on general knowledge, provide a brief best-effort answer to the following question:\n" - f"Question: {get_research_topic(state['messages'])}" + "I could not find sufficient information in the provided documentation to answer this question." ) else: # Normal prompt with citations if sources exist formatted_prompt = answer_instructions.format( current_date=current_date, research_topic=get_research_topic(state["messages"]), - summaries="\n---\n\n".join(state["web_research_result"]), + summaries="\n---\n\n".join(valid_chunks), ) llm = ChatGroq( @@ -283,20 +254,152 @@ def finalize_answer(state: OverallState, config: RunnableConfig): ) result = llm.invoke(formatted_prompt) - # Replace the short urls with the original urls and add all used urls to the sources_gathered - unique_sources = [] + fallback_msg = "I could not find sufficient information in the provided documentation to answer this question." + content = result.content if hasattr(result, "content") else "" + content = content.strip() if isinstance(content, str) else "" + + if content != fallback_msg and not re.search(r"\[S\d+\]", content): + content = fallback_msg + + if content == fallback_msg: + return {"messages": [AIMessage(content=fallback_msg)], "sources_gathered": []} + if not content: + return {"messages": [AIMessage(content=fallback_msg)], "sources_gathered": []} + + source_map: dict[str, str] = {} for source in state.get("sources_gathered", []): - if source["short_url"] in result.content: - result.content = result.content.replace( - source["short_url"], source["value"] - ) - unique_sources.append(source) + if not isinstance(source, dict): + continue + key = source.get("short_url") + val = source.get("value") + if isinstance(key, str) and isinstance(val, str) and key not in source_map: + source_map[key] = val + used_markers = [] + for marker in re.findall(r"\[S\d+\]", content): + if marker in source_map and marker not in used_markers: + used_markers.append(marker) + + if not used_markers: + return {"messages": [AIMessage(content=fallback_msg)], "sources_gathered": []} + + sources_lines = [ + f"- {m} -> [{source_map[m]}]({source_map[m]})" + for m in used_markers + ] + content = f"{content}\n\nSources:\n" + "\n".join(sources_lines) - return { - "messages": [AIMessage(content=result.content)], - "sources_gathered": unique_sources, + return {"messages": [AIMessage(content=content)], "sources_gathered": state.get("sources_gathered", [])} + + +def _search_markdown_directory(base_dir: str, query: str, top_k: int = 5): + """Search recursively for markdown files and return top-k relevant snippets. + + Scores files based on keyword and phrase matching in both file paths and content. + Uses a weighted scoring system: path matches (3x), phrase matches (3x), term matches (1x). + Requires minimum score threshold (2 term matches or 1 phrase match) to include results. + Extracts line-based snippets around the best match position for deterministic grounding. + + Args: + base_dir: Base directory path (resolved to absolute) to search recursively for .md files. + query: Search query string to match against file content and paths. + top_k: Maximum number of results to return (default: 5). + + Returns: + List of tuples (relative_path, snippet) sorted by relevance score descending. + """ + base_path = Path(base_dir).resolve() + if not base_path.exists(): + return [] + + # Collect files + md_files = [Path(p) for p in glob.glob(str(base_path / "**" / "*.md"), recursive=True)] + if not md_files: + return [] + + # Small set of common English stopwords for filtering + STOPWORDS = { + "the", "a", "an", "in", "on", "of", "for", "to", "and", "or", "is", "are", "was", "were", "with", "by", + "at", "it", "as", "that", "from", "be", "this", "which" } + query_lower = query.lower() + # Extract individual terms, filter stopwords + terms = [t.lower() for t in re.findall(r"\w+", query) if t] + terms = [t for t in terms if t not in STOPWORDS] + + # Extract multi-word phrases (2-3 words) + phrases = [] + words = [w for w in re.findall(r"\w+", query_lower) if w not in STOPWORDS] + for i in range(len(words) - 1): + phrases.append(f"{words[i]} {words[i+1]}") + for i in range(len(words) - 2): + phrases.append(f"{words[i]} {words[i+1]} {words[i+2]}") + + def score_and_snippet(path: Path): + try: + text = path.read_text(encoding="utf-8") + except Exception: + return 0, "" + + rel_path = str(path.relative_to(base_path)) + path_lower = rel_path.lower() + text_lower = text.lower() + + # Score file path matches (boost) + path_score = sum(path_lower.count(t) * 3 for t in terms) + sum(path_lower.count(p) * 5 for p in phrases) + + # Score content matches + term_score = sum(text_lower.count(t) for t in terms) if terms else 0 + phrase_score = sum(text_lower.count(p) * 3 for p in phrases) + + # Require at least 2 term matches or 1 phrase match + total_score = path_score + term_score + phrase_score + if total_score < 2 and phrase_score == 0: + return 0, "" + + # Find best match position (prefer phrase matches, then term matches) + best_idx = len(text) + best_token = None + for p in phrases: + idx = text_lower.find(p) + if idx != -1: + best_idx = min(best_idx, idx) + if best_token is None or idx == best_idx: + best_token = p + if best_idx == len(text): + for t in terms: + idx = text_lower.find(t) + if idx != -1: + best_idx = min(best_idx, idx) + if best_token is None or idx == best_idx: + best_token = t + + # Extract line-based snippet around the best match for more deterministic grounding + lines = text.splitlines() + if best_token: + match_line = 0 + for i, line in enumerate(lines): + if best_token in line.lower(): + match_line = i + break + # Capture 20 lines before and 100 lines after to ensure we get the full code example + start_line = max(0, match_line - 20) + end_line = min(len(lines), match_line + 100) + else: + start_line, end_line = 0, min(len(lines), 120) + snippet = "\n".join(lines[start_line:end_line]).strip() + return total_score, snippet + + scored = [] + for path in md_files: + score, snippet = score_and_snippet(path) + if score > 0 and snippet: + rel_path = str(path.relative_to(base_path)) + scored.append((score, rel_path, snippet)) + + scored.sort(key=lambda x: x[0], reverse=True) + return [(rel_path, snippet) for _, rel_path, snippet in scored[:top_k]] + # Create our Agent Graph builder = StateGraph(OverallState, config_schema=Configuration) diff --git a/backend/src/agent/prompts.py b/backend/src/agent/prompts.py index 8963f6a6e..90539de2e 100644 --- a/backend/src/agent/prompts.py +++ b/backend/src/agent/prompts.py @@ -79,18 +79,19 @@ def get_current_date(): {summaries} """ -answer_instructions = """Generate a high-quality answer to the user's question based on the provided summaries. - -Instructions: -- The current date is {current_date}. -- You are the final step of a multi-step research process, don't mention that you are the final step. -- You have access to all the information gathered from the previous steps. -- You have access to the user's question. -- Generate a high-quality answer to the user's question based on the provided summaries and the user's question. -- Include the sources you used from the Summaries in the answer correctly, use markdown format (e.g. [apnews](https://vertexaisearch.cloud.google.com/id/1-0)). THIS IS A MUST. - -User Context: -- {research_topic} +answer_instructions = """Answer the user's question using ONLY the provided documentation snippets. + +HARD RULES: +- Use ONLY information from the snippets below. +- If the snippets do not contain enough information, output EXACTLY: +I could not find sufficient information in the provided documentation to answer this question. +- Every paragraph MUST include at least one citation marker exactly as shown in the snippets (e.g. [S0]). +- Do NOT invent examples or code. +- Do NOT mention web sources. +- Output ONLY the answer (no preamble, no questions). + +User Question: +{research_topic} -Summaries: +Documentation Snippets: {summaries}""" diff --git a/backend/src/agent/state.py b/backend/src/agent/state.py index d5ad4dcd8..c4c54b0dd 100644 --- a/backend/src/agent/state.py +++ b/backend/src/agent/state.py @@ -15,6 +15,7 @@ class OverallState(TypedDict): search_query: Annotated[list, operator.add] web_research_result: Annotated[list, operator.add] sources_gathered: Annotated[list, operator.add] + docs_dir: str initial_search_query_count: int max_research_loops: int research_loop_count: int @@ -35,12 +36,14 @@ class Query(TypedDict): class QueryGenerationState(TypedDict): - search_query: list[Query] + search_query: list[str] + docs_dir: str class WebSearchState(TypedDict): search_query: str - id: str + id: int + docs_dir: str @dataclass(kw_only=True)