Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions backend/examples/cli_research.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@
from langchain_core.messages import HumanMessage
from agent.graph import graph


def main() -> None:
"""Run the research agent from the command line."""
parser = argparse.ArgumentParser(description="Run the LangGraph research agent")

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The docstring for the main function was removed. Per PEP 257, it's good practice to include docstrings for all public functions to improve readability and maintainability. Please consider adding it back.

Suggested change
parser = argparse.ArgumentParser(description="Run the LangGraph research agent")
"""Run the research agent from the command line."""
parser = argparse.ArgumentParser(description="Run the LangGraph research agent")

parser.add_argument("question", help="Research question")
parser.add_argument(
Expand All @@ -21,23 +19,30 @@ def main() -> None:
)
parser.add_argument(
"--reasoning-model",
default="gemini-2.5-pro-preview-05-06",
default="llama-3.3-70b-versatile",
help="Model for the final answer",
)
parser.add_argument(
"--dir",
type=str,
help="Directory to search in",
default=None
)
Comment on lines +13 to +17

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The formatting for this add_argument call is inconsistent with the others in this file and not well-aligned. For improved readability and consistency, please format it as a single line, similar to the other arguments.

    parser.add_argument("--dir", type=str, help="Directory to search in", default=None)

args = parser.parse_args()

config = {"configurable": {"search_dir": args.dir}}

state = {
"messages": [HumanMessage(content=args.question)],
"initial_search_query_count": args.initial_queries,
"max_research_loops": args.max_loops,
"reasoning_model": args.reasoning_model,
}

result = graph.invoke(state)
result = graph.invoke(state, config=config)
messages = result.get("messages", [])
if messages:
print(messages[-1].content)


if __name__ == "__main__":
main()
main()
137 changes: 56 additions & 81 deletions backend/src/agent/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

from agent.tools_and_schemas import SearchQueryList, Reflection
from dotenv import load_dotenv
from langchain_core.messages import AIMessage
from langchain_core.messages import AIMessage, SystemMessage
from langgraph.types import Send
from langgraph.graph import StateGraph
from langgraph.graph import START, END
from langchain_core.runnables import RunnableConfig
from google.genai import Client
from langchain_groq import ChatGroq

from agent.state import (
OverallState,
Expand All @@ -23,7 +23,7 @@
reflection_instructions,
answer_instructions,
)
from langchain_google_genai import ChatGoogleGenerativeAI
from google.genai import Client
from agent.utils import (
get_citations,
get_research_topic,
Expand All @@ -36,15 +36,24 @@
if os.getenv("GEMINI_API_KEY") is None:
raise ValueError("GEMINI_API_KEY is not set")

if os.getenv("GROQ_API_KEY") is None:
raise ValueError("GROQ_API_KEY is not set")

# Used for Google Search API
genai_client = Client(api_key=os.getenv("GEMINI_API_KEY"))

# Initialize Groq LLM for research tasks
groq_llm = ChatGroq(
model="llama-3.3-70b-versatile",
temperature=0,
api_key=os.getenv("GROQ_API_KEY")
)

# Nodes
def generate_query(state: OverallState, config: RunnableConfig) -> QueryGenerationState:
"""LangGraph node that generates search queries based on the User's question.

Uses Gemini 2.0 Flash to create an optimized search queries for web research based on
Uses Llama 3.3 70B via Groq to create optimized search queries for web research based on
the User's question.

Args:
Expand All @@ -60,14 +69,8 @@ def generate_query(state: OverallState, config: RunnableConfig) -> QueryGenerati
if state.get("initial_search_query_count") is None:
state["initial_search_query_count"] = configurable.number_of_initial_queries

# init Gemini 2.0 Flash
llm = ChatGoogleGenerativeAI(
model=configurable.query_generator_model,
temperature=1.0,
max_retries=2,
api_key=os.getenv("GEMINI_API_KEY"),
)
structured_llm = llm.with_structured_output(SearchQueryList)
# init Groq structured output
structured_llm = groq_llm.with_structured_output(SearchQueryList)

# Format the prompt
current_date = get_current_date()
Expand All @@ -93,46 +96,47 @@ def continue_to_web_research(state: QueryGenerationState):


def web_research(state: WebSearchState, config: RunnableConfig) -> OverallState:
"""LangGraph node that performs web research using the native Google Search API tool.

Executes a web search using the native Google Search API tool in combination with Gemini 2.0 Flash.
"""LangGraph node that performs local directory research instead of Google Search.

Args:
state: Current graph state containing the search query and research loop count
config: Configuration for the runnable, including search API settings
state: Current graph state containing the search query
config: Configuration containing the 'search_dir' parameter

Returns:
Dictionary with state update, including sources_gathered, research_loop_count, and web_research_results
Dictionary with state update, including sources_gathered and web_research_results
"""
# Configure
configurable = Configuration.from_runnable_config(config)
formatted_prompt = web_searcher_instructions.format(
current_date=get_current_date(),
research_topic=state["search_query"],
)

# Uses the google genai client as the langchain client doesn't return grounding metadata
response = genai_client.models.generate_content(
model=configurable.query_generator_model,
contents=formatted_prompt,
config={
"tools": [{"google_search": {}}],
"temperature": 0,
},
)
# resolve the urls to short urls for saving tokens and time
resolved_urls = resolve_urls(
response.candidates[0].grounding_metadata.grounding_chunks, state["id"]
)
# Gets the citations and adds them to the generated text
citations = get_citations(response, resolved_urls)
modified_text = insert_citation_markers(response.text, citations)
sources_gathered = [item for citation in citations for item in citation["segments"]]
# Get the search directory from the config passed via cli_research.py
search_dir = config.get("configurable", {}).get("search_dir")

if not search_dir:
return {"messages": [SystemMessage(content="Error: Directory for local search not specified.")]}

results = []
try:
# Recursively crawl the directory
for root, dirs, files in os.walk(search_dir):
for file in files:
# Target common text-based files
if file.endswith((".txt", ".md", ".py", ".json", ".csv")):
path = os.path.join(root, file)
try:
with open(path, 'r', encoding='utf-8', errors='ignore') as f:
content = f.read()
# Check if the generated search query exists in the file content
query = state.get("search_query")
if query and query.lower() in content.lower():
results.append(f"Source: {path}\nContent snippet: {content[:500]}...\n")
except Exception:
continue

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Catching a broad Exception and silently continuing with continue can hide critical bugs and make debugging very difficult. It's much safer to catch only the specific exceptions you expect from file operations, such as OSError (which, since Python 3.3, also covers the legacy IOError alias). This prevents masking unexpected issues.

Suggested change
except Exception:
continue
except OSError:
continue

except Exception as e:
return {"messages": [SystemMessage(content=f"FileSystem Error: {str(e)}")]}

final_results = "\n".join(results) if results else "No relevant information found in the specified local directory."

return {
"sources_gathered": sources_gathered,
"sources_gathered": [], # Local files don't need complex grounding metadata for this task
"search_query": [state["search_query"]],
"web_research_result": [modified_text],
"web_research_result": [final_results],
}


Expand All @@ -150,10 +154,8 @@ def reflection(state: OverallState, config: RunnableConfig) -> ReflectionState:
Returns:
Dictionary with state update, including search_query key containing the generated follow-up query
"""
configurable = Configuration.from_runnable_config(config)
# Increment the research loop count and get the reasoning model
state["research_loop_count"] = state.get("research_loop_count", 0) + 1
reasoning_model = state.get("reasoning_model", configurable.reflection_model)

# Format the prompt
current_date = get_current_date()
Expand All @@ -162,14 +164,8 @@ def reflection(state: OverallState, config: RunnableConfig) -> ReflectionState:
research_topic=get_research_topic(state["messages"]),
summaries="\n\n---\n\n".join(state["web_research_result"]),
)
# init Reasoning Model
llm = ChatGoogleGenerativeAI(
model=reasoning_model,
temperature=1.0,
max_retries=2,
api_key=os.getenv("GEMINI_API_KEY"),
)
result = llm.with_structured_output(Reflection).invoke(formatted_prompt)
# init Reasoning Model via Groq
result = groq_llm.with_structured_output(Reflection).invoke(formatted_prompt)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

The reasoning_model passed in the state is being ignored here. The node now unconditionally uses the global groq_llm instance, which has a hardcoded model. This makes the --reasoning-model CLI argument ineffective for the reflection step. To honor the user's configuration, you should dynamically initialize a ChatGroq instance using the model from the state.

    reasoning_model = state.get("reasoning_model") or "llama-3.3-70b-versatile"
    llm = ChatGroq(
        model=reasoning_model,
        temperature=1.0,
        api_key=os.getenv("GROQ_API_KEY"),
    )
    result = llm.with_structured_output(Reflection).invoke(formatted_prompt)


return {
"is_sufficient": result.is_sufficient,
Expand Down Expand Up @@ -220,19 +216,15 @@ def evaluate_research(
def finalize_answer(state: OverallState, config: RunnableConfig):
"""LangGraph node that finalizes the research summary.

Prepares the final output by deduplicating and formatting sources, then
combining them with the running summary to create a well-structured
research report with proper citations.
Prepares the final output by combining sources gathered with the running summary
to create a well-structured research report.

Args:
state: Current graph state containing the running summary and sources gathered

Returns:
Dictionary with state update, including running_summary key containing the formatted final summary with sources
Dictionary with state update, including running_summary key containing the formatted final summary
"""
configurable = Configuration.from_runnable_config(config)
reasoning_model = state.get("reasoning_model") or configurable.answer_model

# Format the prompt
current_date = get_current_date()
formatted_prompt = answer_instructions.format(
Expand All @@ -241,27 +233,11 @@ def finalize_answer(state: OverallState, config: RunnableConfig):
summaries="\n---\n\n".join(state["web_research_result"]),
)

# init Reasoning Model, default to Gemini 2.5 Flash
llm = ChatGoogleGenerativeAI(
model=reasoning_model,
temperature=0,
max_retries=2,
api_key=os.getenv("GEMINI_API_KEY"),
)
result = llm.invoke(formatted_prompt)

# Replace the short urls with the original urls and add all used urls to the sources_gathered
unique_sources = []
for source in state["sources_gathered"]:
if source["short_url"] in result.content:
result.content = result.content.replace(
source["short_url"], source["value"]
)
unique_sources.append(source)
# init Reasoning Model via Groq
result = groq_llm.invoke(formatted_prompt)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

high

Similar to the reflection node, the reasoning_model from the state is being ignored. This node unconditionally uses the global groq_llm instance with its hardcoded model, rendering the --reasoning-model CLI argument useless for this final step. Please initialize a ChatGroq instance dynamically with the model from the state.

    reasoning_model = state.get("reasoning_model") or "llama-3.3-70b-versatile"
    llm = ChatGroq(
        model=reasoning_model,
        temperature=0,
        api_key=os.getenv("GROQ_API_KEY"),
    )
    result = llm.invoke(formatted_prompt)


return {
"messages": [AIMessage(content=result.content)],
"sources_gathered": unique_sources,
}


Expand All @@ -275,7 +251,6 @@ def finalize_answer(state: OverallState, config: RunnableConfig):
builder.add_node("finalize_answer", finalize_answer)

# Set the entrypoint as `generate_query`
# This means that this node is the first one called
builder.add_edge(START, "generate_query")
# Add conditional edge to continue with search queries in a parallel branch
builder.add_conditional_edges(
Expand All @@ -290,4 +265,4 @@ def finalize_answer(state: OverallState, config: RunnableConfig):
# Finalize the answer
builder.add_edge("finalize_answer", END)

graph = builder.compile(name="pro-search-agent")
graph = builder.compile(name="pro-search-agent")