AI创想

Title: Quickly Building DeepResearch, a Next-Generation Retrieval-Augmented Research Tool, with LangGraph

Author: 米落枫    Time: yesterday 23:16
Title: Quickly Building DeepResearch, a Next-Generation Retrieval-Augmented Research Tool, with LangGraph
Source: CSDN blog

Project Overview

This project is an autonomous research agent built on the LangGraph framework that generates an in-depth research report from a user query. A three-stage pipeline (plan, search, write) covers the full loop from question understanding to report generation.



Project Structure
  langgpraph_deepresearch/
  ├── .env                  # environment variable configuration
  ├── langgraph.json        # LangGraph configuration file
  ├── requirements.txt      # project dependencies
  └── graph.py              # core graph implementation
Environment Setup

Dependency list (requirements.txt)
  langgraph
  langchain-core
  langchain-deepseek
  python-dotenv
  langsmith
  pydantic
  matplotlib
  seaborn
  pandas
  IPython
  langchain-mcp-adapters
  uv
  # also imported by graph.py:
  langchain
  langchain-tavily
  langchain-openai
Environment variables (.env)
  DEEPSEEK_API_KEY='****'
  TAVILY_API_KEY='tvly-dev-******'
  LANGSMITH_API_KEY='lsv2_pt_**********'
  LANGSMITH_TRACING=true
  LANGSMITH_PROJECT='langgpraph_deepresearch'
LangGraph configuration (langgraph.json)
  {
    "dependencies": ["./"],
    "graphs": {
      "langgpraph_deepresearch": "./graph.py:graph"
    },
    "env": ".env"
  }
Core Code Architecture

1. Data model definitions
  class WebSearchItem(BaseModel):
      query: str
      reason: str

  class WebSearchPlan(BaseModel):
      searches: List[WebSearchItem]

  class ReportData(BaseModel):
      short_summary: str
      markdown_report: str
      follow_up_questions: List[str]
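The planner is expected to emit JSON matching the `WebSearchPlan` schema, and (as shown later in `planner_node`) tolerate a degenerate response where `searches` is a bare list of query strings. A stdlib-only sketch of that parsing logic, using a hypothetical helper `parse_plan` as a stand-in for the pydantic validation:

```python
import json

def parse_plan(raw: str) -> list:
    """Parse planner output into [{'query': ..., 'reason': ...}] items.

    Mirrors planner_node's fallback: if an element of 'searches' is a
    bare string instead of an object, wrap it with an empty reason.
    (Hypothetical stdlib stand-in for WebSearchPlan.model_validate.)
    """
    data = json.loads(raw)
    items = []
    for s in data["searches"]:
        if isinstance(s, str):
            items.append({"query": s, "reason": ""})
        else:
            items.append({"query": s["query"], "reason": s.get("reason", "")})
    return items

# Well-formed planner output
good = '{"searches": [{"query": "AI trends 2024", "reason": "background"}]}'
# Degenerate output: bare query strings
bare = '{"searches": ["AI trends 2024", "LLM benchmarks"]}'

print(parse_plan(good))  # → [{'query': 'AI trends 2024', 'reason': 'background'}]
print(parse_plan(bare))  # → two items with empty reasons
```

In the real pipeline this normalization is what lets `json_mode` output flow into `WebSearchPlan` even when the model ignores part of the schema.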
2. Planner
  PLANNER_INSTRUCTIONS = (
      "You are a helpful research assistant. Given a query, come up with 5-7 web searches "
      "to perform to best answer the query. "
      "Return **ONLY valid JSON** that follows this schema: "
      '{{"searches": [ {{"query": "example", "reason": "why"}} ]}}'
  )
  planner_chain = (
      planner_prompt
      | model.with_structured_output(WebSearchPlan, method="json_mode")
  )
3. Search Agent
  SEARCH_INSTRUCTIONS = (
      "You are a research assistant. Given a search term, you search the web for that term and "
      "produce a concise summary of the results. The summary must be 2-3 paragraphs and less than 300 "
      "words. Capture the main points. Write succinctly; no need to have complete sentences or good "
      "grammar. This will be consumed by someone synthesizing a report, so it's vital you capture the "
      "essence and ignore any fluff. Do not include any additional commentary other than the summary "
      "itself."
  )
  search_tool = TavilySearch(max_results=5, topic="general")
  search_agent = create_react_agent(
      model=model,
      prompt=SEARCH_INSTRUCTIONS,
      tools=[search_tool],
  )
4. Writer
  WRITER_PROMPT = (
      "You are a senior researcher tasked with writing a cohesive report for a research query. "
      "You will be provided with the original query and some initial research.\n\n"
      "① 先给出完整的大纲。\n"
      "② 然后生成正式报告。\n"
      "**写作要求**:\n"
      "· 报告使用 Markdown 格式;\n"
      "· 章节清晰,层次分明;\n"
      "· markdown_report部分至少包含2000中文字符(注意需要用中文进行回复);\n"
      "· 内容丰富,论据充分,可加入引用和数据,允许分段、添加引用、表格等;\n"
      "· 最终仅返回 JSON:\n"
      '{{"short_summary": "...", "markdown_report": "...", "follow_up_questions": ["..."]}}'
  )
  writer_chain = writer_prompt | model.with_structured_output(ReportData, method="json_mode")
LangGraph Node Implementations

1. Planner node (planner_node)
  def planner_node(state: MessagesState) -> Command:
      user_query = state["messages"][-1].content
      raw = planner_chain.invoke({"query": user_query})
      print(raw)
      try:
          plan = WebSearchPlan.model_validate(raw)
      except ValidationError:
          # Fallback: the model may return a bare list of query strings
          if isinstance(raw, dict) and isinstance(raw.get("searches"), list):
              plan = WebSearchPlan(
                  searches=[WebSearchItem(query=q, reason="") for q in raw["searches"]]
              )
          else:
              raise
      return Command(
          goto="search_node",
          update={"messages": [AIMessage(content=plan.model_dump_json())], "plan": plan},
      )
2. Search node (search_node)
  def search_node(state: MessagesState) -> Command:
      plan_json = state["messages"][-1].content
      plan = WebSearchPlan.model_validate_json(plan_json)
      summaries = []
      for item in plan.searches:  # searches are processed serially
          run = search_agent.invoke({"messages": [HumanMessage(content=item.query)]})
          msgs = run["messages"]
          readable = next(
              (m for m in reversed(msgs) if isinstance(m, (ToolMessage, AIMessage))),
              msgs[-1],
          )
          summaries.append(f"## {item.query}\n\n{readable.content}")

      combined = "\n\n".join(summaries)
      return Command(goto="writer_node", update={"messages": [AIMessage(content=combined)]})
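The search node runs each planned query serially, so latency grows linearly with the plan size. A stdlib-only sketch of running the searches concurrently with a thread pool, using a stub `fake_search` in place of `search_agent.invoke` (whether the real agent client is safe to call from multiple threads depends on the underlying model SDK, so treat this as an assumption to verify):

```python
from concurrent.futures import ThreadPoolExecutor

def fake_search(query: str) -> str:
    """Stub stand-in for search_agent.invoke; returns a summary string."""
    return f"summary for {query}"

def run_searches(queries: list, max_workers: int = 4) -> str:
    # pool.map preserves input order, so the combined output keeps the
    # planner's ordering even though the calls overlap in time
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        summaries = [
            f"## {q}\n\n{s}" for q, s in zip(queries, pool.map(fake_search, queries))
        ]
    return "\n\n".join(summaries)

print(run_searches(["AI trends 2024", "LLM benchmarks"]))
```

Swapping `fake_search` for a thin wrapper around `search_agent.invoke` would turn the node's O(n) wall-clock cost into roughly O(n / max_workers).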
3. Writer node (writer_node)
  def writer_node(state: MessagesState) -> Command:
      original_query = state["messages"][0].content
      combined_summary = state["messages"][-1].content

      writer_input = f"原始问题:{original_query}\n\n搜索摘要:\n{combined_summary}"

      report = writer_chain.invoke({"content": writer_input})
      return Command(
          goto=END,
          update={
              "messages": [
                  AIMessage(content=json.dumps(report.model_dump(), ensure_ascii=False, indent=4))
              ]
          },
      )
Graph Construction and Execution
  builder = StateGraph(MessagesState)
  builder.add_node("planner_node", planner_node)
  builder.add_node("search_node", search_node)
  builder.add_node("writer_node", writer_node)
  builder.add_edge(START, "planner_node")
  builder.add_edge("planner_node", "search_node")
  builder.add_edge("search_node", "writer_node")
  builder.add_edge("writer_node", END)
  graph = builder.compile()
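Because every edge here is unconditional, the compiled graph behaves like sequential composition of three state transformers over a shared message list. A stdlib-only sketch of that state threading, with stub functions standing in for the real nodes:

```python
def planner(state: dict) -> dict:
    # stub: turn the latest user message into a plan message
    query = state["messages"][-1]
    return {"messages": state["messages"] + [f"plan({query})"]}

def searcher(state: dict) -> dict:
    # stub: turn the plan into combined search summaries
    plan = state["messages"][-1]
    return {"messages": state["messages"] + [f"summaries({plan})"]}

def writer(state: dict) -> dict:
    # stub: turn the summaries into the final report
    summaries = state["messages"][-1]
    return {"messages": state["messages"] + [f"report({summaries})"]}

# START -> planner_node -> search_node -> writer_node -> END
state = {"messages": ["analyze AI trends"]}
for node in (planner, searcher, writer):
    state = node(state)

print(state["messages"][-1])  # → report(summaries(plan(analyze AI trends)))
```

The value of expressing this in LangGraph rather than as a plain loop is that each step is checkpointable and observable (e.g. in LangSmith), and conditional edges or retries can be added later without restructuring the code.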
Run Flow

Key Features

Performance Analysis

Complete project code (graph.py)
  # A more complete, self-contained graph implementation
  import json
  from typing import List

  from dotenv import load_dotenv
  from pydantic import BaseModel, ValidationError
  from langchain_deepseek import ChatDeepSeek
  from langchain.prompts import ChatPromptTemplate
  from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
  from langgraph.graph import StateGraph, MessagesState, START, END
  from langgraph.types import Command
  from langgraph.prebuilt import create_react_agent
  from langchain_tavily import TavilySearch
  from langchain_openai import ChatOpenAI

  load_dotenv()

  model = ChatDeepSeek(model="deepseek-chat", max_tokens=8000)
  # model = ChatOpenAI(model="gpt-4.1", max_tokens=32000)

  # -------- 1) Planner chain --------
  PLANNER_INSTRUCTIONS = (
      "You are a helpful research assistant. Given a query, come up with 5-7 web searches "
      "to perform to best answer the query. "
      "Return **ONLY valid JSON** that follows this schema: "
      '{{"searches": [ {{"query": "example", "reason": "why"}} ]}}'
  )

  class WebSearchItem(BaseModel):
      query: str
      reason: str

  class WebSearchPlan(BaseModel):
      searches: List[WebSearchItem]

  planner_prompt = ChatPromptTemplate.from_messages(
      [("system", PLANNER_INSTRUCTIONS), ("human", "{query}")]
  )
  planner_chain = (
      planner_prompt
      | model.with_structured_output(WebSearchPlan, method="json_mode")  # force JSON output
  )

  # -------- 2) Search agent --------
  SEARCH_INSTRUCTIONS = (
      "You are a research assistant. Given a search term, you search the web for that term and "
      "produce a concise summary of the results. The summary must be 2-3 paragraphs and less than 300 "
      "words. Capture the main points. Write succinctly; no need to have complete sentences or good "
      "grammar. This will be consumed by someone synthesizing a report, so it's vital you capture the "
      "essence and ignore any fluff. Do not include any additional commentary other than the summary "
      "itself."
  )
  search_tool = TavilySearch(max_results=5, topic="general")
  search_agent = create_react_agent(
      model=model,
      prompt=SEARCH_INSTRUCTIONS,
      tools=[search_tool],
  )

  # -------- 3) Writer chain --------
  WRITER_PROMPT = (
      "You are a senior researcher tasked with writing a cohesive report for a research query. "
      "You will be provided with the original query and some initial research.\n\n"
      "① 先给出完整的大纲。\n"
      "② 然后生成正式报告。\n"
      "**写作要求**:\n"
      "· 报告使用 Markdown 格式;\n"
      "· 章节清晰,层次分明;\n"
      "· markdown_report部分至少包含2000中文字符(注意需要用中文进行回复);\n"
      "· 内容丰富,论据充分,可加入引用和数据,允许分段、添加引用、表格等;\n"
      "· 最终仅返回 JSON:\n"
      '{{"short_summary": "...", "markdown_report": "...", "follow_up_questions": ["..."]}}'
  )

  class ReportData(BaseModel):
      short_summary: str
      markdown_report: str
      follow_up_questions: List[str]

  writer_prompt = ChatPromptTemplate.from_messages(
      [("system", WRITER_PROMPT), ("human", "{content}")]
  )
  writer_chain = writer_prompt | model.with_structured_output(ReportData, method="json_mode")

  # ------------- LangGraph nodes -------------
  def planner_node(state: MessagesState) -> Command:
      user_query = state["messages"][-1].content
      raw = planner_chain.invoke({"query": user_query})
      print(raw)
      try:
          plan = WebSearchPlan.model_validate(raw)
      except ValidationError:
          # Fallback: the model may return a bare list of query strings
          if isinstance(raw, dict) and isinstance(raw.get("searches"), list):
              plan = WebSearchPlan(
                  searches=[WebSearchItem(query=q, reason="") for q in raw["searches"]]
              )
          else:
              raise
      return Command(
          goto="search_node",
          update={"messages": [AIMessage(content=plan.model_dump_json())], "plan": plan},
      )

  def search_node(state: MessagesState) -> Command:
      plan_json = state["messages"][-1].content
      plan = WebSearchPlan.model_validate_json(plan_json)
      summaries = []
      for item in plan.searches:  # searches run serially
          run = search_agent.invoke({"messages": [HumanMessage(content=item.query)]})
          msgs = run["messages"]
          readable = next(
              (m for m in reversed(msgs) if isinstance(m, (ToolMessage, AIMessage))),
              msgs[-1],
          )
          summaries.append(f"## {item.query}\n\n{readable.content}")
      combined = "\n\n".join(summaries)
      return Command(goto="writer_node", update={"messages": [AIMessage(content=combined)]})

  def writer_node(state: MessagesState) -> Command:
      original_query = state["messages"][0].content
      combined_summary = state["messages"][-1].content

      writer_input = f"原始问题:{original_query}\n\n搜索摘要:\n{combined_summary}"

      report = writer_chain.invoke({"content": writer_input})
      return Command(
          goto=END,
          update={
              "messages": [
                  AIMessage(content=json.dumps(report.model_dump(), ensure_ascii=False, indent=4))
              ]
          },
      )

  # Build the graph
  builder = StateGraph(MessagesState)
  builder.add_node("planner_node", planner_node)
  builder.add_node("search_node", search_node)
  builder.add_node("writer_node", writer_node)
  # Define the edges between nodes
  builder.add_edge(START, "planner_node")
  builder.add_edge("planner_node", "search_node")
  builder.add_edge("search_node", "writer_node")
  builder.add_edge("writer_node", END)
  # Compile the graph
  graph = builder.compile()
Usage Example
  import json

  # Run the research pipeline
  result = graph.invoke(
      {"messages": [HumanMessage(content="分析2024年人工智能发展趋势")]}
  )

  # Parse the final report
  report_data = json.loads(result["messages"][-1].content)
  print("摘要:", report_data["short_summary"])
  print("报告:", report_data["markdown_report"])
Deployment

For project deployment, see: quickly deploying a project with langgraph-cli



Extension Directions

Summary

The DeepResearch project shows how to build a complete AI research agent on LangGraph: a three-stage pipeline covers the full loop from question understanding to in-depth report generation. The code is clearly structured, fully configured, and readily extensible in practice.

Reference: 赋范空间 LLM technology community – DeepResearch Application Development in Practice

Original article: https://blog.csdn.net/Galen_xia/article/details/149817138



