|
|
@@ -296,10 +296,27 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
+ "execution_count": 7,
|
|
|
"id": "019a2781",
|
|
|
"metadata": {},
|
|
|
- "outputs": [],
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "✅ 数据加载完成\n",
|
|
|
+ " 员工表:5 行 × 5 列\n",
|
|
|
+ " 产品表:4 行 × 5 列\n",
|
|
|
+ " 订单表:5 行 × 5 列\n",
|
|
|
+ "\n",
|
|
|
+ "📋 员工表示例:\n",
|
|
|
+ " id name department salary hire_date\n",
|
|
|
+ " 1 张三 技术部 20000.0 2023-01-15\n",
|
|
|
+ " 2 李四 销售部 11000.0 2023-02-20\n",
|
|
|
+ " 3 王五 技术部 16000.0 2022-11-10\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
"source": [
|
|
|
"import pandas as pd\n",
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
@@ -333,6 +350,161 @@
|
|
|
"print(f\"\\n📋 员工表示例:\")\n",
|
|
|
"print(employees_df.head(3).to_string(index=False))"
|
|
|
]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 8,
|
|
|
+ "id": "4c043990",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "✅ Python 代码执行沙箱创建成功\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "import traceback\n",
|
|
|
+ "from io import StringIO\n",
|
|
|
+ "from contextlib import redirect_stdout\n",
|
|
|
+ "from langchain.tools import tool\n",
|
|
|
+ "\n",
|
|
|
+ "# ============================================================\n",
|
|
|
+ "# 定义沙箱的\"白名单\"——只有这些库和数据可以被代码访问\n",
|
|
|
+ "# ============================================================\n",
|
|
|
+    "# 注意:白名单只决定预先注入哪些变量,并不是真正的安全隔离——exec 会自动补上 __builtins__,代码仍然可以 import 任意模块\n",
|
|
|
+ "SANDBOX_GLOBALS = {\n",
|
|
|
+ " # 数据:Agent 可以分析这三张表\n",
|
|
|
+ " \"employees_df\": employees_df,\n",
|
|
|
+ " \"products_df\": products_df,\n",
|
|
|
+ " \"orders_df\": orders_df,\n",
|
|
|
+ " # 工具库:Agent 可以用这些库做分析和画图\n",
|
|
|
+ " \"pd\": pd, # pandas — 数据处理\n",
|
|
|
+    "    \"plt\": plt,  # matplotlib — 基础绘图\n",
|
|
|
+ " \"sns\": sns, # seaborn — 统计可视化\n",
|
|
|
+ " \"np\": np, # numpy — 数值计算\n",
|
|
|
+ "}\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "@tool\n",
|
|
|
+ "def execute_python_code(code: str) -> str:\n",
|
|
|
+ " \"\"\"\n",
|
|
|
+ " 执行 Python 代码进行数据分析和可视化。\n",
|
|
|
+ "\n",
|
|
|
+ " 可用变量:\n",
|
|
|
+ " - employees_df: 员工表 DataFrame(字段:id, name, department, salary, hire_date)\n",
|
|
|
+ " - products_df: 产品表 DataFrame(字段:id, product_name, category, price, stock)\n",
|
|
|
+ " - orders_df: 订单表 DataFrame(字段:id, employee_id, product_id, quantity, order_date)\n",
|
|
|
+ " - pd: pandas 库\n",
|
|
|
+ " - plt: matplotlib.pyplot\n",
|
|
|
+ " - sns: seaborn\n",
|
|
|
+ " - np: numpy\n",
|
|
|
+ "\n",
|
|
|
+ " 使用示例:\n",
|
|
|
+ " # 统计各部门平均薪资\n",
|
|
|
+ " result = employees_df.groupby('department')['salary'].mean()\n",
|
|
|
+ " print(result)\n",
|
|
|
+ "\n",
|
|
|
+ " # 画柱状图\n",
|
|
|
+ " plt.figure(figsize=(10, 6))\n",
|
|
|
+ " employees_df.groupby('department')['salary'].mean().plot(kind='bar')\n",
|
|
|
+ " plt.title('各部门平均薪资')\n",
|
|
|
+ " plt.show()\n",
|
|
|
+ " \"\"\"\n",
|
|
|
+ " # 准备隔离的执行环境\n",
|
|
|
+    "    # exec_globals 提供白名单变量,exec_locals 收集执行过程中产生的新变量(注意:两者分开传入时,代码按类体语义执行,代码中定义的函数看不到同一段代码里的顶层变量)\n",
|
|
|
+ " exec_globals = dict(SANDBOX_GLOBALS)\n",
|
|
|
+ " exec_locals = {}\n",
|
|
|
+ "\n",
|
|
|
+ " # 用 StringIO 捕获 print() 的输出\n",
|
|
|
+ " # 这样 Agent 生成的代码里所有的 print 语句都会被收集\n",
|
|
|
+ " output_buffer = StringIO()\n",
|
|
|
+ "\n",
|
|
|
+ " try:\n",
|
|
|
+ " # redirect_stdout 会把标准输出重定向到我们的 buffer\n",
|
|
|
+ " with redirect_stdout(output_buffer):\n",
|
|
|
+ " exec(code, exec_globals, exec_locals)\n",
|
|
|
+ "\n",
|
|
|
+ " result = output_buffer.getvalue()\n",
|
|
|
+ "\n",
|
|
|
+ " # 如果代码没有 print 任何东西,给个默认提示\n",
|
|
|
+ " if not result.strip():\n",
|
|
|
+ " result = \"✅ 代码执行成功(无文本输出,可能已生成图表)\"\n",
|
|
|
+ "\n",
|
|
|
+ " return f\"执行成功:\\n{result}\"\n",
|
|
|
+ "\n",
|
|
|
+ " except Exception as e:\n",
|
|
|
+ " # 出错时返回完整的错误堆栈,方便 Agent 自我修正\n",
|
|
|
+ " error_detail = traceback.format_exc()\n",
|
|
|
+ " return f\"❌ 执行出错:{e}\\n\\n{error_detail}\"\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "print(\"✅ Python 代码执行沙箱创建成功\")"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 9,
|
|
|
+ "id": "dbeb1792",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "数据可视化 Agent 创建完成\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "from langchain.agents import create_agent\n",
|
|
|
+ "\n",
|
|
|
+ "# ============================================================\n",
|
|
|
+ "# 定义可视化 Agent 的 System Prompt\n",
|
|
|
+ "# ============================================================\n",
|
|
|
+ "VISUALIZATION_PROMPT = \"\"\"你是一名资深数据分析师,精通 Python、Pandas 和 Matplotlib 数据可视化。\n",
|
|
|
+ "\n",
|
|
|
+ "## 可用数据\n",
|
|
|
+ "1. employees_df — 员工表(字段:id, name, department, salary, hire_date)\n",
|
|
|
+ "2. products_df — 产品表(字段:id, product_name, category, price, stock)\n",
|
|
|
+ "3. orders_df — 订单表(字段:id, employee_id, product_id, quantity, order_date)\n",
|
|
|
+ "\n",
|
|
|
+ "## 工作流程\n",
|
|
|
+ "1. 理解用户的分析需求\n",
|
|
|
+ "2. 用 execute_python_code 工具编写并执行 Python 代码\n",
|
|
|
+ "3. 先做数据探索(head、describe、info),再做深入分析\n",
|
|
|
+ "4. 用中文解释分析结果,给出业务洞察\n",
|
|
|
+ "\n",
|
|
|
+ "## 代码规范\n",
|
|
|
+    "- 绘图前设置中文字体:plt.rcParams['font.sans-serif'] = ['SimHei', 'PingFang SC', 'DejaVu Sans']\n",
|
|
|
+ "- 设置 plt.rcParams['axes.unicode_minus'] = False\n",
|
|
|
+ "- 图表尺寸统一用 plt.figure(figsize=(10, 6))\n",
|
|
|
+ "- 必须添加标题、坐标轴标签,让图表自解释\n",
|
|
|
+ "- 用 print() 输出关键统计量,不要只画图不说话\n",
|
|
|
+ "- 图表标题用英文(避免渲染问题),但用中文向用户解释结果\n",
|
|
|
+ "\n",
|
|
|
+ "## 注意事项\n",
|
|
|
+ "- 每次只执行一段完整的代码,不要拆成多段\n",
|
|
|
+ "- 先探索数据结构,再做分析——不要上来就画图\n",
|
|
|
+ "- 结果要有业务洞察,不只是\"最大值是 XXX\"\n",
|
|
|
+ "\"\"\"\n",
|
|
|
+ "\n",
|
|
|
+ "# ============================================================\n",
|
|
|
+ "# 组装可视化 Agent(langchain 1.3.1 写法)\n",
|
|
|
+ "# ============================================================\n",
|
|
|
+ "viz_tools = [execute_python_code]\n",
|
|
|
+ "\n",
|
|
|
+ "# 同样用 create_agent 一行搞定,和 SQL Agent 的创建方式完全一致\n",
|
|
|
+ "visualization_agent = create_agent(\n",
|
|
|
+ " model=llm,\n",
|
|
|
+ " tools=viz_tools,\n",
|
|
|
+ " system_prompt=VISUALIZATION_PROMPT,\n",
|
|
|
+ ")\n",
|
|
|
+ "\n",
|
|
|
+ "print(\"数据可视化 Agent 创建完成\")"
|
|
|
+ ]
|
|
|
}
|
|
|
],
|
|
|
"metadata": {
|