QIN před 9 hodinami
rodič
revize
b4c2335c64
1 změnil soubory, kde provedl 174 přidání a 2 odebrání
  1. 174 2
      sql_agent.ipynb

+ 174 - 2
sql_agent.ipynb

@@ -296,10 +296,27 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "id": "019a2781",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "✅ 数据加载完成\n",
+      "   员工表:5 行 × 5 列\n",
+      "   产品表:4 行 × 5 列\n",
+      "   订单表:5 行 × 5 列\n",
+      "\n",
+      "📋 员工表示例:\n",
+      " id name department  salary  hire_date\n",
+      "  1   张三        技术部 20000.0 2023-01-15\n",
+      "  2   李四        销售部 11000.0 2023-02-20\n",
+      "  3   王五        技术部 16000.0 2022-11-10\n"
+     ]
+    }
+   ],
    "source": [
     "import pandas as pd\n",
     "import matplotlib.pyplot as plt\n",
@@ -333,6 +350,161 @@
     "print(f\"\\n📋 员工表示例:\")\n",
     "print(employees_df.head(3).to_string(index=False))"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "4c043990",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "✅ Python 代码执行沙箱创建成功\n"
+     ]
+    }
+   ],
+   "source": [
+    "import traceback\n",
+    "from io import StringIO\n",
+    "from contextlib import redirect_stdout\n",
+    "from langchain.tools import tool\n",
+    "\n",
+    "# ============================================================\n",
+    "# 定义沙箱的\"白名单\"——只有这些库和数据可以被代码访问\n",
+    "# ============================================================\n",
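+    "# NOTE: this dict only pre-loads names into the executed code's namespace; Python builtins (including import and open) stay reachable, so it is not a security boundary\n",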
+    "SANDBOX_GLOBALS = {\n",
+    "    # 数据:Agent 可以分析这三张表\n",
+    "    \"employees_df\": employees_df,\n",
+    "    \"products_df\":  products_df,\n",
+    "    \"orders_df\":    orders_df,\n",
+    "    # 工具库:Agent 可以用这些库做分析和画图\n",
+    "    \"pd\":  pd,       # pandas — 数据处理\n",
+    "    \"plt\": plt,      # matplotlib — 基础绘图\n",
+    "    \"sns\": sns,      # seaborn — 统计可视化\n",
+    "    \"np\":  np,       # numpy — 数值计算\n",
+    "}\n",
+    "\n",
+    "\n",
+    "@tool\n",
+    "def execute_python_code(code: str) -> str:\n",
+    "    \"\"\"\n",
+    "    执行 Python 代码进行数据分析和可视化。\n",
+    "\n",
+    "    可用变量:\n",
+    "      - employees_df: 员工表 DataFrame(字段:id, name, department, salary, hire_date)\n",
+    "      - products_df:  产品表 DataFrame(字段:id, product_name, category, price, stock)\n",
+    "      - orders_df:    订单表 DataFrame(字段:id, employee_id, product_id, quantity, order_date)\n",
+    "      - pd:   pandas 库\n",
+    "      - plt:  matplotlib.pyplot\n",
+    "      - sns:  seaborn\n",
+    "      - np:   numpy\n",
+    "\n",
+    "    使用示例:\n",
+    "      # 统计各部门平均薪资\n",
+    "      result = employees_df.groupby('department')['salary'].mean()\n",
+    "      print(result)\n",
+    "\n",
+    "      # 画柱状图\n",
+    "      plt.figure(figsize=(10, 6))\n",
+    "      employees_df.groupby('department')['salary'].mean().plot(kind='bar')\n",
+    "      plt.title('各部门平均薪资')\n",
+    "      plt.show()\n",
+    "    \"\"\"\n",
+    "    # The docstring above is left untouched on purpose: the @tool decorator\n",
+    "    # exposes it as the tool description, so it is runtime text, not a comment.\n",
+    "    #\n",
+    "    # Run the snippet in ONE namespace dict. Passing separate globals and\n",
+    "    # locals makes exec() treat the code like a class body: top-level\n",
+    "    # assignments land in locals, while functions and lambdas defined by the\n",
+    "    # snippet look names up in globals, so they fail with NameError on the\n",
+    "    # snippet's own variables.\n",
+    "    # The copy is shallow: names assigned by the snippet are discarded after\n",
+    "    # the call, but in-place changes to the shared DataFrames persist.\n",
+    "    exec_namespace = dict(SANDBOX_GLOBALS)\n",
+    "\n",
+    "    # Collect everything the snippet print()s so it can be returned as text.\n",
+    "    output_buffer = StringIO()\n",
+    "\n",
+    "    try:\n",
+    "        # SECURITY: exec() runs model-generated code with full builtins\n",
+    "        # available (import, open, ...). This is not a real sandbox; only\n",
+    "        # use it in a trusted or otherwise isolated environment.\n",
+    "        with redirect_stdout(output_buffer):\n",
+    "            exec(code, exec_namespace)\n",
+    "    except Exception as e:\n",
+    "        # Return the full traceback so the agent can correct its own code.\n",
+    "        error_detail = traceback.format_exc()\n",
+    "        return f\"❌ 执行出错:{e}\\n\\n{error_detail}\"\n",
+    "\n",
+    "    result = output_buffer.getvalue()\n",
+    "\n",
+    "    # Fall back to a default message when the snippet printed nothing.\n",
+    "    if not result.strip():\n",
+    "        result = \"✅ 代码执行成功(无文本输出,可能已生成图表)\"\n",
+    "\n",
+    "    return f\"执行成功:\\n{result}\"\n",
+    "\n",
+    "\n",
+    "print(\"✅ Python 代码执行沙箱创建成功\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "dbeb1792",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "数据可视化 Agent 创建完成\n"
+     ]
+    }
+   ],
+   "source": [
+    "from langchain.agents import create_agent\n",
+    "\n",
+    "# ============================================================\n",
+    "# 定义可视化 Agent 的 System Prompt\n",
+    "# ============================================================\n",
+    "VISUALIZATION_PROMPT = \"\"\"你是一名资深数据分析师,精通 Python、Pandas 和 Matplotlib 数据可视化。\n",
+    "\n",
+    "## 可用数据\n",
+    "1. employees_df — 员工表(字段:id, name, department, salary, hire_date)\n",
+    "2. products_df  — 产品表(字段:id, product_name, category, price, stock)\n",
+    "3. orders_df    — 订单表(字段:id, employee_id, product_id, quantity, order_date)\n",
+    "\n",
+    "## 工作流程\n",
+    "1. 理解用户的分析需求\n",
+    "2. 用 execute_python_code 工具编写并执行 Python 代码\n",
+    "3. 先做数据探索(head、describe、info),再做深入分析\n",
+    "4. 用中文解释分析结果,给出业务洞察\n",
+    "\n",
+    "## 代码规范\n",
+    "- 绑图前设置中文字体:plt.rcParams['font.sans-serif'] = ['SimHei', 'PingFang SC', 'DejaVu Sans']\n",
+    "- 设置 plt.rcParams['axes.unicode_minus'] = False\n",
+    "- 图表尺寸统一用 plt.figure(figsize=(10, 6))\n",
+    "- 必须添加标题、坐标轴标签,让图表自解释\n",
+    "- 用 print() 输出关键统计量,不要只画图不说话\n",
+    "- 图表标题用英文(避免渲染问题),但用中文向用户解释结果\n",
+    "\n",
+    "## 注意事项\n",
+    "- 每次只执行一段完整的代码,不要拆成多段\n",
+    "- 先探索数据结构,再做分析——不要上来就画图\n",
+    "- 结果要有业务洞察,不只是\"最大值是 XXX\"\n",
+    "\"\"\"\n",
+    "\n",
+    "# ============================================================\n",
+    "# 组装可视化 Agent(langchain 1.3.1 写法)\n",
+    "# ============================================================\n",
+    "viz_tools = [execute_python_code]\n",
+    "\n",
+    "# 同样用 create_agent 一行搞定,和 SQL Agent 的创建方式完全一致\n",
+    "visualization_agent = create_agent(\n",
+    "    model=llm,\n",
+    "    tools=viz_tools,\n",
+    "    system_prompt=VISUALIZATION_PROMPT,\n",
+    ")\n",
+    "\n",
+    "print(\"数据可视化 Agent 创建完成\")"
+   ]
   }
  ],
  "metadata": {