{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "IaK62oX3NhRf"
   },
   "source": [
    "# 基于 LangChain 创建自己专属的对话大模型"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "1. 领域精准问答\n",
    "2. 数据更新频繁\n",
    "3. 生成内容可解释可追溯\n",
    "4. 数据隐私保护"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "-173FmLdNhRj"
   },
   "source": [
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "fzzHZ2sLNhRk",
    "tags": []
   },
   "source": [
    "### 准备环境"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2026-05-14T11:57:35.802733Z",
     "iopub.status.busy": "2026-05-14T11:57:35.802515Z",
     "iopub.status.idle": "2026-05-14T11:57:54.945428Z",
     "shell.execute_reply": "2026-05-14T11:57:54.944824Z",
     "shell.execute_reply.started": "2026-05-14T11:57:35.802705Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Looking in indexes: https://mirrors.cloud.aliyuncs.com/pypi/simple\n",
      "Collecting langchain\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/7b/6f/b9a9721c27fbb6d29a6a7cd89d6a41eeffc7c79b49b9a5cf5beb1d60952d/langchain-1.3.0-py3-none-any.whl (114 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m114.1/114.1 kB\u001b[0m \u001b[31m37.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting langchain_community\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/f0/a4/c4fde67f193401512337456cabc2148f2c43316e445f5decd9f8806e2992/langchain_community-0.4.1-py3-none-any.whl (2.5 MB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.5/2.5 MB\u001b[0m \u001b[31m105.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting chromadb\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/e6/ec/0c42039e80b9acc534f67b73b7a42471948042859b3a64867b50a4a77fa3/chromadb-1.5.9-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (23.3 MB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m23.3/23.3 MB\u001b[0m \u001b[31m106.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hCollecting langchain_openai\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/dc/55/2865b18ee3a3dd11160b8c4b2cf37e75bf2a4a8d1d38868ffffc7b7cc180/langchain_openai-1.2.1-py3-none-any.whl (98 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m98.6/98.6 kB\u001b[0m \u001b[31m31.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting langchain_core\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/0f/1a/86c38c27b81913a1c6c12448cab55defb5a1097c7dc9a4cea83f55477a2d/langchain_core-1.4.0-py3-none-any.whl (548 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m548.1/548.1 kB\u001b[0m \u001b[31m118.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting langgraph<1.3.0,>=1.2.0 (from langchain)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/f6/e8/e3304ac0015c2bdb04ad9785e4ed65c788855ce7857ce6104dd2f5d322db/langgraph-1.2.0-py3-none-any.whl (234 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m234.3/234.3 kB\u001b[0m \u001b[31m68.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: pydantic<3.0.0,>=2.7.4 in /usr/local/lib/python3.11/site-packages (from langchain) (2.12.3)\n",
      "Collecting langchain-classic<2.0.0,>=1.0.0 (from langchain_community)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/f5/78/2d9980d028ff0523eea503a77c200e2ff252a3a75eb6e7842bcf5f9c979b/langchain_classic-1.0.7-py3-none-any.whl (1.0 MB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.0/1.0 MB\u001b[0m \u001b[31m123.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting SQLAlchemy<3.0.0,>=1.4.0 (from langchain_community)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/b1/61/0722511d98c54de95acb327824cb759e8653789af2b1944ab1cc69d32565/sqlalchemy-2.0.49-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (3.3 MB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.3/3.3 MB\u001b[0m \u001b[31m124.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: requests<3.0.0,>=2.32.5 in /usr/local/lib/python3.11/site-packages (from langchain_community) (2.32.5)\n",
      "Requirement already satisfied: PyYAML<7.0.0,>=5.3.0 in /usr/local/lib/python3.11/site-packages (from langchain_community) (6.0.3)\n",
      "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.11/site-packages (from langchain_community) (3.13.3)\n",
      "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.1.0 in /usr/local/lib/python3.11/site-packages (from langchain_community) (9.1.4)\n",
      "Collecting dataclasses-json<0.7.0,>=0.6.7 (from langchain_community)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/c3/be/d0d44e092656fe7a06b55e6103cbce807cdbdee17884a5367c68c9860853/dataclasses_json-0.6.7-py3-none-any.whl (28 kB)\n",
      "Collecting pydantic-settings<3.0.0,>=2.10.1 (from langchain_community)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/ae/8d/f1af3832f5e6eb13ba94ee809e72b8ecb5eef226d27ee0bef7d963d943c7/pydantic_settings-2.14.1-py3-none-any.whl (60 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m61.0/61.0 kB\u001b[0m \u001b[31m22.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting langsmith<1.0.0,>=0.1.125 (from langchain_community)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/db/94/8b872959ea529ecfbbe2c3f91d9ebf98cb8dbd9e3f7487bc134740d3d235/langsmith-0.8.4-py3-none-any.whl (398 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m398.7/398.7 kB\u001b[0m \u001b[31m97.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/d2/fd/6668e5aec43ab844de6fc74927e155a3b37bf40d7c3790e49fc0406b6578/httpx_sse-0.4.3-py3-none-any.whl (9.0 kB)\n",
      "Requirement already satisfied: numpy>=1.26.2 in /usr/local/lib/python3.11/site-packages (from langchain_community) (1.26.4)\n",
      "Collecting build>=1.0.3 (from chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/0d/fe/6bea5c9162869c5beba5d9c8abbed835ec85bf1ec1fba05a3822325c45f3/build-1.5.0-py3-none-any.whl (26 kB)\n",
      "Collecting pybase64>=1.4.1 (from chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/64/15/8d60b9ec5e658185fc2ee3333e01a6e30d717cf677b24f47cbb3a859d13c/pybase64-1.4.3-cp311-cp311-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl (71 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.4/71.4 kB\u001b[0m \u001b[31m23.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: uvicorn>=0.18.3 in /usr/local/lib/python3.11/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (0.41.0)\n",
      "Requirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.11/site-packages (from chromadb) (4.15.0)\n",
      "Collecting onnxruntime>=1.14.1 (from chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/b6/3f/8de630f595daf6ce884d4dd95afd2a60e70ec6572e52bfee3aa2229befab/onnxruntime-1.26.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (18.2 MB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m18.2/18.2 MB\u001b[0m \u001b[31m115.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n",
      "\u001b[?25hCollecting opentelemetry-api>=1.2.0 (from chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/29/59/3e7118ed140f76b0982ba4321bdaed1997a0473f9720de2d10788a577033/opentelemetry_api-1.41.1-py3-none-any.whl (69 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m69.0/69.0 kB\u001b[0m \u001b[31m23.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/ac/f2/c54f33c92443d087703e57e52e55f22f111373a5c4c4aa349ea60efe512e/opentelemetry_exporter_otlp_proto_grpc-1.41.1-py3-none-any.whl (20 kB)\n",
      "Collecting opentelemetry-sdk>=1.2.0 (from chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/b4/e7/a1420b698aad018e1cf60fdbaaccbe49021fb415e2a0d81c242f4c518f54/opentelemetry_sdk-1.41.1-py3-none-any.whl (180 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m180.2/180.2 kB\u001b[0m \u001b[31m51.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: tokenizers>=0.13.2 in /usr/local/lib/python3.11/site-packages (from chromadb) (0.22.2)\n",
      "Collecting pypika>=0.48.9 (from chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/57/83/c77dfeed04022e8930b08eedca2b6e5efed256ab3321396fde90066efb65/pypika-0.51.1-py2.py3-none-any.whl (60 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m60.6/60.6 kB\u001b[0m \u001b[31m18.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: tqdm>=4.65.0 in /usr/local/lib/python3.11/site-packages (from chromadb) (4.67.3)\n",
      "Requirement already satisfied: overrides>=7.3.1 in /usr/local/lib/python3.11/site-packages (from chromadb) (7.7.0)\n",
      "Collecting importlib-resources (from chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/8a/db/55a262f3606bebcae07cc14095338471ad7c0bbcaa37707e6f0ee49725b7/importlib_resources-7.1.0-py3-none-any.whl (37 kB)\n",
      "Requirement already satisfied: grpcio>=1.58.0 in /usr/local/lib/python3.11/site-packages (from chromadb) (1.78.0)\n",
      "Collecting bcrypt>=4.0.1 (from chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/d4/8d/5e43d9584b3b3591a6f9b68f755a4da879a59712981ef5ad2a0ac1379f7a/bcrypt-5.0.0-cp39-abi3-manylinux_2_34_x86_64.whl (278 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m278.2/278.2 kB\u001b[0m \u001b[31m69.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: typer>=0.9.0 in /usr/local/lib/python3.11/site-packages (from chromadb) (0.23.1)\n",
      "Collecting kubernetes>=28.1.0 (from chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/0c/70/05b685ea2dffcb2adbf3cdcea5d8865b7bc66f67249084cf845012a0ff13/kubernetes-35.0.0-py2.py3-none-any.whl (2.0 MB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.0/2.0 MB\u001b[0m \u001b[31m135.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting mmh3>=4.0.1 (from chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/ee/93/723e317dd9e041c4dc4566a2eb53b01ad94de31750e0b834f1643905e97c/mmh3-5.2.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl (103 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m103.1/103.1 kB\u001b[0m \u001b[31m33.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: orjson>=3.9.12 in /usr/local/lib/python3.11/site-packages (from chromadb) (3.11.7)\n",
      "Requirement already satisfied: httpx>=0.27.0 in /usr/local/lib/python3.11/site-packages (from chromadb) (0.28.1)\n",
      "Requirement already satisfied: rich>=10.11.0 in /usr/local/lib/python3.11/site-packages (from chromadb) (14.3.3)\n",
      "Requirement already satisfied: jsonschema>=4.19.0 in /usr/local/lib/python3.11/site-packages (from chromadb) (4.26.0)\n",
      "Requirement already satisfied: openai<3.0.0,>=2.26.0 in /usr/local/lib/python3.11/site-packages (from langchain_openai) (2.26.0)\n",
      "Requirement already satisfied: tiktoken<1.0.0,>=0.7.0 in /usr/local/lib/python3.11/site-packages (from langchain_openai) (0.12.0)\n",
      "Collecting jsonpatch<2.0.0,>=1.33.0 (from langchain_core)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/73/07/02e16ed01e04a374e644b575638ec7987ae846d25ad97bcc9945a3ee4b0e/jsonpatch-1.33-py2.py3-none-any.whl (12 kB)\n",
      "Collecting langchain-protocol>=0.0.14 (from langchain_core)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/1d/7a/9c97a7b9cbe4c5dc6a44cdb1545450c28f0c8ce89b9c1f0ee7fbad896263/langchain_protocol-0.0.15-py3-none-any.whl (7.0 kB)\n",
      "Requirement already satisfied: packaging>=23.2.0 in /usr/local/lib/python3.11/site-packages (from langchain_core) (26.0)\n",
      "Collecting uuid-utils<1.0,>=0.12.0 (from langchain_core)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/f0/64/e4face9cb91260587b0193bb81ba058f476204a9a7d1ca754d31e414fc92/uuid_utils-0.15.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (326 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m326.5/326.5 kB\u001b[0m \u001b[31m85.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: aiohappyeyeballs>=2.5.0 in /usr/local/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (2.6.1)\n",
      "Requirement already satisfied: aiosignal>=1.4.0 in /usr/local/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (1.4.0)\n",
      "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (25.4.0)\n",
      "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (1.8.0)\n",
      "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (6.7.1)\n",
      "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (0.4.1)\n",
      "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain_community) (1.23.0)\n",
      "Collecting pyproject_hooks (from build>=1.0.3->chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/bd/24/12818598c362d7f300f18e74db45963dbcb85150324092410c8b49405e42/pyproject_hooks-1.2.0-py3-none-any.whl (10 kB)\n",
      "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7.0,>=0.6.7->langchain_community)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/be/2f/5108cb3ee4ba6501748c4908b908e55f42a5b66245b4cfe0c99326e1ef6e/marshmallow-3.26.2-py3-none-any.whl (50 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.0/51.0 kB\u001b[0m \u001b[31m16.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7.0,>=0.6.7->langchain_community)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/65/f3/107a22063bf27bdccf2024833d3445f4eea42b2e598abfbd46f6a63b6cb0/typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n",
      "Requirement already satisfied: anyio in /usr/local/lib/python3.11/site-packages (from httpx>=0.27.0->chromadb) (4.12.1)\n",
      "Requirement already satisfied: certifi in /usr/local/lib/python3.11/site-packages (from httpx>=0.27.0->chromadb) (2026.2.25)\n",
      "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/site-packages (from httpx>=0.27.0->chromadb) (1.0.9)\n",
      "Requirement already satisfied: idna in /usr/local/lib/python3.11/site-packages (from httpx>=0.27.0->chromadb) (3.11)\n",
      "Requirement already satisfied: h11>=0.16 in /usr/local/lib/python3.11/site-packages (from httpcore==1.*->httpx>=0.27.0->chromadb) (0.16.0)\n",
      "Collecting jsonpointer>=1.9 (from jsonpatch<2.0.0,>=1.33.0->langchain_core)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/9e/6a/a83720e953b1682d2d109d3c2dbb0bc9bf28cc1cbc205be4ef4be5da709d/jsonpointer-3.1.1-py3-none-any.whl (7.7 kB)\n",
      "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/site-packages (from jsonschema>=4.19.0->chromadb) (2025.9.1)\n",
      "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/site-packages (from jsonschema>=4.19.0->chromadb) (0.37.0)\n",
      "Requirement already satisfied: rpds-py>=0.25.0 in /usr/local/lib/python3.11/site-packages (from jsonschema>=4.19.0->chromadb) (0.30.0)\n",
      "Requirement already satisfied: six>=1.9.0 in /usr/local/lib/python3.11/site-packages (from kubernetes>=28.1.0->chromadb) (1.17.0)\n",
      "Requirement already satisfied: python-dateutil>=2.5.3 in /usr/local/lib/python3.11/site-packages (from kubernetes>=28.1.0->chromadb) (2.9.0.post0)\n",
      "Collecting websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0 (from kubernetes>=28.1.0->chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/34/db/b10e48aa8fff7407e67470363eac595018441cf32d5e1001567a7aeba5d2/websocket_client-1.9.0-py3-none-any.whl (82 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m82.6/82.6 kB\u001b[0m \u001b[31m23.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting requests-oauthlib (from kubernetes>=28.1.0->chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/3b/5d/63d4ae3b9daea098d5d6f5da83984853c1bbacd5dc826764b249fe119d24/requests_oauthlib-2.0.0-py2.py3-none-any.whl (24 kB)\n",
      "Requirement already satisfied: urllib3!=2.6.0,>=1.24.2 in /usr/local/lib/python3.11/site-packages (from kubernetes>=28.1.0->chromadb) (2.6.3)\n",
      "Collecting durationpy>=0.7 (from kubernetes>=28.1.0->chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/b0/0d/9feae160378a3553fa9a339b0e9c1a048e147a4127210e286ef18b730f03/durationpy-0.10-py3-none-any.whl (3.9 kB)\n",
      "Collecting langchain-text-splitters<2.0.0,>=1.1.2 (from langchain-classic<2.0.0,>=1.0.0->langchain_community)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/d3/26/1ef06f56198d631296d646a6223de35bcc6cf9795ceb2442816bc963b84c/langchain_text_splitters-1.1.2-py3-none-any.whl (35 kB)\n",
      "Collecting langgraph-checkpoint<5.0.0,>=4.1.0 (from langgraph<1.3.0,>=1.2.0->langchain)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/93/74/d3be2b41955e20ccd624dba5f6fe9d38dcee385ba470a6e13ed86732fc86/langgraph_checkpoint-4.1.0-py3-none-any.whl (56 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.0/56.0 kB\u001b[0m \u001b[31m16.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting langgraph-prebuilt<1.2.0,>=1.1.0 (from langgraph<1.3.0,>=1.2.0->langchain)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/e9/43/3fe1a700b8490ed02679cdbbc8c915eb23a092faf496c9c1118abcd10be3/langgraph_prebuilt-1.1.0-py3-none-any.whl (41 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m41.0/41.0 kB\u001b[0m \u001b[31m12.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting langgraph-sdk<0.4.0,>=0.3.0 (from langgraph<1.3.0,>=1.2.0->langchain)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/34/96/1c9f9fbfe756ddd850a2585e7f1949d8ebb97fdaa7a5eff8f45ed1314670/langgraph_sdk-0.3.14-py3-none-any.whl (97 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m97.0/97.0 kB\u001b[0m \u001b[31m27.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: xxhash>=3.5.0 in /usr/local/lib/python3.11/site-packages (from langgraph<1.3.0,>=1.2.0->langchain) (3.6.0)\n",
      "Collecting requests-toolbelt>=1.0.0 (from langsmith<1.0.0,>=0.1.125->langchain_community)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/3f/51/d4db610ef29373b879047326cbf6fa98b6c1969d6f6dc423279de2b1be2c/requests_toolbelt-1.0.0-py2.py3-none-any.whl (54 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m54.5/54.5 kB\u001b[0m \u001b[31m17.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: zstandard>=0.23.0 in /usr/local/lib/python3.11/site-packages (from langsmith<1.0.0,>=0.1.125->langchain_community) (0.25.0)\n",
      "Collecting flatbuffers (from onnxruntime>=1.14.1->chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/e8/2d/d2a548598be01649e2d46231d151a6c56d10b964d94043a335ae56ea2d92/flatbuffers-25.12.19-py2.py3-none-any.whl (26 kB)\n",
      "Requirement already satisfied: protobuf in /usr/local/lib/python3.11/site-packages (from onnxruntime>=1.14.1->chromadb) (7.34.0)\n",
      "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.11/site-packages (from openai<3.0.0,>=2.26.0->langchain_openai) (1.9.0)\n",
      "Requirement already satisfied: jiter<1,>=0.10.0 in /usr/local/lib/python3.11/site-packages (from openai<3.0.0,>=2.26.0->langchain_openai) (0.13.0)\n",
      "Requirement already satisfied: sniffio in /usr/local/lib/python3.11/site-packages (from openai<3.0.0,>=2.26.0->langchain_openai) (1.3.1)\n",
      "Requirement already satisfied: importlib-metadata<8.8.0,>=6.0 in /usr/local/lib/python3.11/site-packages (from opentelemetry-api>=1.2.0->chromadb) (8.7.1)\n",
      "Collecting googleapis-common-protos~=1.57 (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/e7/c8/e2645aa8ed02fd4c7a2f59d68783b65b1f3cbdfe39a6308e156509d1fee8/googleapis_common_protos-1.75.0-py3-none-any.whl (300 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m300.6/300.6 kB\u001b[0m \u001b[31m79.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting opentelemetry-exporter-otlp-proto-common==1.41.1 (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/29/48/bce76d3ea772b609757e9bc844e02ab408a6446609bf74fb562062ba6b71/opentelemetry_exporter_otlp_proto_common-1.41.1-py3-none-any.whl (18 kB)\n",
      "Collecting opentelemetry-proto==1.41.1 (from opentelemetry-exporter-otlp-proto-grpc>=1.2.0->chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/e4/1e/5cd77035e3e82070e2265a63a760f715aacd3cb16dddc7efee913f297fcc/opentelemetry_proto-1.41.1-py3-none-any.whl (72 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m72.1/72.1 kB\u001b[0m \u001b[31m23.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting protobuf (from onnxruntime>=1.14.1->chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/16/92/d1e32e3e0d894fe00b15ce28ad4944ab692713f2e7f0a99787405e43533a/protobuf-6.33.6-cp39-abi3-manylinux2014_x86_64.whl (323 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m323.4/323.4 kB\u001b[0m \u001b[31m81.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting opentelemetry-semantic-conventions==0.62b1 (from opentelemetry-sdk>=1.2.0->chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/eb/a6/83dc2ab6fa397ee66fba04fe2e74bdf7be3b3870005359ceb7689103c058/opentelemetry_semantic_conventions-0.62b1-py3-none-any.whl (231 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m231.6/231.6 kB\u001b[0m \u001b[31m66.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (0.7.0)\n",
      "Requirement already satisfied: pydantic-core==2.41.4 in /usr/local/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (2.41.4)\n",
      "Requirement already satisfied: typing-inspection>=0.4.2 in /usr/local/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.7.4->langchain) (0.4.2)\n",
      "Requirement already satisfied: python-dotenv>=0.21.0 in /usr/local/lib/python3.11/site-packages (from pydantic-settings<3.0.0,>=2.10.1->langchain_community) (1.2.2)\n",
      "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.11/site-packages (from requests<3.0.0,>=2.32.5->langchain_community) (3.4.5)\n",
      "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/site-packages (from rich>=10.11.0->chromadb) (4.0.0)\n",
      "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/site-packages (from rich>=10.11.0->chromadb) (2.19.2)\n",
      "Requirement already satisfied: greenlet>=1 in /usr/local/lib/python3.11/site-packages (from SQLAlchemy<3.0.0,>=1.4.0->langchain_community) (3.3.2)\n",
      "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.11/site-packages (from tiktoken<1.0.0,>=0.7.0->langchain_openai) (2026.2.28)\n",
      "Requirement already satisfied: huggingface-hub<2.0,>=0.16.4 in /usr/local/lib/python3.11/site-packages (from tokenizers>=0.13.2->chromadb) (1.6.0)\n",
      "Requirement already satisfied: click>=8.0.0 in /usr/local/lib/python3.11/site-packages (from typer>=0.9.0->chromadb) (8.1.8)\n",
      "Requirement already satisfied: shellingham>=1.3.0 in /usr/local/lib/python3.11/site-packages (from typer>=0.9.0->chromadb) (1.5.4)\n",
      "Requirement already satisfied: annotated-doc>=0.0.2 in /usr/local/lib/python3.11/site-packages (from typer>=0.9.0->chromadb) (0.0.4)\n",
      "Collecting httptools>=0.6.3 (from uvicorn[standard]>=0.18.3->chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/cc/cc/10935db22fda0ee34c76f047590ca0a8bd9de531406a3ccb10a90e12ea21/httptools-0.7.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl (456 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m456.6/456.6 kB\u001b[0m \u001b[31m71.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting uvloop>=0.15.1 (from uvicorn[standard]>=0.18.3->chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/74/4f/256aca690709e9b008b7108bc85fba619a2bc37c6d80743d18abad16ee09/uvloop-0.22.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (3.8 MB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.8/3.8 MB\u001b[0m \u001b[31m133.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hCollecting watchfiles>=0.20 (from uvicorn[standard]>=0.18.3->chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/af/b9/a419292f05e302dea372fa7e6fda5178a92998411f8581b9830d28fb9edb/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (456 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m456.1/456.1 kB\u001b[0m \u001b[31m98.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: websockets>=10.4 in /usr/local/lib/python3.11/site-packages (from uvicorn[standard]>=0.18.3->chromadb) (15.0.1)\n",
      "Requirement already satisfied: filelock>=3.10.0 in /usr/local/lib/python3.11/site-packages (from huggingface-hub<2.0,>=0.16.4->tokenizers>=0.13.2->chromadb) (3.25.0)\n",
      "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/site-packages (from huggingface-hub<2.0,>=0.16.4->tokenizers>=0.13.2->chromadb) (2025.3.0)\n",
      "Requirement already satisfied: hf-xet<2.0.0,>=1.3.2 in /usr/local/lib/python3.11/site-packages (from huggingface-hub<2.0,>=0.16.4->tokenizers>=0.13.2->chromadb) (1.4.1)\n",
      "Requirement already satisfied: zipp>=3.20 in /usr/local/lib/python3.11/site-packages (from importlib-metadata<8.8.0,>=6.0->opentelemetry-api>=1.2.0->chromadb) (3.23.0)\n",
      "Collecting ormsgpack>=1.12.0 (from langgraph-checkpoint<5.0.0,>=4.1.0->langgraph<1.3.0,>=1.2.0->langchain)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/1b/33/543627f323ff3c73091f51d6a20db28a1a33531af30873ea90c5ac95a9b5/ormsgpack-1.12.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m212.4/212.4 kB\u001b[0m \u001b[31m62.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/site-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->chromadb) (0.1.2)\n",
      "Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7.0,>=0.6.7->langchain_community)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl (5.0 kB)\n",
      "Collecting oauthlib>=3.0.0 (from requests-oauthlib->kubernetes>=28.1.0->chromadb)\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/be/9c/92789c596b8df838baa98fa71844d84283302f7604ed565dafe5a6b5041a/oauthlib-3.3.1-py3-none-any.whl (160 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m160.1/160.1 kB\u001b[0m \u001b[31m48.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hInstalling collected packages: pypika, flatbuffers, durationpy, websocket-client, uvloop, uuid-utils, SQLAlchemy, pyproject_hooks, pybase64, protobuf, ormsgpack, oauthlib, mypy-extensions, mmh3, marshmallow, langchain-protocol, jsonpointer, importlib-resources, httpx-sse, httptools, bcrypt, watchfiles, typing-inspect, requests-toolbelt, requests-oauthlib, opentelemetry-proto, opentelemetry-api, onnxruntime, jsonpatch, googleapis-common-protos, build, pydantic-settings, opentelemetry-semantic-conventions, opentelemetry-exporter-otlp-proto-common, langsmith, langgraph-sdk, kubernetes, dataclasses-json, opentelemetry-sdk, langchain_core, opentelemetry-exporter-otlp-proto-grpc, langgraph-checkpoint, langchain-text-splitters, langchain_openai, langgraph-prebuilt, langchain-classic, chromadb, langgraph, langchain_community, langchain\n",
      "  Attempting uninstall: protobuf\n",
      "    Found existing installation: protobuf 7.34.0\n",
      "    Uninstalling protobuf-7.34.0:\n",
      "      Successfully uninstalled protobuf-7.34.0\n",
      "Successfully installed SQLAlchemy-2.0.49 bcrypt-5.0.0 build-1.5.0 chromadb-1.5.9 dataclasses-json-0.6.7 durationpy-0.10 flatbuffers-25.12.19 googleapis-common-protos-1.75.0 httptools-0.7.1 httpx-sse-0.4.3 importlib-resources-7.1.0 jsonpatch-1.33 jsonpointer-3.1.1 kubernetes-35.0.0 langchain-1.3.0 langchain-classic-1.0.7 langchain-protocol-0.0.15 langchain-text-splitters-1.1.2 langchain_community-0.4.1 langchain_core-1.4.0 langchain_openai-1.2.1 langgraph-1.2.0 langgraph-checkpoint-4.1.0 langgraph-prebuilt-1.1.0 langgraph-sdk-0.3.14 langsmith-0.8.4 marshmallow-3.26.2 mmh3-5.2.1 mypy-extensions-1.1.0 oauthlib-3.3.1 onnxruntime-1.26.0 opentelemetry-api-1.41.1 opentelemetry-exporter-otlp-proto-common-1.41.1 opentelemetry-exporter-otlp-proto-grpc-1.41.1 opentelemetry-proto-1.41.1 opentelemetry-sdk-1.41.1 opentelemetry-semantic-conventions-0.62b1 ormsgpack-1.12.2 protobuf-6.33.6 pybase64-1.4.3 pydantic-settings-2.14.1 pypika-0.51.1 pyproject_hooks-1.2.0 requests-oauthlib-2.0.0 requests-toolbelt-1.0.0 typing-inspect-0.9.0 uuid-utils-0.15.0 uvloop-0.22.1 watchfiles-1.1.1 websocket-client-1.9.0\n",
      "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
      "\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m26.1.1\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "!pip install langchain langchain_community chromadb langchain_openai langchain_core"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "1Eg0ay3TNhRm",
    "tags": []
   },
   "source": [
    "### 创建一个对话模型(no RAG)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2026-05-14T11:57:56.652294Z",
     "iopub.status.busy": "2026-05-14T11:57:56.652121Z",
     "iopub.status.idle": "2026-05-14T11:58:02.311773Z",
     "shell.execute_reply": "2026-05-14T11:58:02.311237Z",
     "shell.execute_reply.started": "2026-05-14T11:57:56.652276Z"
    },
    "id": "yd3OUQIWNhRm",
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "from langchain_openai  import ChatOpenAI\n",
    "\n",
    "\n",
    "os.environ['OPENAI_API_KEY'] = \"sk-q4U13e0730fabd22176af25b9454c7896610386fc114fi4t\"\n",
    "os.environ[\"OPENAI_API_BASE\"] = \"https://api.gptsapi.net/v1\"\n",
    "chat = ChatOpenAI(\n",
    "    openai_api_key=os.environ[\"OPENAI_API_KEY\"],\n",
    "    model='gpt-4o-mini'\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "YAxQlUl1Cwom"
   },
   "source": [
    "OpenAI Python 的例子\n",
    "```python\n",
    "[\n",
    "    {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
    "    {\"role\": \"user\", \"content\": \"Knock knock.\"},\n",
    "    {\"role\": \"assistant\", \"content\": \"Who's there?\"},\n",
    "    {\"role\": \"user\", \"content\": \"Orange.\"},\n",
    "]\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2026-05-14T12:06:37.680233Z",
     "iopub.status.busy": "2026-05-14T12:06:37.680066Z",
     "iopub.status.idle": "2026-05-14T12:06:37.683367Z",
     "shell.execute_reply": "2026-05-14T12:06:37.682759Z",
     "shell.execute_reply.started": "2026-05-14T12:06:37.680219Z"
    },
    "id": "jbPKtHYsNhRn",
    "tags": []
   },
   "outputs": [],
   "source": [
    "\n",
    "from langchain_core.messages import (\n",
    "    SystemMessage,\n",
    "    HumanMessage,\n",
    "    AIMessage\n",
    ")\n",
    "\n",
    "\n",
    "messages = [\n",
    "    SystemMessage(content=\"You are a helpful assistant.\"),\n",
    "    HumanMessage(content=\"Knock knock.\"),\n",
    "    AIMessage(content=\"Who's there?\"),\n",
    "    HumanMessage(content=\"Orange\"),\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "execution": {
     "iopub.execute_input": "2026-05-14T12:06:39.067600Z",
     "iopub.status.busy": "2026-05-14T12:06:39.067423Z",
     "iopub.status.idle": "2026-05-14T12:06:40.706470Z",
     "shell.execute_reply": "2026-05-14T12:06:40.705722Z",
     "shell.execute_reply.started": "2026-05-14T12:06:39.067583Z"
    },
    "id": "11iDGkWPNhRn",
    "outputId": "e3698bd8-b14b-4abf-957f-041c0abd7f7f",
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Orange who?\n"
     ]
    }
   ],
   "source": [
    "res = chat.invoke(messages)\n",
    "print(res.content)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "m7db28OOMjig"
   },
   "source": [
    "因为 `res` 本身就是一个 `AIMessage` 对象，所以可以直接把它追加到 `messages` 中，再次调用模型即可得到下一轮的响应。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecutionIndicator": {
     "show": false
    },
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "execution": {
     "iopub.execute_input": "2026-05-14T12:06:40.707376Z",
     "iopub.status.busy": "2026-05-14T12:06:40.707068Z",
     "iopub.status.idle": "2026-05-14T12:06:41.998324Z",
     "shell.execute_reply": "2026-05-14T12:06:41.997535Z",
     "shell.execute_reply.started": "2026-05-14T12:06:40.707351Z"
    },
    "id": "85dW12laNLmO",
    "outputId": "7e431e35-7673-41d3-8ac7-a4b352c1b1be",
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[SystemMessage(content='You are a helpful assistant.', additional_kwargs={}, response_metadata={}), HumanMessage(content='Knock knock.', additional_kwargs={}, response_metadata={}), AIMessage(content=\"Who's there?\", additional_kwargs={}, response_metadata={}, tool_calls=[], invalid_tool_calls=[]), HumanMessage(content='Orange', additional_kwargs={}, response_metadata={}), AIMessage(content='Orange who?', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 4, 'prompt_tokens': 33, 'total_tokens': 37, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0, 'image_tokens': 0, 'text_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0, 'image_tokens': 0, 'text_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-4o-mini', 'system_fingerprint': 'fp_eb37e061ec', 'id': 'chatcmpl-DfP7o3LMHwy79hjBJ8BsRiPGR8NgL', 'finish_reason': 'stop', 'logprobs': None}, id='lc_run--019e2661-b8dc-7780-a36f-f6ffac93db98-0', tool_calls=[], invalid_tool_calls=[], usage_metadata={'input_tokens': 33, 'output_tokens': 4, 'total_tokens': 37, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})]\n",
      "Orange you going to let me in?\n"
     ]
    }
   ],
   "source": [
    "messages.append(res)\n",
    "print(messages)\n",
    "res = chat.invoke(messages)\n",
    "\n",
    "print(res.content)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "1YtYH-1oNhRo",
    "tags": [
     "parameters"
    ]
   },
   "source": [
    "#### 处理LLM存在的缺陷\n",
    "1. 容易出现幻觉\n",
    "2. 信息滞后\n",
    "3. 专业领域深度知识匮乏\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecutionIndicator": {
     "show": false
    },
    "execution": {
     "iopub.execute_input": "2026-05-14T12:06:42.867073Z",
     "iopub.status.busy": "2026-05-14T12:06:42.866898Z",
     "iopub.status.idle": "2026-05-14T12:06:42.869960Z",
     "shell.execute_reply": "2026-05-14T12:06:42.869286Z",
     "shell.execute_reply.started": "2026-05-14T12:06:42.867057Z"
    },
    "id": "1PZxdF06NhRp",
    "tags": []
   },
   "outputs": [],
   "source": [
    "messages = [\n",
    "    SystemMessage(content=\"你是一个专业的知识助手。\"),\n",
    "    HumanMessage(content=\"你知道baichuan2模型吗？\"),\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "execution": {
     "iopub.execute_input": "2026-05-14T12:06:44.277029Z",
     "iopub.status.busy": "2026-05-14T12:06:44.276854Z",
     "iopub.status.idle": "2026-05-14T12:06:49.020273Z",
     "shell.execute_reply": "2026-05-14T12:06:49.019631Z",
     "shell.execute_reply.started": "2026-05-14T12:06:44.277014Z"
    },
    "id": "Vw-ylWZMQW5z",
    "outputId": "f4ff154a-c8af-457c-a2db-fa310b92ccc3",
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "是的，我知道Baichuan 2模型。Baichuan 2是一个基于大规模数据训练的中文预训练语言模型，旨在处理各种自然语言处理任务。该模型的设计考虑了丰富的上下文信息，能够生成高质量的文本和理解复杂的语言表达。Baichuan 2在对话系统、文本生成、问答等应用中都有很好的表现。如果你有具体的问题或者需要了解更多的细节，可以告诉我！\n"
     ]
    }
   ],
   "source": [
    "res = chat.invoke(messages)\n",
    "print(res.content)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "XXir07e9yKNW"
   },
   "source": [
    "ChatGPT 这类通用大模型无法满足我们在某些特定领域的专业需求。我们可以通过知识注入的方式，把相关知识写进 prompt 来解决这个问题："
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecutionIndicator": {
     "show": false
    },
    "execution": {
     "iopub.execute_input": "2026-05-14T12:06:49.020879Z",
     "iopub.status.busy": "2026-05-14T12:06:49.020746Z",
     "iopub.status.idle": "2026-05-14T12:06:49.024156Z",
     "shell.execute_reply": "2026-05-14T12:06:49.023499Z",
     "shell.execute_reply.started": "2026-05-14T12:06:49.020865Z"
    },
    "id": "6MTJRA2nQW_E",
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Baichuan 2是一个大规模多语言语言模型，它专注于训练在多种语言中表现优异的模型，包括不仅限于英文。这使得Baichuan 2在处理各种语言的任务时能够取得显著的性能提升。\n",
      "Baichuan 2是从头开始训练的，使用了包括了2.6万亿个标记的庞大训练数据集。相对于以往的模型，Baichuan 2提供了更丰富的数据资源，从而能够更好地支持多语言的开发和应用。\n",
      "Baichuan 2不仅在通用任务上表现出色，还在特定领域（如医学和法律）的任务中展现了卓越的性能。这为特定领域的应用提供了强有力的支持。\n"
     ]
    }
   ],
   "source": [
    "\n",
    "baichuan2_information = [\n",
    "    \"Baichuan 2是一个大规模多语言语言模型，它专注于训练在多种语言中表现优异的模型，包括不仅限于英文。这使得Baichuan 2在处理各种语言的任务时能够取得显著的性能提升。\",\n",
    "    \"Baichuan 2是从头开始训练的，使用了包括了2.6万亿个标记的庞大训练数据集。相对于以往的模型，Baichuan 2提供了更丰富的数据资源，从而能够更好地支持多语言的开发和应用。\",\n",
    "    \"Baichuan 2不仅在通用任务上表现出色，还在特定领域（如医学和法律）的任务中展现了卓越的性能。这为特定领域的应用提供了强有力的支持。\"\n",
    "]\n",
    "\n",
    "source_knowledge = \"\\n\".join(baichuan2_information)\n",
    "print(source_knowledge)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2026-05-14T12:06:53.539749Z",
     "iopub.status.busy": "2026-05-14T12:06:53.539563Z",
     "iopub.status.idle": "2026-05-14T12:06:53.543018Z",
     "shell.execute_reply": "2026-05-14T12:06:53.542241Z",
     "shell.execute_reply.started": "2026-05-14T12:06:53.539732Z"
    },
    "id": "pdgyyDx2yx8M",
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "基于以下内容回答问题：\n",
      "\n",
      "内容:\n",
      "Baichuan 2是一个大规模多语言语言模型，它专注于训练在多种语言中表现优异的模型，包括不仅限于英文。这使得Baichuan 2在处理各种语言的任务时能够取得显著的性能提升。\n",
      "Baichuan 2是从头开始训练的，使用了包括了2.6万亿个标记的庞大训练数据集。相对于以往的模型，Baichuan 2提供了更丰富的数据资源，从而能够更好地支持多语言的开发和应用。\n",
      "Baichuan 2不仅在通用任务上表现出色，还在特定领域（如医学和法律）的任务中展现了卓越的性能。这为特定领域的应用提供了强有力的支持。\n",
      "\n",
      "Query: 你知道baichuan2模型数据规模吗？\n"
     ]
    }
   ],
   "source": [
    "query = \"你知道baichuan2模型数据规模吗？\"\n",
    "\n",
    "prompt_template = f\"\"\"基于以下内容回答问题：\n",
    "\n",
    "内容:\n",
    "{source_knowledge}\n",
    "\n",
    "Query: {query}\"\"\"\n",
    "\n",
    "print(prompt_template)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2026-05-14T12:06:55.407436Z",
     "iopub.status.busy": "2026-05-14T12:06:55.407246Z",
     "iopub.status.idle": "2026-05-14T12:06:55.410707Z",
     "shell.execute_reply": "2026-05-14T12:06:55.410072Z",
     "shell.execute_reply.started": "2026-05-14T12:06:55.407420Z"
    },
    "id": "ZwSPm_3qzWB7",
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[SystemMessage(content='你是一个专业的知识助手。', additional_kwargs={}, response_metadata={}), HumanMessage(content='你知道baichuan2模型吗？', additional_kwargs={}, response_metadata={}), HumanMessage(content='基于以下内容回答问题：\\n\\n内容:\\nBaichuan 2是一个大规模多语言语言模型，它专注于训练在多种语言中表现优异的模型，包括不仅限于英文。这使得Baichuan 2在处理各种语言的任务时能够取得显著的性能提升。\\nBaichuan 2是从头开始训练的，使用了包括了2.6万亿个标记的庞大训练数据集。相对于以往的模型，Baichuan 2提供了更丰富的数据资源，从而能够更好地支持多语言的开发和应用。\\nBaichuan 2不仅在通用任务上表现出色，还在特定领域（如医学和法律）的任务中展现了卓越的性能。这为特定领域的应用提供了强有力的支持。\\n\\nQuery: 你知道baichuan2模型数据规模吗？', additional_kwargs={}, response_metadata={})]\n"
     ]
    }
   ],
   "source": [
    "prompt = HumanMessage(\n",
    "    content=prompt_template\n",
    ")\n",
    "messages.append(prompt)\n",
    "print(messages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "execution": {
     "iopub.execute_input": "2026-05-14T12:06:56.857266Z",
     "iopub.status.busy": "2026-05-14T12:06:56.857082Z",
     "iopub.status.idle": "2026-05-14T12:07:00.005256Z",
     "shell.execute_reply": "2026-05-14T12:07:00.004616Z",
     "shell.execute_reply.started": "2026-05-14T12:06:56.857250Z"
    },
    "id": "P861bTreziWz",
    "outputId": "62aaf1ba-aebe-4576-c5cc-7203f677d205",
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Baichuan 2模型使用了包含2.6万亿个标记的庞大训练数据集。这使得它在处理多种语言的任务时能够取得显著的性能提升。\n"
     ]
    }
   ],
   "source": [
    "res = chat.invoke(messages)\n",
    "print(res.content)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "-Npmwyy808i6"
   },
   "source": [
    "当我们注入一些专业的知识后，模型就能够很好地回答相关问题。\n",
    "那么，如果每一个问题都用相关的外部知识进行增强拼接，回答的准确性是不是就能大大提高？"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "tags": []
   },
   "source": [
    "### 简单习题"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 习题1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2025-10-24T14:09:38.395349Z",
     "iopub.status.busy": "2025-10-24T14:09:38.395126Z",
     "iopub.status.idle": "2025-10-24T14:09:42.582938Z",
     "shell.execute_reply": "2025-10-24T14:09:42.582528Z",
     "shell.execute_reply.started": "2025-10-24T14:09:38.395336Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "库欣综合征的主要症状包括：\n",
      "\n",
      "- 向心性肥胖\n",
      "- 满月脸\n",
      "- 紫纹\n",
      "- 多血质外貌\n",
      "- 高血压\n",
      "- 糖代谢异常\n",
      "- 肌肉骨骼异常\n",
      "- 性功能改变\n",
      "- 造血系统的改变\n",
      "- 皮肤可能暗红，容易发生感染\n",
      "\n",
      "此外，可能出现电解质和酸碱平衡紊乱等情况。并发症可能包括感染、心血管疾病、骨质疏松、代谢综合征以及精神障碍。\n",
      "\n",
      "关于发病率，库欣综合征在20-45岁之间的年龄段较常见，女性发病率高于男性。每年每100万人中有2-3例新发病例，而每100万人中约有40例库欣综合征患者。\n"
     ]
    }
   ],
   "source": [
    "\n",
    "# 尝试引入知识询问大模型 “你知道库欣综合症吗？”\n",
    "# TODO 结合百度百科 https://baike.baidu.com/item/%E5%BA%93%E6%AC%A3%E7%BB%BC%E5%90%88%E5%BE%81/2433615 \n",
    "messages = [\n",
    "    SystemMessage(content=\"你是一个专业的知识助手。\"),\n",
    "    # HumanMessage(content=\"你知道库欣综合症的主要症状和发病率吗？\"),\n",
    "]\n",
    "\n",
    "information = [\n",
    "    \"库欣综合征（又称皮质醇过多综合征）是由肾上腺皮质分泌过度的糖皮质激素引起的一种临床综合征。该症状可发生于任何年龄，常见于20-45岁之间，女性发病率高于男性。\",\n",
    "    \"鉴于其发病率，每年每100万人中有2-3例新发病例，而每100万人中约有40例库欣综合征患者。\",\n",
    "    \"库欣综合征的症状包括向心性肥胖，满月脸、紫纹、多血质外貌，高血压、糖代谢异常、肌肉骨骼异常、性功能改变、以及造血系统的改变等。该病患者的皮肤可能暗红，容易发生感染。在一些极少数情况下，患者可能会有电解质和酸碱平衡紊乱等情况出现。该病患者中可能出现的并发症包括感染、心血管疾病、骨质疏松、代谢综合征以及精神障碍。\"\n",
    "]\n",
    "\n",
    "source_knowledge = \"\\n\".join(information)\n",
    "# print(source_knowledge)\n",
    "query = \"你知道库欣综合症的主要症状和发病率吗？\"\n",
    "prompt_template = f\"\"\"基于以下内容回答问题：\n",
    "\n",
    "内容:\n",
    "{source_knowledge}\n",
    "\n",
    "Query: {query}\"\"\"\n",
    "\n",
    "prompt = HumanMessage(\n",
    "    content=prompt_template\n",
    ")\n",
    "messages.append(prompt)\n",
    "\n",
    "res = chat.invoke(messages)\n",
    "print(res.content)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 习题2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2025-10-24T14:09:52.946207Z",
     "iopub.status.busy": "2025-10-24T14:09:52.945924Z",
     "iopub.status.idle": "2025-10-24T14:09:55.172186Z",
     "shell.execute_reply": "2025-10-24T14:09:55.171756Z",
     "shell.execute_reply.started": "2025-10-24T14:09:52.946187Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "XXXX\n",
      "XXXX\n",
      "XXXX\n",
      "莱姆症是一种由蜱传播的细菌感染，主要由伯氏疏螺旋体（Borrelia burgdorferi）引起。感染通常始于蜱叮咬，叮咬部位可能出现红色皮疹，类似“靶心”外观，同时患者可能伴随发热、疲劳、头痛和肌肉疼痛等症状。如果不及时治疗，莱姆症可能导致更严重的并发症，如关节炎、神经系统问题和心脏问题。预防措施包括避免蜱叮咬和在可能有蜱的环境中穿着适当的衣物。治疗通常使用抗生素，如果有相关症状或接触过蜱，建议及时就医。\n"
     ]
    }
   ],
   "source": [
    "# 尝试引入知识询问大模型 “你知道莱姆症吗？”\n",
    "# TODO 结合百度百科 https://baike.baidu.com/item/%E8%8E%B1%E5%A7%86%E7%97%85\n",
    "messages = [\n",
    "    SystemMessage(content=\"你是一个专业的知识助手。\"),\n",
    "    # HumanMessage(content=\"你知道莱姆症吗？\"),\n",
    "]\n",
    "\n",
    "information = [\n",
    "    \"XXXX\",\n",
    "    \"XXXX\",\n",
    "    \"XXXX\"\n",
    "]\n",
    "\n",
    "source_knowledge = \"\\n\".join(information)\n",
    "print(source_knowledge)\n",
    "query = \"你知道莱姆症吗？\"\n",
    "prompt_template = f\"\"\"基于以下内容回答问题：\n",
    "\n",
    "内容:\n",
    "{source_knowledge}\n",
    "\n",
    "Query: {query}\"\"\"\n",
    "\n",
    "prompt = HumanMessage(\n",
    "    content=prompt_template\n",
    ")\n",
    "messages.append(prompt)\n",
    "\n",
    "res = chat.invoke(messages)\n",
    "print(res.content)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "qXfTkYm01oWp",
    "tags": []
   },
   "source": [
    "### 创建一个RAG对话模型"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "i8TTksfT2K3r",
    "tags": []
   },
   "source": [
    "#### 1. 加载数据 （以baichuan2论文为例）\n",
    "\n",
    "   https://arxiv.org/pdf/2309.10305v2.pdf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "execution": {
     "iopub.execute_input": "2026-05-14T12:07:07.867250Z",
     "iopub.status.busy": "2026-05-14T12:07:07.867072Z",
     "iopub.status.idle": "2026-05-14T12:07:11.677343Z",
     "shell.execute_reply": "2026-05-14T12:07:11.676663Z",
     "shell.execute_reply.started": "2026-05-14T12:07:07.867234Z"
    },
    "id": "iIVOnz1TxkZ4",
    "outputId": "aeeee9d2-98e0-42b1-b8f5-8ef430eb7f98",
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Looking in indexes: https://mirrors.cloud.aliyuncs.com/pypi/simple\n",
      "Collecting pypdf\n",
      "  Downloading https://mirrors.cloud.aliyuncs.com/pypi/packages/07/b1/68feb7eb3b99f0c020b414234825f4a5d70e0126c18d933770e8c93a35fc/pypdf-6.11.0-py3-none-any.whl (338 kB)\n",
      "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m338.8/338.8 kB\u001b[0m \u001b[31m10.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
      "\u001b[?25hRequirement already satisfied: langchain-text-splitters in /usr/local/lib/python3.11/site-packages (1.1.2)\n",
      "Requirement already satisfied: langchain-core<2.0.0,>=1.2.31 in /usr/local/lib/python3.11/site-packages (from langchain-text-splitters) (1.4.0)\n",
      "Requirement already satisfied: jsonpatch<2.0.0,>=1.33.0 in /usr/local/lib/python3.11/site-packages (from langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (1.33)\n",
      "Requirement already satisfied: langchain-protocol>=0.0.14 in /usr/local/lib/python3.11/site-packages (from langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (0.0.15)\n",
      "Requirement already satisfied: langsmith<1.0.0,>=0.3.45 in /usr/local/lib/python3.11/site-packages (from langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (0.8.4)\n",
      "Requirement already satisfied: packaging>=23.2.0 in /usr/local/lib/python3.11/site-packages (from langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (26.0)\n",
      "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /usr/local/lib/python3.11/site-packages (from langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (2.12.3)\n",
      "Requirement already satisfied: pyyaml<7.0.0,>=5.3.0 in /usr/local/lib/python3.11/site-packages (from langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (6.0.3)\n",
      "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.1.0 in /usr/local/lib/python3.11/site-packages (from langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (9.1.4)\n",
      "Requirement already satisfied: typing-extensions<5.0.0,>=4.7.0 in /usr/local/lib/python3.11/site-packages (from langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (4.15.0)\n",
      "Requirement already satisfied: uuid-utils<1.0,>=0.12.0 in /usr/local/lib/python3.11/site-packages (from langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (0.15.0)\n",
      "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.11/site-packages (from jsonpatch<2.0.0,>=1.33.0->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (3.1.1)\n",
      "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.11/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (0.28.1)\n",
      "Requirement already satisfied: orjson>=3.9.14 in /usr/local/lib/python3.11/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (3.11.7)\n",
      "Requirement already satisfied: requests-toolbelt>=1.0.0 in /usr/local/lib/python3.11/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (1.0.0)\n",
      "Requirement already satisfied: requests>=2.0.0 in /usr/local/lib/python3.11/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (2.32.5)\n",
      "Requirement already satisfied: xxhash>=3.0.0 in /usr/local/lib/python3.11/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (3.6.0)\n",
      "Requirement already satisfied: zstandard>=0.23.0 in /usr/local/lib/python3.11/site-packages (from langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (0.25.0)\n",
      "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (0.7.0)\n",
      "Requirement already satisfied: pydantic-core==2.41.4 in /usr/local/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (2.41.4)\n",
      "Requirement already satisfied: typing-inspection>=0.4.2 in /usr/local/lib/python3.11/site-packages (from pydantic<3.0.0,>=2.7.4->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (0.4.2)\n",
      "Requirement already satisfied: anyio in /usr/local/lib/python3.11/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (4.12.1)\n",
      "Requirement already satisfied: certifi in /usr/local/lib/python3.11/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (2026.2.25)\n",
      "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (1.0.9)\n",
      "Requirement already satisfied: idna in /usr/local/lib/python3.11/site-packages (from httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (3.11)\n",
      "Requirement already satisfied: h11>=0.16 in /usr/local/lib/python3.11/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (0.16.0)\n",
      "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.11/site-packages (from requests>=2.0.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (3.4.5)\n",
      "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/site-packages (from requests>=2.0.0->langsmith<1.0.0,>=0.3.45->langchain-core<2.0.0,>=1.2.31->langchain-text-splitters) (2.6.3)\n",
      "Installing collected packages: pypdf\n",
      "Successfully installed pypdf-6.11.0\n",
      "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
      "\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.3.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m26.1.1\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "! pip3 install pypdf langchain-text-splitters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2026-05-14T12:07:12.717275Z",
     "iopub.status.busy": "2026-05-14T12:07:12.717067Z",
     "iopub.status.idle": "2026-05-14T12:07:17.580106Z",
     "shell.execute_reply": "2026-05-14T12:07:17.579573Z",
     "shell.execute_reply.started": "2026-05-14T12:07:12.717255Z"
    },
    "id": "VD-UF8z06txb",
    "tags": []
   },
   "outputs": [],
   "source": [
    "from langchain_community.document_loaders import PyPDFLoader\n",
    "\n",
    "loader = PyPDFLoader(\"./baichuan2.pdf\")\n",
    "\n",
    "pages = loader.load_and_split()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-05-14T12:07:27.086498Z",
     "iopub.status.busy": "2026-05-14T12:07:27.086058Z",
     "iopub.status.idle": "2026-05-14T12:07:27.091668Z",
     "shell.execute_reply": "2026-05-14T12:07:27.091070Z",
     "shell.execute_reply.started": "2026-05-14T12:07:27.086478Z"
    },
    "id": "KsCy_vTs68I-",
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-09-21T00:15:31+00:00', 'author': '', 'keywords': '', 'moddate': '2023-09-21T00:15:31+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': './baichuan2.pdf', 'total_pages': 28, 'page': 0, 'page_label': '1'}, page_content='Baichuan 2: Open Large-scale Language Models\\nAiyuan Yang, Bin Xiao, Bingning Wang, Borong Zhang, Chao Yin, Chenxu Lv, Da Pan\\nDian Wang, Dong Yan, Fan Yang, Fei Deng, Feng Wang, Feng Liu, Guangwei Ai\\nGuosheng Dong, Haizhou Zhao, Hang Xu, Haoze Sun, Hongda Zhang, Hui Liu, Jiaming Ji\\nJian Xie, Juntao Dai, Kun Fang, Lei Su, Liang Song, Lifeng Liu, Liyun Ru, Luyao Ma\\nMang Wang, Mickel Liu, MingAn Lin, Nuolan Nie, Peidong Guo, Ruiyang Sun\\nTao Zhang, Tianpeng Li, Tianyu Li, Wei Cheng, Weipeng Chen, Xiangrong Zeng\\nXiaochuan Wang, Xiaoxi Chen, Xin Men, Xin Yu, Xuehai Pan, Yanjun Shen, Yiding Wang\\nYiyu Li, Youxin Jiang, Yuchen Gao, Yupeng Zhang, Zenan Zhou, Zhiying Wu\\nBaichuan Inc.\\nAbstract\\nLarge language models (LLMs) have\\ndemonstrated remarkable performance on\\na variety of natural language tasks based\\non just a few examples of natural language\\ninstructions, reducing the need for extensive\\nfeature engineering. However, most powerful\\nLLMs are closed-source or limited in their\\ncapability for languages other than English. In\\nthis technical report, we present Baichuan 2,\\na series of large-scale multilingual language\\nmodels containing 7 billion and 13 billion\\nparameters, trained from scratch, on 2.6 trillion\\ntokens. Baichuan 2 matches or outperforms\\nother open-source models of similar size on\\npublic benchmarks like MMLU, CMMLU,\\nGSM8K, and HumanEval. Furthermore,\\nBaichuan 2 excels in vertical domains such\\nas medicine and law. 
We will release all\\npre-training model checkpoints to benefit the\\nresearch community in better understanding\\nthe training dynamics of Baichuan 2.\\n1 Introduction\\nThe field of large language models has witnessed\\npromising and remarkable progress in recent years.\\nThe size of language models has grown from\\nmillions of parameters, such as ELMo (Peters\\net al., 2018), GPT-1 (Radford et al., 2018), to\\nbillions or even trillions of parameters such as GPT-\\n3 (Brown et al., 2020), PaLM (Chowdhery et al.,\\n2022; Anil et al., 2023) and Switch Transformers\\n(Fedus et al., 2022). This increase in scale has\\nled to significant improvements in the capabilities\\nof language models, enabling more human-like\\nfluency and the ability to perform a diverse range\\nof natural language tasks. With the introduction of\\nAuthors are listed alphabetically, correspondent:\\ndaniel@baichuan-inc.com.\\nChatGPT (OpenAI, 2022) from OpenAI, the power\\nof these models to generate human-like text has\\ncaptured widespread public attention. ChatGPT\\ndemonstrates strong language proficiency across\\na variety of domains, from conversing casually to\\nexplaining complex concepts. This breakthrough\\nhighlights the potential for large language models\\nto automate tasks involving natural language\\ngeneration and comprehension.\\nWhile there have been exciting breakthroughs\\nand applications of LLMs, most leading LLMs like\\nGPT-4 (OpenAI, 2023), PaLM-2 (Anil et al., 2023),\\nand Claude (Claude, 2023) remain closed-sourced.\\nDevelopers and researchers have limited access to\\nthe full model parameters, making it difficult for\\nthe community to deeply study or fine-tune these\\nsystems. 
More openness and transparency around\\nLLMs could accelerate research and responsible\\ndevelopment within this rapidly advancing field.\\nLLaMA (Touvron et al., 2023a), a series of large\\nlanguage models developed by Meta containing up\\nto 65 billion parameters, has significantly benefited\\nthe LLM research community by being fully open-\\nsourced. The open nature of LLaMA, along with\\nother open-source LLMs such as OPT (Zhang\\net al., 2022), Bloom (Scao et al., 2022), MPT\\n(MosaicML, 2023) and Falcon (Penedo et al.,\\n2023), enables researchers to freely access the\\nmodels for examination, experimentation, and\\nfurther development. This transparency and access\\ndistinguishes LLaMA from other proprietary\\nLLMs. By providing full access, the open-source\\nLLMs have accelerated research and advances in\\nthe field, leading to new models like Alpaca (Taori\\net al., 2023), Vicuna (Chiang et al., 2023), and')"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pages[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "OZN6yzwi7J61",
    "tags": []
   },
   "source": [
    "#### 2. 知识切片：将文档分割成均匀的块，每个块是一段原始文本。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2026-05-14T12:07:29.889506Z",
     "iopub.status.busy": "2026-05-14T12:07:29.889245Z",
     "iopub.status.idle": "2026-05-14T12:07:29.897113Z",
     "shell.execute_reply": "2026-05-14T12:07:29.896564Z",
     "shell.execute_reply.started": "2026-05-14T12:07:29.889485Z"
    },
    "id": "wt3G5-ph7gho",
    "tags": []
   },
   "outputs": [],
   "source": [
    "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
    "\n",
    "# Cut the loaded documents into chunks of up to 500 characters; neighbouring\n",
    "# chunks may share up to 50 characters so text at a boundary is not lost.\n",
    "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)\n",
    "docs = text_splitter.split_documents(pages)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "execution": {
     "iopub.execute_input": "2026-05-14T12:07:31.887262Z",
     "iopub.status.busy": "2026-05-14T12:07:31.887083Z",
     "iopub.status.idle": "2026-05-14T12:07:31.890387Z",
     "shell.execute_reply": "2026-05-14T12:07:31.889662Z",
     "shell.execute_reply.started": "2026-05-14T12:07:31.887246Z"
    },
    "id": "BCXqYY4D7gkp",
    "outputId": "d92b8a52-fe1e-4480-db30-1aa2f15b6716",
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "216\n",
      "page_content='languages, such as Chinese.\n",
      "In this technical report, we introduce Baichuan\n",
      "2, a series of large-scale multilingual language\n",
      "models. Baichuan 2 has two separate models,\n",
      "Baichuan 2-7B with 7 billion parameters and\n",
      "Baichuan 2-13B with 13 billion parameters. Both\n",
      "models were trained on 2.6 trillion tokens, which\n",
      "to our knowledge is the largest to date, more than\n",
      "double that of Baichuan 1 (Baichuan, 2023b,a).\n",
      "With such a massive amount of training data,' metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-09-21T00:15:31+00:00', 'author': '', 'keywords': '', 'moddate': '2023-09-21T00:15:31+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': './baichuan2.pdf', 'total_pages': 28, 'page': 1, 'page_label': '2'}\n"
     ]
    }
   ],
   "source": [
    "# Sanity-check the split: print the number of chunks, then one sample chunk.\n",
    "print(len(docs))\n",
    "print(docs[11])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "lgABYtKp8_Ke"
   },
   "source": [
    "#### 3. 利用embedding模型对每个文本片段进行向量化，并储存到向量数据库中"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2026-05-14T12:08:26.276661Z",
     "iopub.status.busy": "2026-05-14T12:08:26.276502Z",
     "iopub.status.idle": "2026-05-14T12:08:36.721019Z",
     "shell.execute_reply": "2026-05-14T12:08:36.720389Z",
     "shell.execute_reply.started": "2026-05-14T12:08:26.276647Z"
    },
    "id": "AmI_-A1-ziZN",
    "tags": []
   },
   "outputs": [],
   "source": [
    "from langchain_openai import OpenAIEmbeddings\n",
    "from langchain_community.vectorstores import Chroma\n",
    "\n",
    "# Embed every chunk with `OpenAIEmbeddings` and index the vectors in Chroma.\n",
    "embed_model = OpenAIEmbeddings()\n",
    "vectorstore = Chroma.from_documents(\n",
    "    documents=docs,\n",
    "    embedding=embed_model,\n",
    "    collection_name=\"openai_embed\",\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "G-fgbDgQC77h"
   },
   "source": [
    "#### 4. 通过向量相似度检索和问题最相关的K个文档。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-03-17T13:52:20.134171Z",
     "iopub.status.busy": "2026-03-17T13:52:20.134035Z",
     "iopub.status.idle": "2026-03-17T13:52:21.794187Z",
     "shell.execute_reply": "2026-03-17T13:52:21.793538Z",
     "shell.execute_reply.started": "2026-03-17T13:52:20.134158Z"
    },
    "id": "zuoRfbU_Du3S",
    "tags": []
   },
   "outputs": [],
   "source": [
    "# NOTE(review): the saved prompt output further down shows a different, Chinese\n",
    "# question, so this cell was apparently not re-run together with the later cells.\n",
    "# Re-run the notebook top to bottom to bring the saved outputs in line with it.\n",
    "query = \"How large is the baichuan2 vocabulary?\"\n",
    "# Fetch the two chunks most similar to the question.\n",
    "result = vectorstore.similarity_search(query, k=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "execution": {
     "iopub.execute_input": "2026-03-17T13:52:21.794789Z",
     "iopub.status.busy": "2026-03-17T13:52:21.794649Z",
     "iopub.status.idle": "2026-03-17T13:52:21.797839Z",
     "shell.execute_reply": "2026-03-17T13:52:21.797299Z",
     "shell.execute_reply.started": "2026-03-17T13:52:21.794775Z"
    },
    "id": "dliY5xHaC2NN",
    "outputId": "40191eee-e17e-4283-e63b-f5ff2088bdf5",
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Document(metadata={'creationdate': '2023-09-21T00:15:31+00:00', 'page_label': '2', 'subject': '', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'trapped': '/False', 'total_pages': 28, 'author': '', 'keywords': '', 'title': '', 'moddate': '2023-09-21T00:15:31+00:00', 'page': 1, 'source': './baichuan2.pdf', 'creator': 'LaTeX with hyperref', 'producer': 'pdfTeX-1.40.25'}, page_content='languages, such as Chinese.\\nIn this technical report, we introduce Baichuan\\n2, a series of large-scale multilingual language\\nmodels. Baichuan 2 has two separate models,\\nBaichuan 2-7B with 7 billion parameters and\\nBaichuan 2-13B with 13 billion parameters. Both\\nmodels were trained on 2.6 trillion tokens, which\\nto our knowledge is the largest to date, more than\\ndouble that of Baichuan 1 (Baichuan, 2023b,a).\\nWith such a massive amount of training data,'),\n",
       " Document(metadata={'page_label': '1', 'creationdate': '2023-09-21T00:15:31+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'trapped': '/False', 'moddate': '2023-09-21T00:15:31+00:00', 'title': '', 'keywords': '', 'total_pages': 28, 'creator': 'LaTeX with hyperref', 'source': './baichuan2.pdf', 'page': 0, 'producer': 'pdfTeX-1.40.25', 'subject': '', 'author': ''}, page_content='Baichuan 2: Open Large-scale Language Models\\nAiyuan Yang, Bin Xiao, Bingning Wang, Borong Zhang, Chao Yin, Chenxu Lv, Da Pan\\nDian Wang, Dong Yan, Fan Yang, Fei Deng, Feng Wang, Feng Liu, Guangwei Ai\\nGuosheng Dong, Haizhou Zhao, Hang Xu, Haoze Sun, Hongda Zhang, Hui Liu, Jiaming Ji\\nJian Xie, Juntao Dai, Kun Fang, Lei Su, Liang Song, Lifeng Liu, Liyun Ru, Luyao Ma\\nMang Wang, Mickel Liu, MingAn Lin, Nuolan Nie, Peidong Guo, Ruiyang Sun')]"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "result"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "ymUjel7-E-t1"
   },
   "source": [
    "#### 5. 将原始`query`与检索得到的文本组合起来输入到语言模型，得到最终的回答。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-05-14T12:12:54.690458Z",
     "iopub.status.busy": "2026-05-14T12:12:54.690253Z",
     "iopub.status.idle": "2026-05-14T12:12:54.693477Z",
     "shell.execute_reply": "2026-05-14T12:12:54.692940Z",
     "shell.execute_reply.started": "2026-05-14T12:12:54.690442Z"
    },
    "id": "9wBIBDnIC2P8",
    "tags": []
   },
   "outputs": [],
   "source": [
    "def augment_prompt(query: str, k: int = 3) -> str:\n",
    "  \"\"\"Build a retrieval-augmented prompt for `query`.\n",
    "\n",
    "  Fetches the `k` chunks most similar to `query` from the notebook-level\n",
    "  `vectorstore` and places their text, followed by the query itself, in an\n",
    "  English instruction template.\n",
    "\n",
    "  Args:\n",
    "    query: Question to answer; used for retrieval and quoted in the prompt.\n",
    "    k: How many chunks to retrieve. Defaults to 3.\n",
    "\n",
    "  Returns:\n",
    "    The assembled prompt text.\n",
    "  \"\"\"\n",
    "  # `vectorstore` is a notebook global looked up at call time.\n",
    "  results = vectorstore.similarity_search(query, k=k)\n",
    "  source_knowledge = \"\\n\".join(doc.page_content for doc in results)\n",
    "  # The template is kept verbatim, including the two-space indent of its lines.\n",
    "  augmented_prompt = f\"\"\"Using the contexts below, answer the query.\n",
    "\n",
    "  contexts:\n",
    "  {source_knowledge}\n",
    "\n",
    "  query: {query}\"\"\"\n",
    "  return augmented_prompt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2026-05-14T12:12:55.951732Z",
     "iopub.status.busy": "2026-05-14T12:12:55.951565Z",
     "iopub.status.idle": "2026-05-14T12:13:00.291077Z",
     "shell.execute_reply": "2026-05-14T12:13:00.290401Z",
     "shell.execute_reply.started": "2026-05-14T12:12:55.951717Z"
    },
    "id": "JHTutK09GRSx",
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Using the contexts below, answer the query.\n",
      "\n",
      "  contexts:\n",
      "  languages, such as Chinese.\n",
      "In this technical report, we introduce Baichuan\n",
      "2, a series of large-scale multilingual language\n",
      "models. Baichuan 2 has two separate models,\n",
      "Baichuan 2-7B with 7 billion parameters and\n",
      "Baichuan 2-13B with 13 billion parameters. Both\n",
      "models were trained on 2.6 trillion tokens, which\n",
      "to our knowledge is the largest to date, more than\n",
      "double that of Baichuan 1 (Baichuan, 2023b,a).\n",
      "With such a massive amount of training data,\n",
      "Baichuan 1-13B-Base 26.76 4.84 11.59 22.80\n",
      "13B\n",
      "Baichuan 2-13B-Base 52.77 10.08 17.07 30.20\n",
      "Table 6: The result of Baichuan 2 compared with other models on mathematics and coding.\n",
      "models, Baichuan 2-7B-Chat and Baichuan 2-\n",
      "13B-Chat, optimized to follow human instructions.\n",
      "These models excel at dialogue and context\n",
      "understanding. We will elaborate on our\n",
      "approaches to improve the safety of Baichuan 2.\n",
      "By open-sourcing these models, we hope to enable\n",
      "the community to further improve the safety of\n",
      "large language models, facilitating more research\n",
      "on responsible LLMs development.\n",
      "Furthermore, in spirit of research collaboration\n",
      "\n",
      "  query: 你知道baichuan2模型数据规模吗？\n"
     ]
    }
   ],
   "source": [
    "print(augment_prompt(query))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "execution": {
     "iopub.execute_input": "2026-05-14T12:13:00.291677Z",
     "iopub.status.busy": "2026-05-14T12:13:00.291539Z",
     "iopub.status.idle": "2026-05-14T12:13:04.017885Z",
     "shell.execute_reply": "2026-05-14T12:13:04.017316Z",
     "shell.execute_reply.started": "2026-05-14T12:13:00.291662Z"
    },
    "id": "sPNBZlRPGlDB",
    "outputId": "8d453129-f6c5-4553-877f-b692c337efdc",
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "是的，Baichuan 2模型的训练数据规模达到了2.6万亿个标记，这是迄今为止最大的训练数据量，超过了Baichuan 1模型的两倍。Baichuan 2有两个版本的模型，分别是Baichuan 2-7B（拥有70亿个参数）和Baichuan 2-13B（拥有130亿个参数）。\n"
     ]
    }
   ],
   "source": [
    "# Build the conversation: a system role plus the retrieval-augmented user prompt.\n",
    "prompt = HumanMessage(content=augment_prompt(query))\n",
    "messages = [\n",
    "    SystemMessage(content=\"你是一个专业的知识助手。\"),\n",
    "    prompt,\n",
    "]\n",
    "\n",
    "# Send the conversation to the chat model and show its reply.\n",
    "res = chat.invoke(messages)\n",
    "print(res.content)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "tags": []
   },
   "source": [
    "### 习题：医学文献RAG实操"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2025-10-24T14:15:04.877755Z",
     "iopub.status.busy": "2025-10-24T14:15:04.877533Z",
     "iopub.status.idle": "2025-10-24T14:15:05.123756Z",
     "shell.execute_reply": "2025-10-24T14:15:05.123338Z",
     "shell.execute_reply.started": "2025-10-24T14:15:04.877741Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "page_content='1\n",
      "弥漫性大 B 细胞淋巴瘤诊疗指南\n",
      "（2022 年版）\n",
      "一、概述\n",
      "弥漫性大 B 细胞淋巴瘤（diffuse large B cell lymphoma，\n",
      "DLBCL）是一种来源于成熟 B 细胞的侵袭性肿瘤，是最常见的非霍\n",
      "奇金淋巴瘤类型，约占全部非霍奇金淋巴瘤的 25%～50%。DLBCL 临\n",
      "床异质性大，2016 年 WHO 分类中列出了下列大B 细胞淋巴瘤亚型：\n",
      "1.非特指型。\n",
      "（1）生发中心 B 细胞亚型。\n",
      "（2）活化B 细胞亚型。\n",
      "2.富 T/组织细胞的大B 细胞淋巴瘤。\n",
      "3.原发中枢神经系统弥漫大 B 细胞淋巴瘤。\n",
      "4.原发皮肤弥漫大 B 细胞淋巴瘤，腿型。\n",
      "5.EB 病毒阳性弥漫大B 细胞淋巴瘤，非特指型。\n",
      "6.EB 病毒阳性粘膜溃疡。\n",
      "7.慢性炎症相关的弥漫大 B 细胞淋巴瘤。\n",
      "8.淋巴瘤样肉芽肿\n",
      "9.原发纵隔大 B 细胞淋巴瘤。\n",
      "10.血管内大B 细胞淋巴瘤。\n",
      "11.ALK 阳性大B 细胞淋巴瘤。' metadata={'producer': '', 'creator': 'WPS 文字', 'creationdate': '2022-03-29T10:18:59+02:00', 'author': 'Zhou, Yaping (ELS-BEI)', 'comments': '', 'company': '', 'keywords': '', 'moddate': '2022-04-15T17:30:48+08:00', 'sourcemodified': \"D:20220329101859+02'18'\", 'subject': '', 'title': '', 'trapped': '/False', 'source': './弥漫性大B细胞淋巴瘤诊疗指南+（2022年版）.pdf', 'total_pages': 15, 'page': 0, 'page_label': '1'}\n"
     ]
    }
   ],
   "source": [
    "from langchain_community.document_loaders import PyPDFLoader\n",
    "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
    "\n",
    "# Load the guideline PDF. `load()` is used instead of `load_and_split()`\n",
    "# because the latter already applies a default text splitter, and the documents\n",
    "# are split explicitly right below.\n",
    "loader = PyPDFLoader(\"./弥漫性大B细胞淋巴瘤诊疗指南+（2022年版）.pdf\")\n",
    "pages = loader.load()\n",
    "\n",
    "# Chunk the text: up to 500 characters per chunk, up to 50 shared with the next.\n",
    "text_splitter = RecursiveCharacterTextSplitter(\n",
    "    chunk_size=500,\n",
    "    chunk_overlap=50,\n",
    ")\n",
    "docs = text_splitter.split_documents(pages)\n",
    "print(docs[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2025-10-24T14:15:20.358465Z",
     "iopub.status.busy": "2025-10-24T14:15:20.358247Z",
     "iopub.status.idle": "2025-10-24T14:15:21.824250Z",
     "shell.execute_reply": "2025-10-24T14:15:21.823798Z",
     "shell.execute_reply.started": "2025-10-24T14:15:20.358450Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "from langchain_openai import OpenAIEmbeddings\n",
    "from langchain_community.vectorstores import Chroma\n",
    "\n",
    "embed_model = OpenAIEmbeddings()\n",
    "# Give the guideline a collection of its own. The in-memory Chroma client is\n",
    "# shared for the lifetime of the kernel, so reusing the \"openai_embed\" name from\n",
    "# the baichuan2 section would add these chunks to that collection and mix the\n",
    "# two documents in later searches.\n",
    "vectorstore = Chroma.from_documents(\n",
    "    documents=docs,\n",
    "    embedding=embed_model,\n",
    "    collection_name=\"dlbcl_guideline\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2025-10-24T14:15:23.827465Z",
     "iopub.status.busy": "2025-10-24T14:15:23.827238Z",
     "iopub.status.idle": "2025-10-24T14:15:24.096764Z",
     "shell.execute_reply": "2025-10-24T14:15:24.096240Z",
     "shell.execute_reply.started": "2025-10-24T14:15:23.827447Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[Document(metadata={'sourcemodified': \"D:20220329101859+02'18'\", 'page': 8, 'page_label': '9', 'company': '', 'keywords': '', 'source': './弥漫性大B细胞淋巴瘤诊疗指南+（2022年版）.pdf', 'comments': '', 'creationdate': '2022-03-29T10:18:59+02:00', 'trapped': '/False', 'producer': '', 'moddate': '2022-04-15T17:30:48+08:00', 'creator': 'WPS 文字', 'subject': '', 'title': '', 'author': 'Zhou, Yaping (ELS-BEI)', 'total_pages': 15}, page_content='9\\n高危 6-8\\n（二）其他标准化疗治疗患者的主要不良预后因素。\\n1.非生发中心 B 细胞亚型（活化 B 细胞亚型）。\\n2.MYC和 BCL-2 和/或BCL-6 重排。\\n3.MYC和 BCL2 高表达。\\n4.TP53 突变。\\n5.EB 病毒相关疾病。\\n四、治疗\\n（一）治疗目标。\\n持久的完全缓解以期根治。\\n（二）诱导治疗。\\n弥漫大 B 细胞淋巴瘤的治疗根据患者年龄、Ann Arbor 分期和\\nIPI以及肿瘤的免疫和分子表型特征选择适当的方案。\\n1.局限期弥漫性大 B 细胞淋巴瘤[Ann ArborⅠ期和Ⅱ期非大肿\\n块（＜7.5 cm）疾病]：一线治疗包括 R-CHOP 方案化疗三个疗程，\\n并对受累部位进行放疗；或 R-CHOP 方案化疗 4 个疗程加 2 程利妥\\n昔单抗治疗（IPI=0 分）；或R-CHOP 方案化疗6 个疗程±受累野放\\n疗。\\n2.局限期弥漫性大 B 细胞淋巴瘤[Ann ArborⅠ期和Ⅱ期伴有大'), Document(metadata={'producer': '', 'sourcemodified': \"D:20220329101859+02'18'\", 'creationdate': '2022-03-29T10:18:59+02:00', 'keywords': '', 'author': 'Zhou, Yaping (ELS-BEI)', 'source': './弥漫性大B细胞淋巴瘤诊疗指南+（2022年版）.pdf', 'total_pages': 15, 'title': '', 'moddate': '2022-04-15T17:30:48+08:00', 'subject': '', 'company': '', 'page_label': '10', 'comments': '', 'creator': 'WPS 文字', 'page': 9, 'trapped': '/False'}, page_content='10\\n肿块（≥7.5 cm）]：一线治疗 R-CHOP 方案化疗 6 个疗程，在某些\\n患者进行放疗；初始大肿块（＞7.5 cm）部位放疗。\\n3.晚期弥漫大 B 细胞淋巴瘤（Ann Arbor Ⅲ～Ⅳ期）：一线治\\n疗包括：使用 R-CHOP 方案或 R-DA-EPOCH 方案进行化疗。初始大肿\\n块（＞7.5 cm）部位放疗。\\n4.对于高龄或不适合标准化疗的患者，可以考虑 R-GemOx、R-\\nminiCHOP、R-CDOP、R-CEPP、R-GCVP 等或靶向治疗为主的方案。\\n5.双/三重打击淋巴瘤：倾向于高剂量方案：R-DA-EPOCH、R-\\nhyperCVAD/MA 或者 R-CODOX/MA 方案，获得完全患者患者可考虑进\\n行自体外周血干细胞移植。\\n6.维持治疗：对于老年患者（≥60 岁）患者诱导治疗结束后可\\n以考虑来那度胺维持治疗。\\n7.中枢神经系统淋巴瘤预防。\\n适用于以下高危因素患者：\\n（1）由 IPI 评分中的 5 个危险因素和肾上腺/肾脏累及组成\\nCNS-IPI，积分4～6 分的高危患者。\\n（2）累及以下器官：睾丸、乳腺、鼻窦、硬脑膜等。\\n（3）人类免疫缺陷病毒相关淋巴瘤。')]\n"
     ]
    }
   ],
   "source": [
    "# Alternative question to try: query = \"NCCNIPI定义的结外受累器官有哪些\"\n",
    "query = \"高龄弥漫性大B淋巴瘤患者的应选择哪些治疗方案\"\n",
    "\n",
    "# Retrieve the two chunks most similar to the question and inspect them.\n",
    "result = vectorstore.similarity_search(query, k=2)\n",
    "print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2025-10-24T14:15:26.588653Z",
     "iopub.status.busy": "2025-10-24T14:15:26.588426Z",
     "iopub.status.idle": "2025-10-24T14:15:26.591610Z",
     "shell.execute_reply": "2025-10-24T14:15:26.591169Z",
     "shell.execute_reply.started": "2025-10-24T14:15:26.588638Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# NOTE(review): this re-defines the `augment_prompt` helper from the baichuan2\n",
    "# section; presumably kept so the exercise can be followed on its own.\n",
    "def augment_prompt(query: str, k: int = 3) -> str:\n",
    "  \"\"\"Build a retrieval-augmented prompt for `query`.\n",
    "\n",
    "  Fetches the `k` chunks most similar to `query` from the notebook-level\n",
    "  `vectorstore` and places their text, followed by the query itself, in an\n",
    "  English instruction template.\n",
    "\n",
    "  Args:\n",
    "    query: Question to answer; used for retrieval and quoted in the prompt.\n",
    "    k: How many chunks to retrieve. Defaults to 3.\n",
    "\n",
    "  Returns:\n",
    "    The assembled prompt text.\n",
    "  \"\"\"\n",
    "  # `vectorstore` is a notebook global looked up at call time.\n",
    "  results = vectorstore.similarity_search(query, k=k)\n",
    "  source_knowledge = \"\\n\".join(doc.page_content for doc in results)\n",
    "  # The template is kept verbatim, including the two-space indent of its lines.\n",
    "  augmented_prompt = f\"\"\"Using the contexts below, answer the query.\n",
    "\n",
    "  contexts:\n",
    "  {source_knowledge}\n",
    "\n",
    "  query: {query}\"\"\"\n",
    "  return augmented_prompt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2025-10-24T14:16:03.914225Z",
     "iopub.status.busy": "2025-10-24T14:16:03.914011Z",
     "iopub.status.idle": "2025-10-24T14:16:08.338282Z",
     "shell.execute_reply": "2025-10-24T14:16:08.337837Z",
     "shell.execute_reply.started": "2025-10-24T14:16:03.914211Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "对于高龄或不适合标准化疗的弥漫性大B细胞淋巴瘤患者，可以考虑以下治疗方案：\n",
      "\n",
      "1. **R-GemOx**（利妥昔单抗-吉西他滨-长春新碱治疗方案）。\n",
      "2. **R-miniCHOP**（利妥昔单抗-小剂量CHOP化疗）。\n",
      "3. **R-CDOP**（利妥昔单抗-CHOP化疗）。\n",
      "4. **R-CEPP**（利妥昔单抗-厄达莫司-多西他赛-环磷酰胺治疗方案）。\n",
      "5. **R-GCVP**（利妥昔单抗-吉西他滨-环磷酰胺-长春新碱治疗方案）。\n",
      "6. **靶向治疗**（根据信息判断选择适合的靶向药物）。\n",
      "\n",
      "这些方案的选择应根据患者的具体健康状况、病情进展以及医生的专业评估来确定。\n"
     ]
    }
   ],
   "source": [
    "\n",
    "from langchain_core.messages import (\n",
    "    SystemMessage,\n",
    "    HumanMessage,\n",
    "    AIMessage\n",
    ")\n",
    "\n",
    "# Build the conversation: a system role plus the retrieval-augmented user prompt.\n",
    "prompt = HumanMessage(content=augment_prompt(query))\n",
    "messages = [\n",
    "    SystemMessage(content=\"你是一个专业的知识助手。\"),\n",
    "    prompt,\n",
    "]\n",
    "\n",
    "# Send the conversation to the chat model and show its reply.\n",
    "res = chat.invoke(messages)\n",
    "print(res.content)"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "collapsed_sections": [
    "i8TTksfT2K3r",
    "OZN6yzwi7J61",
    "lgABYtKp8_Ke",
    "ymUjel7-E-t1"
   ],
   "gpuType": "T4",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "0e42f4231586464abadc5674077b5b85": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "FloatProgressModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "FloatProgressModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "ProgressView",
      "bar_style": "",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_881fd88d52904346a58d3ed6b7b25b42",
      "max": 7,
      "min": 0,
      "orientation": "horizontal",
      "style": "IPY_MODEL_5d32c664e3a94141a49200a3bf815719",
      "value": 5
     }
    },
    "17569993d70142caaca7d550eaf84773": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "462fdec12c174c7f893296108744867b": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "5d32c664e3a94141a49200a3bf815719": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "ProgressStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "ProgressStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "bar_color": null,
      "description_width": ""
     }
    },
    "7a2bda030860431a9ff1bc8719f15d9c": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_17569993d70142caaca7d550eaf84773",
      "placeholder": "​",
      "style": "IPY_MODEL_c972648b06754d43ada362e512fea865",
      "value": " 5/7 [00:49&lt;00:20, 10.03s/it]"
     }
    },
    "7f7bd9c4ac8441ad9d252869f75b17e6": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HTMLModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HTMLModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HTMLView",
      "description": "",
      "description_tooltip": null,
      "layout": "IPY_MODEL_853287ee89244e169ffebbd4a7a0335d",
      "placeholder": "​",
      "style": "IPY_MODEL_462fdec12c174c7f893296108744867b",
      "value": "Loading checkpoint shards:  71%"
     }
    },
    "853287ee89244e169ffebbd4a7a0335d": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "881fd88d52904346a58d3ed6b7b25b42": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    },
    "c972648b06754d43ada362e512fea865": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "DescriptionStyleModel",
     "state": {
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "DescriptionStyleModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "StyleView",
      "description_width": ""
     }
    },
    "d0c0568f4d6f4bda9253932577becf3c": {
     "model_module": "@jupyter-widgets/controls",
     "model_module_version": "1.5.0",
     "model_name": "HBoxModel",
     "state": {
      "_dom_classes": [],
      "_model_module": "@jupyter-widgets/controls",
      "_model_module_version": "1.5.0",
      "_model_name": "HBoxModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/controls",
      "_view_module_version": "1.5.0",
      "_view_name": "HBoxView",
      "box_style": "",
      "children": [
       "IPY_MODEL_7f7bd9c4ac8441ad9d252869f75b17e6",
       "IPY_MODEL_0e42f4231586464abadc5674077b5b85",
       "IPY_MODEL_7a2bda030860431a9ff1bc8719f15d9c"
      ],
      "layout": "IPY_MODEL_d80d0f6e4afb4e709e6a01e3d651b7bf"
     }
    },
    "d80d0f6e4afb4e709e6a01e3d651b7bf": {
     "model_module": "@jupyter-widgets/base",
     "model_module_version": "1.2.0",
     "model_name": "LayoutModel",
     "state": {
      "_model_module": "@jupyter-widgets/base",
      "_model_module_version": "1.2.0",
      "_model_name": "LayoutModel",
      "_view_count": null,
      "_view_module": "@jupyter-widgets/base",
      "_view_module_version": "1.2.0",
      "_view_name": "LayoutView",
      "align_content": null,
      "align_items": null,
      "align_self": null,
      "border": null,
      "bottom": null,
      "display": null,
      "flex": null,
      "flex_flow": null,
      "grid_area": null,
      "grid_auto_columns": null,
      "grid_auto_flow": null,
      "grid_auto_rows": null,
      "grid_column": null,
      "grid_gap": null,
      "grid_row": null,
      "grid_template_areas": null,
      "grid_template_columns": null,
      "grid_template_rows": null,
      "height": null,
      "justify_content": null,
      "justify_items": null,
      "left": null,
      "margin": null,
      "max_height": null,
      "max_width": null,
      "min_height": null,
      "min_width": null,
      "object_fit": null,
      "object_position": null,
      "order": null,
      "overflow": null,
      "overflow_x": null,
      "overflow_y": null,
      "padding": null,
      "right": null,
      "top": null,
      "visibility": null,
      "width": null
     }
    }
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
