{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "85c396b9-5ab4-4852-86cd-51c6bc07e075",
   "metadata": {
    "ExecutionIndicator": {
     "show": true
    },
    "execution": {
     "iopub.execute_input": "2026-05-14T11:57:16.455287Z",
     "iopub.status.busy": "2026-05-14T11:57:16.455107Z",
     "iopub.status.idle": "2026-05-14T11:57:25.959972Z",
     "shell.execute_reply": "2026-05-14T11:57:25.959472Z",
     "shell.execute_reply.started": "2026-05-14T11:57:16.455268Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "提取的字典部分：\n",
      "dlbcl_data = {\n",
      "    \"nodes\": [\n",
      "        {\"name\": \"弥漫性大B细胞淋巴瘤\", \"type\": \"疾病\", \"definition\": \"来源于成熟B细胞的侵袭性肿瘤，最常见的非霍奇金淋巴瘤类型\"},\n",
      "        {\"name\": \"非霍奇金淋巴瘤\", \"type\": \"疾病\", \"description\": \"一种淋巴系统的恶性肿瘤\"},\n",
      "        {\"name\": \"症状\", \"type\": \"表现\", \"description\": \"无痛性肿物、结外器官症状、肿瘤溶解综合征\"},\n",
      "        {\"name\": \"病理活检\", \"type\": \"检查方法\", \"description\": \"确诊方法之一\"},\n",
      "        {\"name\": \"免疫组化\", \"type\": \"检查方法\", \"description\": \"用于确诊的技术，标记CD20、BCL6、Ki-67等\"},\n",
      "        {\"name\": \"PET/CT\", \"type\": \"影像学检查\", \"description\": \"首选的分期/疗效评估方法\"},\n",
      "        {\"name\": \"R-CHOP\", \"type\": \"治疗方案\", \"description\": \"用于局限期及晚期DLBCL的化疗方案\"},\n",
      "        {\"name\": \"恩替卡韦\", \"type\": \"药物\", \"description\": \"用于HBsAg阳性的抗病毒治疗\"},\n",
      "        {\"name\": \"复发/难治性治疗\", \"type\": \"治疗\", \"description\": \"包括R-ICE、R-DHAP、自体干细胞移植、CAR-T等\"}\n",
      "    ],\n",
      "    \"relationships\": [\n",
      "        {\"source\": \"弥漫性大B细胞淋巴瘤\", \"target\": \"非霍奇金淋巴瘤\", \"type\": \"IS_A\"},\n",
      "        {\"source\": \"弥漫性大B细胞淋巴瘤\", \"target\": \"症状\", \"type\": \"HAS_SYMPTOM\"},\n",
      "        {\"source\": \"弥漫性大B细胞淋巴瘤\", \"target\": \"病理活检\", \"type\": \"DIAGNOSED_BY\"},\n",
      "        {\"source\": \"弥漫性大B细胞淋巴瘤\", \"target\": \"免疫组化\", \"type\": \"DIAGNOSED_BY\"},\n",
      "        {\"source\": \"弥漫性大B细胞淋巴瘤\", \"target\": \"PET/CT\", \"type\": \"EVALUATED_BY\"},\n",
      "        {\"source\": \"弥漫性大B细胞淋巴瘤\", \"target\": \"R-CHOP\", \"type\": \"TREATED_BY\"},\n",
      "        {\"source\": \"弥漫性大B细胞淋巴瘤\", \"target\": \"恩替卡韦\", \"type\": \"TREATED_BY\"},\n",
      "        {\"source\": \"弥漫性大B细胞淋巴瘤\", \"target\": \"复发/难治性治疗\", \"type\": \"TREATED_BY\"}\n",
      "    ]\n",
      "}\n",
      "生成的文本无法转换为字典: invalid syntax (<string>, line 1)\n"
     ]
    }
   ],
   "source": [
    "import openai,os\n",
    "import re\n",
    "# 读取文件内容\n",
    "with open('lymphoma_knowledge.txt', 'r', encoding='utf-8') as file:\n",
    "    content = file.read()\n",
    "\n",
    "# 定义提示词，告诉模型如何提取知识图谱\n",
    "prompt = f\"\"\"\n",
    "请从以下文本中提取知识图谱，并将其处理成如下格式：\n",
    "\n",
    "nezha_data = {{\n",
    "    \"nodes\": [\n",
    "        {{\"name\": \"哪吒\", \"title\": \"三太子\", \"type\": \"Deity\", \"weapon\": \"火尖枪\"}},\n",
    "    ],\n",
    "    \"relationships\": [\n",
    "        {{\"source\": \"李靖\", \"target\": \"哪吒\", \"type\": \"FATHER_OF\"}},\n",
    "    ]\n",
    "}}\n",
    "\n",
    "文本内容：\n",
    "{content}\n",
    "\"\"\"\n",
    "\n",
    "# python包引入\n",
    "import os\n",
    "from openai import OpenAI\n",
    "\n",
    "# 获取api_key\n",
    "api_key = os.getenv(\"OPENAI_API_KEY\", \"sk-q4U13e0730fabd22176af25b9454c7896610386fc114fi4t\")\n",
    "base_urls = [\"https://api.gptsapi.net/v1\"]\n",
    "client = OpenAI(api_key=api_key, base_url=base_urls[0])\n",
    "\n",
    "# 问答函数定义\n",
    "def chat(model=\"gpt-4o-mini\", messages=[], temperature=0.7):\n",
    "    completion = client.chat.completions.create(\n",
    "    model=model,\n",
    "    messages=messages,\n",
    "    temperature=temperature,\n",
    "    )\n",
    "    return completion.choices[0].message.content\n",
    "\n",
    "# 提取生成的文本\n",
    "messages = [{'role': 'user', 'content': prompt}]\n",
    "generated_text = chat(model=\"gpt-4o-mini\", messages=messages)\n",
    "# 使用正则表达式提取Python字典部分\n",
    "pattern = r\"```python\\s*(.*?)\\s*```\"\n",
    "match = re.search(pattern, generated_text, re.DOTALL)\n",
    "\n",
    "if match:\n",
    "    # 提取字典部分\n",
    "    dict_text = match.group(1).strip()\n",
    "    print(\"\\n提取的字典部分：\")\n",
    "    print(dict_text)\n",
    "\n",
    "    # 将提取的文本转换为Python字典\n",
    "    try:\n",
    "        knowledge_graph = eval(dict_text)\n",
    "        print(\"\\n转换后的知识图谱：\")\n",
    "        print(knowledge_graph)\n",
    "    except Exception as e:\n",
    "        print(\"生成的文本无法转换为字典:\", e)\n",
    "else:\n",
    "    print(\"未找到Python字典部分。\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4851fe8e-0e9d-4976-8d01-6ef54497f943",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
