adm-ntuh-net/forteo/Untitled.ipynb

436 lines
63 KiB
Text
Raw Permalink Normal View History

2024-12-12 02:19:16 +00:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"from collections import Counter\n",
"\n",
"import math\n",
"import re\n",
"\n",
"from pandas import read_excel\n",
"from pymongo import MongoClient\n",
"from pyquery import PyQuery as pq\n",
"from scipy import stats\n",
"\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# (workbook file, sheet name) pairs; each pair is loaded with read_excel further down.\n",
"SHEETS = (\n",
"    (\"神外-總院_1070920.xlsx\", \"總院\"),\n",
"    # Disabled entry for the 1070928 workbook:\n",
"    # (\"(骨穩)總院_分科1070928.xlsx\", \"工作表1\"),\n",
"    (\"(骨穩)總院_分科1071002.xlsx\", \"工作表1\"),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Create a MongoDB client and keep a handle on the `posts` collection of the `forteo` database.\n",
"# NOTE(review): host and port are hard-coded here; consider moving them to configuration.\n",
"client = MongoClient(host=\"mongodb.xiao.tw\", port=27017)\n",
"db = client[\"forteo\"]\n",
"posts = db[\"posts\"]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from matplotlib.font_manager import FontManager\n",
"\n",
"import subprocess\n",
"\n",
"import matplotlib\n",
"\n",
"def get_matplot_zh_font():\n",
"    \"\"\"Return names of fonts known to both matplotlib and fontconfig's Chinese font list.\n",
"\n",
"    Prints both candidate sets and their intersection. Runs the `fc-list`\n",
"    command, so fontconfig must be installed.\n",
"    \"\"\"\n",
"    fm = FontManager()\n",
"    mat_fonts = set(f.name for f in fm.ttflist)\n",
"\n",
"    # check_output returns bytes. Decode it so the listing can be split on real\n",
"    # newlines; str() on bytes gives the \"b'...'\" repr with escaped newlines,\n",
"    # which made the split (and therefore the intersection below) useless.\n",
"    output = subprocess.check_output(['fc-list', ':lang=zh', '-f', '%{family}\\n'])\n",
"    output = output.decode('utf-8')\n",
"    # An entry may hold several comma-separated family names; keep the first one.\n",
"    zh_fonts = set(f.split(',', 1)[0] for f in output.split('\\n') if f)\n",
"\n",
"    print(mat_fonts)\n",
"    print(zh_fonts)\n",
"    available = list(mat_fonts & zh_fonts)\n",
"\n",
"    print ('*' * 10, '可用的字體', '*' * 10)\n",
"    for f in available:\n",
"        print(f)\n",
"    return available\n",
"\n",
"def set_matplot_zh_font():\n",
"    \"\"\"Use the first font returned by get_matplot_zh_font() as matplotlib's sans-serif font.\n",
"\n",
"    Does nothing when no matching font is found.\n",
"    \"\"\"\n",
"    available = get_matplot_zh_font()\n",
"    if len(available) > 0:\n",
"        # `matplotlib` is the name the notebook imports; the previous `mpl` alias was never defined.\n",
"        matplotlib.rcParams['font.sans-serif'] = [available[0]]  # set the default font\n",
"        matplotlib.rcParams['axes.unicode_minus'] = False  # stop the minus sign '-' showing as a box in saved figures\n",
"\n",
"# set_matplot_zh_font()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Collects one DataFrame per SHEETS entry before they are concatenated.\n",
"frames = list()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Read every (file, sheet) pair listed in SHEETS.\n",
"# The list is reset here so that re-running this cell cannot append the same\n",
"# sheets a second time and duplicate rows in the concatenated table.\n",
"frames = []\n",
"for file_name, sheet_name in SHEETS:\n",
"    data = read_excel(file_name, sheet_name=sheet_name)\n",
"    frames.append(data)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Merge the per-sheet frames into one table with a fresh 0..n-1 index, then save a copy.\n",
"df = pd.concat(frames, sort=False, ignore_index=True)\n",
"df.to_excel(\"concat.xls\")"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"8.680568609022558\n",
"6.696428571428571 0.14285714285714285 24.0\n"
]
}
],
"source": [
"# Summary of the 用藥時間 column: mean on the first line, then median / min / max.\n",
"medication_time = df['用藥時間']\n",
"print(medication_time.mean())\n",
"print(medication_time.median(), medication_time.min(), medication_time.max())"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# Brand-name prefix -> drug class; drug names are matched against the keys with startswith().\n",
"antiresorptives = {\n",
"    \"Prolia\": \"RANKL inhibitor\",\n",
"    \"Evista\": \"SERM\",\n",
"    \"Fosamax\": \"Bisphosphonate\",\n",
"    \"Aclasta\": \"Bisphosphonate\",\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"# Count, per drug name, how many patients have it in their records, and tag each\n",
"# patient row in df with the antiresorptive brand found among their drugs.\n",
"countDrug = Counter()\n",
"\n",
"column_ar = []\n",
"\n",
"for index, row in df.iterrows():\n",
"    tmp_ar = None\n",
"    post = posts.find_one({\"_id\": row['病歷號']})\n",
"    if post is None:\n",
"        # No document stored under this 病歷號; leave the row untouched.\n",
"        continue\n",
"    # Progress log only. The full document is deliberately not printed so that\n",
"    # patient records do not end up in the saved notebook output.\n",
"    print(index, row['病歷號'])\n",
"\n",
"    drug_set = set()\n",
"    if post['drug']:\n",
"        for drugs in post['drug']:\n",
"            for tr in drugs['Drug']:\n",
"                # The text of the first <td> is taken as the drug name\n",
"                # (presumably each entry is an HTML table row - verify against the stored data).\n",
"                pqtr = pq(tr)\n",
"                drugname = pqtr('td')[0].text\n",
"                drugname = drugname.replace('(管4) ', '')\n",
"                drug_set.add(drugname)\n",
"                # Keep the brand of the first drug that matches a known antiresorptive prefix.\n",
"                if tmp_ar is None:\n",
"                    for ar in antiresorptives:\n",
"                        if drugname.startswith(ar):\n",
"                            tmp_ar = ar\n",
"\n",
"    # Each distinct drug counts once per patient.\n",
"    for d in drug_set:\n",
"        countDrug[d] += 1\n",
"\n",
"    column_ar.append(tmp_ar)\n",
"    df.loc[index, 'AR'] = tmp_ar\n",
"    if tmp_ar:\n",
"        df.loc[index, 'AR2'] = antiresorptives[tmp_ar]\n",
"    # NOTE(review): the cell's nesting was flattened in this copy. This assignment is\n",
"    # placed at loop level, so rows without any matched antiresorptive are labelled\n",
"    # ' Others' - confirm it was not meant to sit inside the `if tmp_ar:` block.\n",
"    df.loc[index, 'antiresorptives'] = tmp_ar if tmp_ar == 'Prolia' else ' Others'"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAADuCAYAAADSkstYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAIABJREFUeJzsnXeclNXV+L9nttF7EQFZmiwsA0sRQaQtalBUNGJNNCam+Ia8iSYxIckvyaa8CYlJTLEQTaIbE0vsBRtRKSK9l6WtLL2XhWXrzJzfH88dGYYts8P0vd/PZz47c+t5Znef89xzzj1XVBWLxWKxWAJxxVsAi8VisSQeVjlYLBaL5RyscrBYLBbLOVjlYLFYLJZzsMrBYrFYLOdglYPFYrFYzsEqB4vFYrGcg1UOFovFYjkHqxwsFovFcg5WOVgsFovlHKxysFgsFss5WOVgsVgslnOwysFisVgs52CVg8VisVjOwSoHi8VisZyDVQ4Wi8ViOQerHCwWi8VyDlY5WCwWi+UcrHIIExH5kYhsFJF1IrJGRC415Zki8kcR2S4i20TkNRHpEdCvrJ4x14jIc0Flr4vIXQGfnxCRB8z7eSIy0rwvEZGXAtpNF5GnapnjShFZKSLrzc/8gLoRpny7iPxZRMSUdxCRueZ65opI+zC+spAwMmWJyA9DbF/n92mxWMLHKocwEJExwLXAcFUdAlwB7DbVvwJaAwNUtT/wKvCy/0Zbz5gDgTRgnIi0DKj6JvAzEWknIpcBlwIP1THMCBEZ1ID4R4DrVNUNfAF4OqDuMeArQH/zmmLKZwLvm+t533yOOCLSG9irqlVASMrBYrFEB6scwqMbcMTcxFDVI6q6T0RaAF8E7ldVr6l7EqgC8usczeF2nBv1e8A0f6GqlgCPA7/FuXl/Q1U9dYzxe+BH9U2iqqtVdZ/5uBFobp7UuwFtVHWJqirwT+AG024aUGjeFwaUf4qI3C0ir5qVRYmIfENEvi0iq0VkiYh0MO0uCVhtPSgiGwKGmQK8IyKzjFxrROTfpt+rZlWxUUS+GjT3Q6b8fRHpXN/1WyyW0LDKITzeA3qKyFYReVREJpjyfsAuVT0Z1H4FkNvAmLcCzwHP4iiKQH6Hc+PcoKoL6hnjP8BwEekXykUANwGrjJLrDuwJqNtjygC6qup+8/4A0LWO8QYDnwUuAf4PKFfVYcBiwG8aexL4mqrmAd6g/lOAd1R1JlChqnmq+jlT9yVVHQGMBL4pIh1NeUtgharmAvOBn4Z47RaLpR6scggDVS0DRgBfBQ4Dz4vI3eGOZ/wGR1R1F47ZZpj/SdswBOd3lSMi9f3OvMCDwA9CmDMX+A3wtcbIalYVWkf1h6p6SlUPA6XAG6Z8PZAtIu2A1qq62JQ/EyBPJtBDVT+pY+xvishaYAnQE8fsBeADnjfv/wVc3pjrsVgstWOVQ5ioqldV56nqT4Fv4DyFFwMXiUjroOYjcEw4dXE7zo2/xIzRxoyHUQaPAp8HtgH/04BoTwPjcW6gtWIc5K8Ad6lqsSneC/QIaNbDlAEcNGYnzM9DdQxdFfDeF/DZB6Q3IPc44KM65J2I49cZo6pDgdVAszrGqUtxWSyWRmCVQxiIyAAR6R9QlAfsVNXTODb5P4hImml7F9AC+KCOsVzALYBbVbNVNRvHxu83LX0N2Kaq84BvA9+vz66uqjU4Duv765ivHTAHmKmqiwL67QdOisho4zy/C3jNVL+O47zG/HyNMFDVE8Apf2QXcFtA9RTg7YDPNSKSYd63BY6rarmI5ACjA9q5gOnm/R3UoWAsFkvjsMohPFoBhSKySUTWAYOAAlP3A6AS2Coi24CbgRuNOQaghYjs8b+AH+NE6OwLGH8BMEhEegHfB74LYNr8Ecc5XR9/p+4n9W/g+EZ+Yhy+a0Ski6n7OvA3YDvOCsZ/s54FXGmu5wrzOVzuAZ4QkTU4/oJSUz4Rx2
fg53FgnXFIvwOki0iRmXtJQLvTwCjj2M4Hfn4eslksFoOcuWdZLNFHRFoZnw0iMhMn8utB4AlVvTquwlkslk+xysESU0TkVpzVVTqwE7jbOLAtFksCYZWDxWKxWM7B+hwsFovFcg4NhRdaLMlHQdt2QGegPdDOvFrjhNRWB7xqgj6fBnZRUFpay6gWS5PCmpUsyUlB2x7AUJwwYjdwEXABjoO7rj0QoVIKlNTx2kpBafl5jm+xJDxWOVgSm4K26Tg3/6FBrw71dYsiHmAtsOjTV0Hp3vq7WCzJh1UOlsSjoG1X4Brzugpnx3gisxP4GEdZfASso6DU/mNZkhqrHCzxp6Ct4CTrm4qjEEYA9aY4T3D24KRqfwlYSEFpcIJBiyXhscrBEj8K2o4FvoRzNkaXBlonJV6VT/pVPf2u4noGWFQya6r9h7MkBVY5WGJLQds2wJ04OaPccZYm6hT5Lvro6upZ/kyxO3Fyb80umTV1fz3dLJa4Y5WDJTYUtB2Ok1H2dpycSk2C+6q/vuJV3+Ujg4prgBeBP5XMmro0DmJZLA1ilYMlehS0zcRJNf4/OIf0NCl8Kscurips7SE9o55my4A/A/8pmTW1JkaiWSwNYpWDJfIUtM3A8SX8iHrOlUh11vr6LJxW/ctxITbfA/wC+EfJrKl1HQNrscQMqxwsEcNd6E6bs3vfbRd5PL8Aesdbnnhzb/W3Vr3ju3R4I7ttwznq9DnrvLbEE5tbyRIR3IXua4C1D3TpeC9WMeBTOfye75KhYXTtj3N86ursmXOmRlgsiyVk7MrBcl64C915wO+AyQCoel/ae2DXxTU1TVpBrPBdvGB6dcH4CAw1H7i3ZNbUzREYy2IJGascLGHhLnRn4Zx+9wCQFljXs6ZmyVt79o+urV9T4UvV3133gW/4kAgNV41zAt6vSmZNrWqoscUSCaxZydJo3IXuS4BVwEyCFAPA7oyM0SuaZW2KuWAJgldd+z/wDYvkHo5M4CfA2uyZcyZGcFyLpU6scrCEjLvQneUudP8aWIxzbnadfKdLpyb7hLvUl7MVJBrpPwYAH2bPnPOP7Jlz2kdhfIvlU6xysIREQ6uFYI6lpQ2b26L5qqgLloA84r2hY5Sn+CKwJnvmnDFRnsfShLE+B0u9uAvd6cDPge8RglIIpKXPt2nJzj31rjBSDY+69vSr+lePWE2Hs5fkQRv2aok0duVgqRN3obsD8C7wAxqpGABOu1yDnmvdaknEBUtgPvINLo7hdOnAb4A52TPndIrhvJYmgFUOllpxF7pzgeVA/vmM82DH9l280GRSVj/suTEe2WWvxjEzRSJ01mIBrHKw1IK70H0djtO5z/mOVS3SZ3a7tovPX6rEp1rTdq7QAQPjNH134L/ZM+fcFaf5LSmGVQ6Ws3AXun+Ac1BN60iN+US7Nn2qhMpIjZeozPPl7YizCBlAYfbMOT+NsxyWFMA6pC0AuAvdzYC/A3dEY/ybT56a/5OjxydEY+xE4fqqX2xbp337x1sOw1PAV22mV0u4JMTKQUSyRWRDUFmBiHzXvH9KRKab9/eJSIsoy9NORL7e2LqgdudcUz1tR4vIEyKSJyLXhND+bhF5OJSxQ8Fd6G4NvE+UFAPAi61buU+JnIzW+PGmUjOKE0gxANwNvJ09c07beAtiSU4SQjk0kvuAWpWDiDQ6oqYO2gF1KYD66sLlauAdIA/nDOWYYRTDO8Bl0ZxHRTr8uHPH1dGcI57M9Y3YHW8ZamEysCB75pxo77uwpCBJpRxE5JvAhcCHIvKhKSsTkd+LyFpgjIj8RESWi8gGEXlcxNmpKiL9ROS/IrJWRFaJSF9T/oBpv05EfmammgX0FZE1IvJgkBhn1YlIKxF534y5XkSmBbRNF5F/i0iRiLxYz4pnMvBfnP0Et5qxbxWRUSKyWERWi8jHIjIgoE9PEZknIttEJCwbs7vQ3Qp4mygrBj/vt2g+8kia63As5oo1D3tu6BVvGepgCI6j2u
6otjSKpFIOqvpnYB8wSVUnmeKWwFJVHaqqHwEPq+olqjoYaI5zeD3Av4FHVHUozs1wv4hchZMieRTOU/sIERmPswu
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Remove Forteo itself and the pen needles, then chart the ten most common remaining drugs.\n",
"cDrug = countDrug  # same Counter object: the deletions below also change countDrug\n",
"for excluded in (\n",
"    'Forteo Prefilled Inj 600 mcg/2.4 mL /pen',\n",
"    '(4mm) Micro-Fine Insulin Pen Needle 32G /set',\n",
"    '(8mm) Micro-Fine Insulin Pen Needle 31G /set',\n",
"):\n",
"    del cDrug[excluded]\n",
"\n",
"labels, values = zip(*countDrug.most_common(10))\n",
"# plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']  # needed for Chinese labels to render correctly\n",
"plt.pie(values, labels=labels)\n",
"plt.savefig('drug.png', dpi=400)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"# Re-declares the brand-name prefix -> drug class mapping (identical to the earlier definition).\n",
"antiresorptives = {\n",
"    \"Prolia\": \"RANKL inhibitor\",\n",
"    \"Evista\": \"SERM\",\n",
"    \"Fosamax\": \"Bisphosphonate\",\n",
"    \"Aclasta\": \"Bisphosphonate\",\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Counter({'Prolia': 30, 'Evista': 10, 'Fosamax': 6, 'Aclasta': 5})\n",
"Counter({'RANKL inhibitor': 30, 'Bisphosphonate': 11, 'SERM': 10})\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAP4AAADuCAYAAAAZQLrKAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAIABJREFUeJzt3Xl8VOW9x/HPb9ZsZGEHAww7EVE2FwRZRG/tjdbaWqt1SRVUbF2gtnWurTatW6za1aqttXrrhrtWx6XVW0ARKiDLgAlYYBTKIltC9klmnvvHGSTsSWY558w879crL5KZc878JuQ755znPM9zRCmFpmmZxWF2AZqmpZ4OvqZlIB18TctAOvialoF08DUtA+nga1oG0sHXtAykg69pGUgHX9MykA6+pmUgHXxNy0A6+JqWgXTwNS0D6eBrWgbSwde0DKSDr2kZSAdf0zKQDr6mZSAdfE3LQDr4mpaBdPA1LQPp4MeISEREVojIahF5QURyOrh+SES6x77/MDlValpi6ODv16iUGq2UOgEIA7PaPimGdv2+lFKnJ6NATUsUHfzDex8YIiI+EVkrIn8FVgP9ROQSEQnGjgzuPdzKIlIX+zdPRN4TkY9j65yfwvegaUekg38QEXEBXwWCsYeGAg8ppUYCLcC9wJnAaOBkEfn6UTbXBFyglBoLTAMeEBFJWvGa1k46+Ptli8gKYCnwOfBY7PHPlFKLY9+fDMxTSu1QSrUCTwOTj7JNAe4WkVXAu8BxQK+kVK9pHeAyuwALaVRKjW77QGznXB/HNi8FegDjlFItIhICsuLYnqYlhN7jd8xHwBQR6S4iTuASYP5Rli8AvoiFfhowIBVFatqx6D1+ByiltoqIH/gnxmF8QCn12lFWeRp4XUSCGKcQVSkoU9OOSfTdcjUt8+g9vo35/AEvkNfmK/egn/MAD1AD7Ab2xP7dDewJVZS2mFC2ZgF6j29xsXAPB0qA49t8DQa8cW6+jv0fCNsxTkXW7PsKVZRWx7l9zaJ08C3E5w/0AaYCJ7I/6IMAp0kl/QejbWJJ7GtpqKJ0t0m1aAmkg28inz+Qj9GxZzpwFkbYra4SeB14FVgcqijVf0A2pIOfYj5/wAd8LfY1GXCbWlB8trP/Q+C9UEVpk8n1aO2kg58CPn+gJzAT+DbGYXw6qgPeAV4DAvqUwNp08JPI5w+cAXwP+AZG63qmaMU4Cvh1qKJUD1G2IB38BPP5A3nA5cB1wCiTy7GCj4BfAy+GKkpbzS5GM+jgJ4jPHzgeY+9+OZBvcjlWtAn4PfCovkxoPh38OPn8gbFABXC22bXYRD3wOPDbUEXpv80uJlPp4HeSzx8oBu4GLsPot691TBRjLMMtoYrSrWYXk2l08DvI5w90AfzAHCDb5HLSQS1wJ/CbUEVp2OxiMoUOfjv5/AEncDXwc6CnyeWko3XATaGK0rfNLiQT6OC3g88fKAXuwx496+zuDWB2qKJ0vdmFpDMd/KPw+QPdMKbg0pNkplYzxiXAO0MVpfHMgKQdgQ7+Efj8genAX4G+ZteSwTYDV4QqSv9pdiHpRgf/ID5/wA3cBfwQ3VpvBVGMmY1v1x2AEkcHvw2fPzAUeAYYb3Yt2iEWA98JVZRuNLuQdKAn24zx+QNXAcvRobeq04DlPn9At7ckQMbv8X3+QAHwR4yRc5r1KeAe4LZQRWnU7GLsKqOD7/MHBgFvAcPMrkXrsL9jHPrvMrsQO8rY4Pv8gZMxrhnrzjj2tRH4L93nv+My8hzf5w+cB8xDh97uBgILYiMjtQ7IuOD7/IGZwCtAjtm1aAnRB5jv8wfGmF2InWRU8H3+wBzgUcybtVZLju7AP33+wASzC7GLjAm+zx/4GfArs+vQkqYA+LvPH5hmdiF2kBGNez5/4D6Mnnha+msCvhmqKH3T7EKsLO33+D5/wI8OfSbJAl7x+QPfNLsQK0vrPb7PH7gYowuu7n
OfeSLA10MVpW+YXYgVpW3wY1Nb/4P47y+n2VctMCFUUbrG7EKsJi2D7/MHhgGLgK5m16KZbiNwSqiidKfZhVhJ2p3j+/yBHhjdcHXoNTA6+bwUG26txaRV8H3+QDbwN4w7zGraPpOBh8wuwkrS5lDf5w84gBcwbldlaZsfvgqHJxscDsThpE/Zbwhv38Cud/6AioQRh5OuZ1+Ht+/wQ9bd88+/0Lh+KUpFyR44hqLp16Bam9n5agUt1dsQcZA95BSKpn439W/M+uaEKkp/Y3YRVuAyu4AE+gU2CP0+vS65G2dOwZc/75n3OIUTLyF78Hga1y9hz7zH6f2digPWadpcSfN/Kulz1e8B2Pb0j2neFMTTZxj5p3yDrAEnoiItbJ/7ExrXLyV7sJ5a4CD3+/yBKj2Tb5oc6se6avrNriNe0XCD8W9zA868boc8LwKqNYyKtKIiLRCN4MwpwuHOImuAcRNecbrx9BpMa61uyzoMJzDX5w+MMLsQs9n+UN/nD+QCK4AhZtfSXpsfmYEzKw+AvNFfpcvoc2jZuYntz98OKFBRel92P66CQwcP7vm/x6hd9XdQii7jzqVo8hUHPB9tqmPrEzfR8+K7cBf2TsXbsaMVwMmZPIdfOhzq/xIbhR6g96X34urSnUh9Nduf+ynubsU0rF1I0fSZ5A6fSH3l++x667f0uviuA9Zr2bOFll2bKP7eEwBsf+6nNG1aTVa/EwBQ0Qg7/nYfXcZ9TYf+6EYDt2BMqpqRbH2o7/MH/gvjdtS24urSHQBnbiE5wybQvGUddcH3yBl2OgA5IybRvHXdIes1rFuEp+9wHJ5sHJ5ssgeNp3lL1ZfP73r797i79iX/ZD0tXTvc5vMHMvYGKbYNvs8fKAT+gs2640bDTUSbG778vmnjcjw9BuDM60rzpiAATZ+txF106HT+rvweNG9ajYpGUJFWmjcFcXfrB8CeBU+imhsomn516t6MvXmBx2JXgzKOnQ/1HwSOM7uIjoo0VLPj5TuNH6JRco+fQvagcXTzZLHn3T+hohHE5aHrOTcA0Lz1U+pWvEW3r95IzvCJNH22ii2PfR8RIWvgWHKGnErr3p3sXfQcrq7FbH3iJgC6jD2XLid9xay3aRcTgBuA35pdSKrZsnEvNvLqRbPr0NJCPTAq0+brt91hjs8f6A48YnYdWtrIxZiVKaPYLvjArRhTLWlaokyPzcWYMWx1qO/zB/oBn6KH2mqJVwMMyZRRfHbb4/8MHXotOQpIg96f7WWbPb7PHxgOrEHPkKslTxMwOFRRusXsQpLNTnv8O9Ch15IrC7jN7CJSwRZ7fJ8/MBZYis0662i21AKMCFWUbjC7kGSyyx7/bnTotdRwkwHn+pbf4/v8gSkY97nTtFQJAwPT+VzfDnv8X5hdgJZxPMDNZheRTJbe4/v8gROBlWbXoWWkOmBAqKJ0t9mFJIPV9/jfM7sALWPlAbPMLiJZLBt8nz/QBbjU7Dq0jHa52QUki2WDj/FLzzO7CC2jjfD5A2k5Y6mVg59RgyY0y0rLvb4lG/d8/sAoYJXZdWga8AVwXLpNzGnVPX5afspqttQTONvsIhLNcsGPzYGmG/U0K0m7HZHlgg9MBw6daVLTzHO+zx9Iq4ZmKwb/PLML0LSD5GCj27O1hxWDP93sAjTtMC4zu4BEslSrvs8f6A1sNbsOTTuMMFAQqihtMruQRLDaHv9MswvQtCPwACebXUSi6OBrWvudYXYBiaKDr2ntN8nsAhLFMuf4Pn9gIJDW0x1ptlcNdAtVlEbNLiReVtrj6729ZnWFwAlmF5EIOvia1jFpcZ5vpeBPNbsATWuHtDjPt0Twff5AAbqbrmYPOvgJNMTsAjStnYp9/kCx2UXESwdf0zpusNkFxMsqwbf9L1LLKP3NLiBeVgm+3uNrdtLP7ALipYOvaR2n9/gJooOv2YkOfrx8/kAO0MfsOjStA/ShfgLohj3NbvQePwF8ZhegaR2UH+t0ZltWCH
6u2QVoWifYeq9vheB7zC5A0zrhOLMLiIcOvqZ1TpbZBcRDB1/TOsdtdgHx0MHXtM7RwY+TDr5mR7YOvsvsAtDBTwo
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Tally the per-drug counts by antiresorptive brand (cAR) and by drug class (ccAR).\n",
"cAR = Counter()\n",
"ccAR = Counter()\n",
"\n",
"for drug_name, n_patients in countDrug.items():\n",
"    for brand, drug_class in antiresorptives.items():\n",
"        if not drug_name.startswith(brand):\n",
"            continue\n",
"        cAR[brand] += n_patients\n",
"        ccAR[drug_class] += n_patients\n",
"\n",
"print(cAR)\n",
"print(ccAR)\n",
"# Pie chart of the brand totals, largest first, with percentages shown to two decimals.\n",
"labels, values = zip(*cAR.most_common())\n",
"plt.pie(values, labels=labels, autopct='%.2f')\n",
"plt.savefig('cAR.png', dpi=400)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"# Regular expressions for the T-score spellings handled here. Each one captures\n",
"# the numeric value, which must contain a decimal point. Raw strings keep the\n",
"# backslash escapes intact for the regex engine.\n",
"PATTERNS = (\n",
"    r'T-\\s*score\\s*=\\s*([-+]?\\d*\\.\\d+)',\n",
"    r'T-\\s*score\\s*=\\s*(- \\d*\\.\\d+)',\n",
"    r'T-score is\\s*([-+]?\\d*\\.\\d+)',\n",
"    r'T score =\\s*([-+]?\\d*\\.\\d+)',\n",
")\n",
"\n",
"\n",
"def T_score(html):\n",
"    \"\"\"Return the lowest T-score found in a report's text, or None when there is none.\n",
"\n",
"    html: the report text to search with every pattern in PATTERNS.\n",
"    \"\"\"\n",
"    lowest = None\n",
"    for pattern in PATTERNS:\n",
"        for match in re.findall(pattern, html):\n",
"            # The second pattern captures values written as '- 2.5'; drop the space before parsing.\n",
"            value = float(match.replace(' ', ''))\n",
"            # Compare against None explicitly: a running minimum of 0.0 is falsy and\n",
"            # would otherwise be overwritten by any later, larger value.\n",
"            if lowest is None or value < lowest:\n",
"                lowest = value\n",
"    return lowest\n"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAEcCAYAAADZQfNOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAIABJREFUeJzt3Xt8HVW99/HPl7QgNwGtJ0JBilCPwaqI8V60taiAKOrBI9FHLU881fNSvOHRajyKaI6gPnpUVE41WC4aQEQo0FNuJmLVKgW5CBGtXKQUBVpuKRVo+T1/rBWYbnYmSbOTnd1836/XfmUua2atmT2Z38xaa88oIjAzMxvMNvUugJmZTWwOFGZmVsqBwszMSjlQmJlZKQcKMzMr5UBhZmalHCislKTFkr5U73LUW9l+kDRf0vLxLtNwSLpB0px6l2OkJD1LUr+kpnqXxRwoGoakWyVtyP8890q6SNJe9S5XkaSQtF+9yzFZVQtmEfG8iOitU5GGLR/fBw+MR8RfI2KniNhUz3JZ4kDRWN4UETsBuwN/B75d5/KMGSU+PseQpClbUz42dvyP2IAi4h/AOcD+A9Mk7SLpNEl3S7pN0mcHTrSSvifpp4W0J0q6PJ+M50haLekzku7JV3bvGixvSf8maZWkdZKWSNojT78iJ7k23/W8o8qyTZL+X87nFkkfynchU/L8Xkmdkn4FPAQ8W9IeOZ91Od9/K6xvsyvogW0pjN8q6dOSbsx3YT+U9JTC/MMlXSPpPkm/lvSCwrwXSbpa0oOSzgIeX27wXaOTJN0v6Y+S5uWJb5d0VUXCj0s6f5CVHC2pL+d7s6T3V26fpGMl3SXpTklH53kLgHcBn8z7/4LCPjg4Dx8n6RxJZ0h6AJgvaRtJCyX9RdJaSWdLelpO/5Scdm3eR1dKas7zyr6XavkMTDsrb9vVkl6Y058OPAu4IJf9k5JmDBwbkt4haWXFfvqYpCV5eDtJX5P0V0l/l3SypO3zvGmSLszlXyfpl/IFyMhFhD8N8AFuBQ7OwzsApwKnFeafBpwP7AzMAP4EtBfS/wmYDxwE3APsmefNATYCXwe2A14DrAf+Oc9fDHwpD782L3tgTvtt4IpCGQLYr2QbPgDcCOwJ7AZclpeZkuf3An8FngdMAaYCVwDfJZ2oDwDuBl5bWbbCtqyu2Gd/APYCngb8qrAtLwLuAl4GNAHvzem3A7YFbgM+lstwJPBoMa+K7Zqf9+FA+ncA9+c8twPWAS2F9L8H/mWQdb0R2BdQ/i4eAg6s+K6Oz/kclufvVm1/VDlujsvb8RbSReL2wEeAFfk72Q74H6A7p38/cAHp+GkCXgw8Nc8r+16q5TMw7chc9k8AtwBTK8uZx2eQj42c/4PAzML8K4Gj8vA3gCV5f++cy/zlPO/LwMk5z6mk41/1/n9utE/dC+DPML+o9I/UD9yX/+HWAM/P85qAR4D9C+nfD/QWxl+WT1i3AW2F6QMnnx0L084G/jMPP37yAbqArxTS7ZTLMiOPDxUofg68vzB+ME8OFMcX5u8FbAJ2Lkz7MrC4smyFbakMFB8ojB8G/CUPfw/4YkX5biKdnF+d968K835NeaCoTP874N2FvDrz8POAe4Hthvm9nwd8pLB9Gwb2V552F/DyavujsA+KgeKKivl9wLzC+O75O50C/N+83S+oWGao76VaPscBKwrj2wB3AgdVljOPz6g4Ns4APpeHZ5ICxw6kgLoe2Lew7CuAW/Lw8aQLqEGPS3+G/vgWrLG8JSJ2JV3FfQj4haRnAtNIV0u3FdLeBkwfGImI3wI3k/6xzq5Y770Rsb5i2T2q5L9HMY+I6AfWFvMZwh7A7YXx26ukKU7bA1gXEQ9WlG24+VWur7hdewPH5iqJ+yTdRzoB7pE/d0Q+0xSWLVMt/UBepwLvlCTg3cDZEfFwtZVIOlTSilxNch8puE0rJFkbERsL4w+RAvZwVe
7zvYGfFfZBHykINAOnAxcDZ0paI+krkqYyvO+l9LuNiMeA1VQ/zqr5MdCWh98JnBcRDwHPIAWMqwrbsCxPB/gqsAq4JFflLRxmflbgQNGAImJTRJxL+oeeTaoOepT0Tz/gWcAdAyOSPkiqWlgDfLJilbtJ2rFi2TVVsl5TzCMv8/RiPkO4k1TFMaBar63iyXYN8DRJO1eUbSC/9aSTxIBnVllfMY/idt1OusrftfDZISK6czmn5xN7cdkyj6eXdCswayCviFhBuuM7iHSSO73aCiRtB/wU+BrQnC8KlpKC+3AM51HQlWluBw6t2A9PiYg7IuLRiPhCROwPvBI4HHgPQ38vg5Xl8e8itxPsyRPfx1BlvxR4hqQDSAHjx3n6PaS7rOcVyr9LpE4fRMSDEXFsRDwbeDPw8YH2Ixs+B4oGpOQIUj1/X6QuhGcDnZJ2lrQ38HHS7TqSngN8Cfg/pCvaT+Z/uKIvSNpW0kGkE8JPqmTdDRwt6YB8Uvsv4LcRcWue/3fg2SVFPxv4iKTpknYFPlW2nRFxO6nq48u5YfUFQDtwiKQNefgjki6RdCDw0by9iyUFqW78g5L2zA20PyA1kM8Hvk86aWzKDaj9kv6WG1bvIVXHfVjSVElvA146UC5JT5X037nxtB/4CilIfTpfce9AOnEuLWzOacBJwKMRMdhvLrYlBfO7gY2SDgVeX7aPKgy1/6s5mXTc7A0g6Rn52ELSXEnPV/otwwOki5HHSr6XM4bI68WS3qbUeeGjwMOk9pEhyx4Rj5KOya+S2iIuzdMfI32X35D0T7nc0yW9IQ8fLmm/HMTvJ11cPTaC/WM4UDSaC/KJ6QGgE3hvRNyQ5x1DusK+GVhOuuI6Jf9TngGcGBHXRsSfgc8Ap+eTPcDfSPXma4Afker1/1iZeURcBvwn6ar3TlKj61GFJMcBp+YqgH+tUv7vA5cA15EadJeSTshlfeXbSPXVa4CfAZ8H/gG8CdgVOJdUd98DnFVY7k+kKpkf5zxvzulvztuyklSn/1Auw4PA1XndK4D/ILU9rCM1Tp8LIGlb4HJSW8MhwFOBL5KuzF9BCjK7kdpa1hbKczrpLmPQk2muyvkwKaDeS7r7WFKybyp1Afvn/X/eMJf5Zs7jEkkPkrb9ZXneM0m96x4gVUn9gifuhp70veTjo8z5pH15L+mC5W05AEBq4/hsLvsnBln+x6R2rZ9UVL99ilS9tEKpl9VlwD/neTPzeD/wG+C7EdEzRDmtUr0bSfyp74eKBuBxzvtQ4LYtWO5WNm/4PAz4U2F8Man6ZhOpXQfSXdL/koLo/DxtPrC8yvovBM4ZJO/3ka5+dxqifJ8gBcT7SQFsV1IwenFe/92kE+aF5B5oedleUuD5VU5/CTCtMP89pPaAtaSg/fi+IF34LQT+kuefDTyt3sdYLttxwBn1Loc/W/bxHYWNG0nbSzos942fTro7+Nko17kD6Sp1RcWsf5DuFubm8feQqn+G41xSe0I1BwPLIjXkl/lX0h3HPsALSHdTV5JO7D8ktfU8i1S/flLFsu8Ejgb+iVQd9QkASfuTuqS+i9Q7aRc2b0A+htQl9TWkRuJ7ge8MUU6zITlQ2HgS8AXSCez3pOqMz23hus7LPVzuB15Hqruu1A+8LreHvIbU1XQ41pDqwat5OqnabSjfiog1EbGOdNI+BDg2ItZGxE8j4qFIVU2duWxFP4yIP0XEBtJdwUB70pHABRGxPCIeIe27YiPwB4COiFgdqVfVccCR8i+jbZR8AE1ykZ4DtOdQ6WqU10PAS2q0urdExGW5ofUIUlfh/SPib4X8dpe0CugALoyIDZt3ZBrUdFLbRDVrSVfzQ/lbYfgbpH78v893QN8gBY7d8vydJTXFE881Ki5b7P66WffiiHhIUrEdZKCra7GxdqCr63B7po2JiDiunvnb6PiOwhpaPLmrcKUzgGMZfrUTwFuBXw4y7zLgDRXdiUfiWFJD68si4qmkH/fB8LrAbt
a9WOkxFU8vzB+0q+sWltUMcKCwBlfZVbhKkm+RqqauqDKvuJ4mSftI+japgf8LgyQ9nXRC/qmk5yo9K+npSs/KOmw
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# For each patient, collect BMD T-scores on either side of the reference date and\n",
"# store the difference in df['BMD Change']; PRE/POST keep the paired values.\n",
"PRE = []\n",
"POST = []\n",
"\n",
"for index, row in df.iterrows():\n",
"    record = posts.find_one({\"_id\": row['病歷號']})\n",
"    if record is None:\n",
"        continue\n",
"    # Progress log only. The full document is deliberately not printed so that\n",
"    # patient records do not end up in the saved notebook output.\n",
"    print(index, row['病歷號'])\n",
"\n",
"    if not record['report']:\n",
"        continue\n",
"\n",
"    # Lowest T-score per examination date, taken from reports whose category contains 'BMD'.\n",
"    BMD = {}\n",
"    for r in record['report']:\n",
"        r_class = r['報告類別'].upper()\n",
"        r_date = r['檢查日期']\n",
"        if 'BMD' not in r_class:\n",
"            continue\n",
"        t_score = T_score(r['html'])\n",
"        if t_score is None:\n",
"            # No T score in report\n",
"            continue\n",
"        if r_date not in BMD or t_score < BMD[r_date]:\n",
"            BMD[r_date] = t_score\n",
"\n",
"    # Reference date: 簽署日 when present, otherwise 流失/停藥日期.\n",
"    # pd.isna accepts strings, floats and timestamps alike, whereas math.isnan\n",
"    # raises TypeError for anything that is not a real number.\n",
"    if pd.isna(row['簽署日']):\n",
"        dStart = row['流失/停藥日期']\n",
"    else:\n",
"        dStart = row['簽署日']\n",
"\n",
"    pre = None\n",
"    post = None\n",
"    # NOTE(review): when several dates fall on the same side of dStart, the value kept\n",
"    # is whichever is iterated last (insertion order), not necessarily the closest date.\n",
"    # NOTE(review): assumes 檢查日期 and dStart are of comparable types - confirm.\n",
"    for d, b in BMD.items():\n",
"        if d > dStart:\n",
"            post = b\n",
"        else:\n",
"            pre = b\n",
"\n",
"    # A T-score of exactly 0.0 is valid, so test for None rather than truthiness.\n",
"    if pre is not None and post is not None:\n",
"        PRE.append(pre)\n",
"        POST.append(post)\n",
"        df.loc[index, 'BMD Change'] = post - pre\n",
"\n",
"df.to_excel('concat3.xls')\n",
"\n",
"D = df.boxplot(column='BMD Change', by='antiresorptives')\n",
"D.get_figure().savefig('cAR3.png', dpi=400)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Paired before/after T-scores from PRE and POST, shown side by side as box plots.\n",
"d = dict(bmd_before=PRE, bmd_after=POST)\n",
"\n",
"df2 = pd.DataFrame(d)\n",
"df2.describe()  # NOTE(review): not the cell's last expression, so this summary is never displayed\n",
"df2.plot(kind='box')\n",
"# plt.savefig('boxplot_outliers.png')\n",
"\n",
"# print(stats.ttest_rel(df2['bmd_before'], df2['bmd_after']))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}