adm-ntuh-net/forteo/analysis.ipynb
2024-12-12 10:19:16 +08:00

758 lines
24 KiB
Text

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from collections import Counter\n",
"\n",
"import math\n",
"import re\n",
"\n",
"from pandas import read_excel\n",
"from pymongo import MongoClient\n",
"from pyquery import PyQuery as pq\n",
"from scipy import stats\n",
"\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"SHEETS = (\n",
" (\"台灣大學醫學院附設醫院_201601-201809.xls\", \"Sheet1\"), \n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"frames = []"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"for file_name, sheet_name in SHEETS:\n",
" data = read_excel(file_name, sheet_name)\n",
" frames.append(data)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"df=pd.concat(frames, ignore_index=True, sort=False)\n",
"df.to_excel('concat2.xls')"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>醫院</th>\n",
" <th>醫師</th>\n",
" <th>系統編號</th>\n",
" <th>病患姓名</th>\n",
" <th>簽署日</th>\n",
" <th>key-in日</th>\n",
" <th>患者出生年月日</th>\n",
" <th>患者狀況</th>\n",
" <th>流失/停藥日期</th>\n",
" <th>用藥時間</th>\n",
" <th>病患是否參加P1NP</th>\n",
" </tr>\n",
" <tr>\n",
" <th>是否自費</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>健保給付</th>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>80</td>\n",
" <td>43</td>\n",
" <td>80</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>自費</th>\n",
" <td>271</td>\n",
" <td>271</td>\n",
" <td>271</td>\n",
" <td>271</td>\n",
" <td>271</td>\n",
" <td>271</td>\n",
" <td>271</td>\n",
" <td>271</td>\n",
" <td>187</td>\n",
" <td>271</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 醫院 醫師 系統編號 病患姓名 簽署日 key-in日 患者出生年月日 患者狀況 流失/停藥日期 用藥時間 \\\n",
"是否自費 \n",
"健保給付 80 80 80 80 80 80 80 80 43 80 \n",
"自費 271 271 271 271 271 271 271 271 187 271 \n",
"\n",
" 病患是否參加P1NP \n",
"是否自費 \n",
"健保給付 0 \n",
"自費 1 "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# print(df['是否自費'])\n",
"df.groupby('是否自費').count()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th colspan=\"8\" halign=\"left\">用藥時間</th>\n",
" <th colspan=\"8\" halign=\"left\">系統編號</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>25%</th>\n",
" <th>50%</th>\n",
" <th>75%</th>\n",
" <th>max</th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>25%</th>\n",
" <th>50%</th>\n",
" <th>75%</th>\n",
" <th>max</th>\n",
" </tr>\n",
" <tr>\n",
" <th>是否自費</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>健保給付</th>\n",
" <td>80.0</td>\n",
" <td>12.334821</td>\n",
" <td>7.326534</td>\n",
" <td>0.142857</td>\n",
" <td>6.455357</td>\n",
" <td>12.392857</td>\n",
" <td>18.651786</td>\n",
" <td>24.0</td>\n",
" <td>80.0</td>\n",
" <td>40738.875000</td>\n",
" <td>3719.022181</td>\n",
" <td>35494.0</td>\n",
" <td>37584.0</td>\n",
" <td>40289.5</td>\n",
" <td>44118.75</td>\n",
" <td>47796.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>自費</th>\n",
" <td>271.0</td>\n",
" <td>7.450053</td>\n",
" <td>6.200420</td>\n",
" <td>0.214286</td>\n",
" <td>2.732143</td>\n",
" <td>5.642857</td>\n",
" <td>10.160714</td>\n",
" <td>24.0</td>\n",
" <td>271.0</td>\n",
" <td>41558.937269</td>\n",
" <td>3793.111387</td>\n",
" <td>35258.0</td>\n",
" <td>38007.0</td>\n",
" <td>41723.0</td>\n",
" <td>44780.00</td>\n",
" <td>47792.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 用藥時間 \\\n",
" count mean std min 25% 50% 75% \n",
"是否自費 \n",
"健保給付 80.0 12.334821 7.326534 0.142857 6.455357 12.392857 18.651786 \n",
"自費 271.0 7.450053 6.200420 0.214286 2.732143 5.642857 10.160714 \n",
"\n",
" 系統編號 \\\n",
" max count mean std min 25% 50% \n",
"是否自費 \n",
"健保給付 24.0 80.0 40738.875000 3719.022181 35494.0 37584.0 40289.5 \n",
"自費 24.0 271.0 41558.937269 3793.111387 35258.0 38007.0 41723.0 \n",
"\n",
" \n",
" 75% max \n",
"是否自費 \n",
"健保給付 44118.75 47796.0 \n",
"自費 44780.00 47792.0 "
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.groupby('是否自費').describe()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>醫院</th>\n",
" <th>醫師</th>\n",
" <th>系統編號</th>\n",
" <th>病患姓名</th>\n",
" <th>簽署日</th>\n",
" <th>key-in日</th>\n",
" <th>患者出生年月日</th>\n",
" <th>流失/停藥日期</th>\n",
" <th>是否自費</th>\n",
" <th>用藥時間</th>\n",
" <th>病患是否參加P1NP</th>\n",
" </tr>\n",
" <tr>\n",
" <th>患者狀況</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>10-成功問卷</th>\n",
" <td>111</td>\n",
" <td>111</td>\n",
" <td>111</td>\n",
" <td>111</td>\n",
" <td>111</td>\n",
" <td>111</td>\n",
" <td>111</td>\n",
" <td>18</td>\n",
" <td>111</td>\n",
" <td>111</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11-成功問卷-次回拒訪</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12-成功問卷-已停藥</th>\n",
" <td>210</td>\n",
" <td>210</td>\n",
" <td>210</td>\n",
" <td>210</td>\n",
" <td>210</td>\n",
" <td>210</td>\n",
" <td>210</td>\n",
" <td>210</td>\n",
" <td>210</td>\n",
" <td>210</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20-電話錯誤/無此人</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24-拒訪</th>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>0</td>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26-往生</th>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2B-五次聯絡不到</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2D-暫時停藥-觀察中</th>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2E-暫時停藥-住院中</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2G-連續三個月連絡不到</th>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31-無人接聽</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33-語音信箱/答錄機</th>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 醫院 醫師 系統編號 病患姓名 簽署日 key-in日 患者出生年月日 流失/停藥日期 是否自費 \\\n",
"患者狀況 \n",
"10-成功問卷 111 111 111 111 111 111 111 18 111 \n",
"11-成功問卷-次回拒訪 1 1 1 1 1 1 1 0 1 \n",
"12-成功問卷-已停藥 210 210 210 210 210 210 210 210 210 \n",
"20-電話錯誤/無此人 1 1 1 1 1 1 1 0 1 \n",
"24-拒訪 8 8 8 8 8 8 8 0 8 \n",
"26-往生 6 6 6 6 6 6 6 0 6 \n",
"2B-五次聯絡不到 1 1 1 1 1 1 1 0 1 \n",
"2D-暫時停藥-觀察中 3 3 3 3 3 3 3 1 3 \n",
"2E-暫時停藥-住院中 1 1 1 1 1 1 1 0 1 \n",
"2G-連續三個月連絡不到 6 6 6 6 6 6 6 0 6 \n",
"31-無人接聽 1 1 1 1 1 1 1 0 1 \n",
"33-語音信箱/答錄機 2 2 2 2 2 2 2 1 2 \n",
"\n",
" 用藥時間 病患是否參加P1NP \n",
"患者狀況 \n",
"10-成功問卷 111 0 \n",
"11-成功問卷-次回拒訪 1 0 \n",
"12-成功問卷-已停藥 210 1 \n",
"20-電話錯誤/無此人 1 0 \n",
"24-拒訪 8 0 \n",
"26-往生 6 0 \n",
"2B-五次聯絡不到 1 0 \n",
"2D-暫時停藥-觀察中 3 0 \n",
"2E-暫時停藥-住院中 1 0 \n",
"2G-連續三個月連絡不到 6 0 \n",
"31-無人接聽 1 0 \n",
"33-語音信箱/答錄機 2 0 "
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.groupby('患者狀況').count()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th colspan=\"8\" halign=\"left\">用藥時間</th>\n",
" <th colspan=\"8\" halign=\"left\">系統編號</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>25%</th>\n",
" <th>50%</th>\n",
" <th>75%</th>\n",
" <th>max</th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>25%</th>\n",
" <th>50%</th>\n",
" <th>75%</th>\n",
" <th>max</th>\n",
" </tr>\n",
" <tr>\n",
" <th>是否自費</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>健保給付</th>\n",
" <td>36.0</td>\n",
" <td>13.112103</td>\n",
" <td>6.840638</td>\n",
" <td>0.750000</td>\n",
" <td>7.678571</td>\n",
" <td>16.196429</td>\n",
" <td>18.651786</td>\n",
" <td>20.857143</td>\n",
" <td>36.0</td>\n",
" <td>38853.555556</td>\n",
" <td>3038.409645</td>\n",
" <td>35494.0</td>\n",
" <td>37015.75</td>\n",
" <td>37777.0</td>\n",
" <td>40363.25</td>\n",
" <td>47017.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>自費</th>\n",
" <td>174.0</td>\n",
" <td>7.182677</td>\n",
" <td>5.494101</td>\n",
" <td>0.678571</td>\n",
" <td>2.758929</td>\n",
" <td>5.678571</td>\n",
" <td>9.705357</td>\n",
" <td>23.714286</td>\n",
" <td>174.0</td>\n",
" <td>40316.954023</td>\n",
" <td>3181.145216</td>\n",
" <td>35335.0</td>\n",
" <td>37448.50</td>\n",
" <td>40391.5</td>\n",
" <td>42677.75</td>\n",
" <td>46896.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 用藥時間 \\\n",
" count mean std min 25% 50% 75% \n",
"是否自費 \n",
"健保給付 36.0 13.112103 6.840638 0.750000 7.678571 16.196429 18.651786 \n",
"自費 174.0 7.182677 5.494101 0.678571 2.758929 5.678571 9.705357 \n",
"\n",
" 系統編號 \\\n",
" max count mean std min 25% 50% \n",
"是否自費 \n",
"健保給付 20.857143 36.0 38853.555556 3038.409645 35494.0 37015.75 37777.0 \n",
"自費 23.714286 174.0 40316.954023 3181.145216 35335.0 37448.50 40391.5 \n",
"\n",
" \n",
" 75% max \n",
"是否自費 \n",
"健保給付 40363.25 47017.0 \n",
"自費 42677.75 46896.0 "
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2 = df.query('患者狀況 == \"12-成功問卷-已停藥\"')\n",
"df2.groupby('是否自費').describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}