# -*- coding: utf-8 -*-
"""Selenium-driven helpers for querying the NTUH portal web applications."""
from __future__ import unicode_literals

import atexit
import base64
import calendar
import datetime
import getpass
import os
import re
import six
import time
import uuid
import logging

# Root logging is configured before the third-party imports below.
FORMAT = '%(asctime)s %(levelname)s %(message)s'
logging.basicConfig(format=FORMAT, level=logging.INFO)

if six.PY2:
    import sys

    # Python 2 only: make implicit str/unicode conversions use UTF-8.
    reload(sys)
    sys.setdefaultencoding('utf-8')
    from urlparse import parse_qs, urlparse
else:
    from urllib.parse import parse_qs, urlparse

import pyquery
from pymongo import MongoClient
from lxml import etree
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
)
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.wait import WebDriverWait
import lxml

# Used to read the login captcha.
import cv2
import pytesseract

from .myutil import *
from .settings import USER_ID, PASSWORD

# Credentials used when Login() is called without explicit arguments.
DefaultUserID = USER_ID
DefaultPassword = PASSWORD

# In-process cache of the most recent portal session.
SESSION = None
SESSION_TIMESTAMP = datetime.datetime.now()
SESSION_CREATED = datetime.datetime.now()

SHELVE_FILE = "/tmp/%d.shelve" % os.getppid()

# Headless Chrome configuration shared by the module-level driver.
options = webdriver.chrome.options.Options()
for _chrome_flag in (
    "--headless",
    '--no-sandbox',
    '--disable-dev-shm-usage',
    "--disable-gpu",  # Last I checked this was necessary.
):
    options.add_argument(_chrome_flag)
del _chrome_flag
driver = webdriver.Chrome(
    options=options,
)

# SECURITY NOTE(review): the fallback below embeds database credentials in
# source control.  It is kept only so existing deployments keep working;
# set PORTAL_MONGODB_URI in the environment and rotate the embedded password.
_SESSION_DB_URI = os.environ.get(
    "PORTAL_MONGODB_URI",
    "mongodb+srv://ntuhuser:0223123456@cluster0.kfeql.mongodb.net/dbname?retryWrites=true&w=majority",
)

_REPORT_VIEWER_URL = "http://ihisaw.ntuh.gov.tw/WebApplication/ElectronicMedicalReportViewer/MobileMasterPage.aspx?SESSION=%s"
_OP_SCHEDULE_URL = "http://ihisaw.ntuh.gov.tw/WebApplication/InPatient/OPManagement/SimpleQueryOpSchedule.aspx?SESSION=%s"
_OP_SCHEDULE_GRID = "#NTUHWeb1_QueryOPPatListCommon1_OPScheduleShowDataGrid1_dgRecordData"
_OP_QUERY_PREFIX = "#NTUHWeb1_QueryOPPatListCommon1_"


def close_driver():
    """Close the current window and always shut the browser session down."""
    try:
        driver.close()
    finally:
        # quit() must run even if close() fails, otherwise the browser
        # process started by this module is left behind.
        driver.quit()


atexit.register(close_driver)


def CloseOtherWindows(current):
    """Close every window except *current*, then focus *current*."""
    for handle in driver.window_handles:
        if handle != current:
            driver.switch_to.window(handle)
            driver.close()
    driver.switch_to.window(current)


def LogWebDriver(prefix=''):
    """Dump the current page source and a screenshot under /tmp for debugging."""
    with open('/tmp/%spage_source.html' % prefix, 'w') as out_file:
        out_file.write(driver.page_source)
    driver.get_screenshot_as_file('/tmp/%sscreenshot.png' % prefix)


def _type_into(css_selector, text, clear=True):
    """Find an input by CSS selector, optionally clear it, and type *text*."""
    field = driver.find_element(By.CSS_SELECTOR, css_selector)
    if clear:
        field.clear()
    field.send_keys(text)
    return field


def _open_session_collection():
    """Return ``(client, collection)`` for the stored portal sessions."""
    client = MongoClient(_SESSION_DB_URI)
    return client, client.portal.SESSION


def Login(UserID=DefaultUserID, Pass=DefaultPassword, NewSession=False):
    """Return a portal SESSION token, reusing a stored one when possible.

    For the default user (and unless *NewSession* is true) the newest session
    recorded for this machine is looked up in MongoDB.  It is reused when it
    was touched within the last 10 minutes, was created within the last
    9 hours, and the portal does not answer with the captcha login form.
    Otherwise the login form is submitted, with the captcha read by
    pytesseract, until the resulting page contains a ``SESSION=`` token.
    Sessions of the default user are written back to MongoDB on a
    best-effort basis.
    """
    global SESSION, SESSION_CREATED, SESSION_TIMESTAMP
    node = uuid.getnode()
    NOW = datetime.datetime.utcnow()

    if UserID == DefaultUserID and not NewSession:
        client = collection = stored = None
        try:
            client, collection = _open_session_collection()
            collection.find_one()
            stored = collection.find_one(
                {"node": node}, sort=[("SESSION_TIMESTAMP", -1)]
            )
            if stored is not None:
                SESSION = stored["SESSION"]
                SESSION_CREATED = stored["SESSION_CREATED"]
                SESSION_TIMESTAMP = stored["SESSION_TIMESTAMP"]
        except Exception:
            # Best effort: fall back to the in-process globals.
            logging.debug("Could not load stored SESSION", exc_info=True)

        fresh = (
            SESSION
            and NOW < SESSION_TIMESTAMP + datetime.timedelta(minutes=10)
            and NOW < SESSION_CREATED + datetime.timedelta(hours=9)
        )
        try:
            if fresh and stored is not None:
                collection.update_one(
                    {"_id": stored["_id"]}, {"$set": {"SESSION_TIMESTAMP": NOW}}
                )
        except Exception:
            logging.debug("Could not refresh SESSION timestamp", exc_info=True)
        finally:
            if client is not None:
                client.close()

        if fresh:
            # Make sure the SESSION still works: an expired one is answered
            # with the login form, which contains the captcha input.
            driver.get(
                "http://portal.ntuh.gov.tw/General/NewRedirect.aspx?SESSION=%s"
                % SESSION
            )
            if 'txtVerifyCode' not in driver.page_source:
                return SESSION

    SESSION = None
    session_pattern = r"SESSION=(\w+)"
    CAPTCHA_FILE = "/tmp/captcha-%s.gif" % getpass.getuser()
    retries = 0
    # A misread captcha just reloads the form, so keep trying until a token
    # shows up.  NOTE(review): there is no upper bound on the attempts.
    while SESSION is None:
        retries += 1
        logging.debug("Login attempt %d", retries)
        current_window = driver.window_handles[0]
        driver.get("http://portal.ntuh.gov.tw/General/Login.aspx")

        # Re-draw the already loaded captcha image onto a canvas so its
        # bytes can be read back as base64.
        ele_captcha = driver.find_element(By.ID, 'imgVerifyCode')
        img_captcha_base64 = driver.execute_async_script(
            """
            var ele = arguments[0], callback = arguments[1];
            ele.addEventListener('load', function fn(){
              ele.removeEventListener('load', fn, false);
              var cnv = document.createElement('canvas');
              cnv.width = this.width; cnv.height = this.height;
              cnv.getContext('2d').drawImage(this, 0, 0);
              callback(cnv.toDataURL('image/jpeg').split(',')[1]);
            }, false);
            ele.dispatchEvent(new Event('load'));
            """,
            ele_captcha,
        )
        with open(CAPTCHA_FILE, "wb") as f:
            f.write(base64.b64decode(img_captcha_base64))
        img = cv2.imread(CAPTCHA_FILE, 0)
        VerifyCode = pytesseract.image_to_string(img).strip()

        driver.find_element(By.ID, "txtUserID").send_keys(UserID)
        driver.find_element(By.ID, "txtPass").send_keys(Pass)
        driver.find_element(By.ID, "txtVerifyCode").send_keys(VerifyCode)
        driver.find_element(By.ID, "rdblQuickMenu_0").click()
        driver.find_element(By.ID, "imgBtnSubmitNew").click()

        html = driver.page_source
        CloseOtherWindows(current_window)
        matches = re.findall(session_pattern, html)
        if matches:
            SESSION = matches[0]

    if UserID == DefaultUserID:
        # Open a dedicated connection: the one used for the lookup above is
        # closed by now and may not exist at all (NewSession=True).
        client = None
        try:
            client, collection = _open_session_collection()
            collection.insert_one(
                {
                    "node": node,
                    "SESSION": SESSION,
                    "SESSION_CREATED": NOW,
                    "SESSION_TIMESTAMP": NOW,
                }
            )
            collection.create_index("node")
            collection.create_index("SESSION_CREATED")
        except Exception:
            logging.debug("Could not store new SESSION", exc_info=True)
        finally:
            if client is not None:
                client.close()
    return SESSION


def QueryModifyPatBase(request, SESSION=None):
    """Look up a patient's base record by ``IdNo`` or ``ChartNo``.

    *request* is a mapping; ``IdNo`` takes precedence over ``ChartNo``.
    Returns a dict of the fields read from the result page, or ``None`` when
    neither key is supplied or the page reports that no record was found.
    """
    if not SESSION:
        SESSION = Login()
    url = (
        "http://hisaw.ntuh.gov.tw/WebApplication/OutPatientAdministration/QueryModifyPatBase.aspx?SESSION=%s"
        % SESSION
    )
    driver.get(url)
    LogWebDriver()

    if "IdNo" in request and request["IdNo"]:
        driver.find_element(By.ID, "NTUHWeb1_txtIdNo").send_keys(request["IdNo"])
    elif "ChartNo" in request and request["ChartNo"]:
        driver.find_element(By.ID, "NTUHWeb1_txtChartNo").send_keys(
            request["ChartNo"]
        )
    else:
        return None
    driver.find_element(By.ID, "NTUHWeb1_btnQuery").click()

    html = driver.page_source
    if "查無此病歷資料" in html:
        return None
    pqhtml = pyquery.PyQuery(html)

    def raw(name):
        # Value attribute of a control, exactly as pyquery returns it.
        return pqhtml("#NTUHWeb1_" + name).val()

    def value(name):
        return xstr(raw(name))

    def selected(name):
        # Text of the selected <option> of a drop-down.
        return pqhtml("#NTUHWeb1_%s option:selected" % name).text()

    def address(number):
        prefix = "AddressControl%d_" % number
        return (
            selected(prefix + "ddlZipCity")
            + selected(prefix + "ddlZipArea")
            + value(prefix + "txtZipCode")
            + value(prefix + "txtAddressRoad")
        )

    r = {}
    r["ChartNo"] = pqhtml("#NTUHWeb1_labCartNo").text()
    r["SpecialCase"] = selected("drpPatBaseSpecialCase")
    r["IdNo"] = raw("txtPatBaseIdNo")
    r["ChtName"] = value("txtPatBaseFChtName") + value("txtPatBaseGChtName")
    r["EngName"] = (
        value("txtPatBaseFEngName")
        + " "
        + value("txtPatBaseGEngName")
        + " "
        + value("txtPatBaseLEngName")
    )
    r["Sex"] = selected("drpPatBaseSexCode")
    r["Birth"] = datetime.date(
        int(raw("txtPatBaseBirthYear")),
        int(raw("txtPatBaseBirthMonth")),
        int(raw("txtPatBaseBirthDay")),
    ).isoformat()
    r["BirthCity"] = selected("drpPatBaseBirthCity")
    r["Nation"] = selected("drpPatBaseNation")
    r["AddressControl1"] = address(1)
    r["AddressControl2"] = address(2)
    r["ContTel"] = value("txtPatBaseContTelAreaCode") + value("txtPatBaseContTelNo")
    r["Mobile"] = raw("txtPatBaseMobile")
    r["Contacter"] = value("txtPatBaseContacterFname") + value(
        "txtPatBaseContacterGname"
    )
    r["ContacterRelation"] = selected("drpPatBaseContacterRelation")
    r["ContacterTel"] = value("txtPatBaseContacterTelArea") + value(
        "txtPatBaseContacterTel"
    )
    r["ContacterMobile"] = raw("txtPatBaseContacterMobile")
    r["AddressControl3"] = address(3)
    return r


def PACSImageShowList(PersonID, SESSION=None):
    """Return the PACS image list for *PersonID* as a list of dicts."""
    if SESSION is None:
        SESSION = Login()
    url = (
        "http://ihisaw.ntuh.gov.tw/WebApplication/OtherIndependentProj/PatientBasicInfoEdit/PACSImageShowList.aspx?SESSION=%s&PatClass=I&PersonID=%s&Hosp=T0&EMRPop=Y"
        % (SESSION, PersonID)
    )
    driver.get(url)
    html = driver.page_source
    pqhtml = pyquery.PyQuery(html)

    # Each grid row is identified by the id prefix of its PatChartNo cell.
    pattern = "(BloodCallRecordDataGrid_ctl.*)_PatChartNo"
    # Output key -> id suffix of the cell holding the value.
    columns = (
        ("PatChartNo", "PatChartNo"),
        ("RequestSheetNo", "RequestSheetNo"),
        ("ExamDate", "ExamDate"),
        ("LinkOrderName", "LinkOrderNameNew"),
        ("Modality", "Modality"),
        ("VerifiedStateString", "VerifiedStateString"),
    )
    results = []
    for row_prefix in re.findall(pattern, html):
        results.append(
            {
                key: remove_space(pqhtml("#%s_%s" % (row_prefix, suffix)).text())
                for key, suffix in columns
            }
        )
    return results


def _open_report_viewer(id_text, SESSION):
    """Query the report viewer for *id_text* and return the parsed listing."""
    driver.get(_REPORT_VIEWER_URL % SESSION)
    driver.find_element(By.ID, "txbIDInput").send_keys(id_text)
    driver.find_element(By.ID, "btnQueryAction").click()
    # Click every group header whose class does not contain
    # "groupHeader-close" before the listing is read.
    for header in driver.find_elements(By.CLASS_NAME, "groupHeader"):
        if "groupHeader-close" not in header.get_attribute("class"):
            header.click()
    return pyquery.PyQuery(driver.page_source)


def _fetch_report_frame(link_id):
    """Open one report, capture its iframe and return to the listing.

    Returns ``(iframe_url, parsed_query, iframe_html)``.
    """
    driver.find_element(By.ID, link_id).click()
    frame_url = pyquery.PyQuery(driver.page_source)("#Reportifrm").attr.src
    query = parse_qs(urlparse(frame_url).query)
    driver.switch_to.frame(driver.find_element(By.ID, "Reportifrm"))
    try:
        html = driver.page_source
    finally:
        # Never leave the driver focused inside the iframe.
        driver.switch_to.default_content()
    driver.find_element(By.ID, "backToHome").click()
    return frame_url, query, html


# Report viewer: all reports of a patient.
def ElectronicMedicalReportViewer(ID, startswith='', excluded=(), SESSION=None):
    """Fetch the reports listed for *ID* in the electronic report viewer.

    Items are skipped when their ``param`` attribute does not start with
    *startswith* (if given), is contained in *excluded*, or when either of
    their date columns is ``'*'``.  Each returned dict holds the listing
    columns, the iframe URL and HTML, the identifiers from the iframe query
    string, and one entry per ``*_lbl*`` span found in the report body.
    """
    if not SESSION:
        SESSION = Login()
    listing = _open_report_viewer(ID, SESSION)

    report = []
    # remove_namespaces is required for pyquery to parse tr
    for ri in listing(".reportitem").remove_namespaces():
        pqri = pyquery.PyQuery(ri)
        cells = pqri("td")
        r = {}
        r['param'] = ri.attrib['param']
        r["報告類別"] = cells[0].text.strip()
        r["檢查日期"] = cells[1].text.strip()
        r["報告日期"] = cells[2].text.strip()

        if startswith and not r['param'].startswith(startswith):
            continue
        if r['param'] in excluded:
            continue
        if r["檢查日期"] == '*' or r["報告日期"] == '*':
            continue

        ifrmURL, parse, html = _fetch_report_frame(pqri(".linkrptbt").attr.id)
        r["ChartNo"] = parse["ChartNo"][0]
        r["ReportKey"] = parse["ReportKey"][0]
        r["ReportCode"] = parse["ReportCode"][0]
        r["PersonID"] = parse["PersonID"][0]
        r["url"] = ifrmURL
        r["html"] = html

        # Every span whose id contains "_lbl" (except "Title" ones) becomes a
        # field; the key is the last id segment without its 3-char prefix.
        for lbl in pyquery.PyQuery(html)("span"):
            span_id = lbl.attrib.get("id")
            if span_id is None or "_lbl" not in span_id or "Title" in span_id:
                continue
            r[span_id.split("_")[-1][3:]] = lbl.text
        report.append(r)
    return report


def SimpleInfoShowUsingPlaceHolder(form_name):
    """Collect the inputs of form *form_name* plus the popup it opened.

    Returns the form's ``name -> value`` pairs, the ``KeyCode``/``KeyName``
    entry matching ``AccountIDSE``, and the ``html``/``url`` of the last
    window.  That window is closed if it differs from the first one, and the
    first window is navigated back one page.
    """
    data = {}
    form = driver.find_element(By.NAME, form_name)
    for field in form.find_elements(By.CSS_SELECTOR, "input"):
        data[field.get_attribute("name")] = field.get_attribute("value")

    key_codes = data["KeyCodeList"].split("|")
    key_names = data["KeyNameList"].split("|")
    for code, name in zip(key_codes, key_names):
        if code == data["AccountIDSE"]:
            data["KeyCode"] = code
            data["KeyName"] = name

    old_window = driver.window_handles[0]
    new_window = driver.window_handles[-1]
    driver.switch_to.window(new_window)
    data["html"] = driver.page_source
    data["url"] = driver.current_url
    if new_window != old_window:
        # Only close a real popup; closing the sole window would end the
        # browser session shared by the whole module.
        driver.close()
    driver.switch_to.window(old_window)
    driver.back()
    return data


def _collect_visit_rows(pqhtml, grid_selector, prefix_len, start_key, end_key,
                        form_name, AfterDate, log_row=False):
    """Parse one visit grid of the medical-record page into a list of dicts.

    Keys are the last segment of each span id with *prefix_len* characters
    stripped.  Rows whose *end_key* date is on or after *AfterDate* also get
    the note fetched through SimpleInfoShowUsingPlaceHolder(*form_name*).
    """
    rows = []
    for tr in pyquery.PyQuery(pqhtml(grid_selector))("tr"):
        pqtr = pyquery.PyQuery(tr)
        spans = pqtr("span")
        if not len(spans):
            continue
        r = {}
        for span in spans:
            r[span.attrib["id"].split("_")[-1][prefix_len:]] = span.text
        end_date = (
            datetime.datetime.strptime(r[end_key], "%Y/%m/%d").date()
            if r[end_key]
            else None
        )
        if AfterDate and end_date and end_date >= AfterDate:
            driver.find_element(By.ID, pqtr("input")[0].attrib["id"]).click()
            if log_row:
                logging.info(r)
            r.update(SimpleInfoShowUsingPlaceHolder(form_name))
        # Dates are reported as YYYY/MM/DD; return them dash-separated.
        r[start_key] = r[start_key].replace("/", "-")
        if r[end_key]:
            r[end_key] = r[end_key].replace("/", "-")
        rows.append(r)
    return rows


def PatientMedicalRecordListQuery(Chart, AfterDate=None, SESSION=None):
    """Return a patient's visit history (inpatient, emergency, outpatient).

    Chart: chart number; left-padded with zeros to 7 characters.
    AfterDate: when given, the note of every visit ending on or after this
        date is fetched as well.
    """
    if not SESSION:
        SESSION = Login()
    Chart = str(Chart).rjust(7, "0")
    url = (
        "http://ihisaw.ntuh.gov.tw/WebApplication/OtherIndependentProj/PatientBasicInfoEdit/PatientMedicalRecordListQuery.aspx?QueryBySelf=N&SESSION=%s"
        % SESSION
    )
    driver.get(url)
    _type_into("input.textBoxShort:nth-child(4)", Chart, clear=False)
    driver.find_element(
        By.CSS_SELECTOR,
        "#NTUHWeb1_PatientBasicInfoQueryByIDAndName1 > input:nth-child(13)",
    ).click()
    body = driver.page_source
    pqhtml = pyquery.PyQuery(body)
    result = {}

    # Patient description line, including a possible "deceased" remark.
    # NOTE(review): the named groups of this pattern appear to have lost
    # their "<name>" parts -- as written, "(?P." is not valid regex syntax and
    # re.compile() raises re.error.  The code below needs at least a group
    # called "extra".  The original names cannot be recovered from this file;
    # restore them from version control.
    pattern = r'(?P.*?)\((?P.*?),(?P.*?),(?P.*?)\) (?P .*)? \((?P.*?)\)'
    description_re = re.compile(pattern)
    first = description_re.search(body)
    if first:
        result["Description"] = first.group(0)
    found = [m.groupdict() for m in description_re.finditer(body)]
    if found:
        info = found[0]
        info["dead"] = None
        if info["extra"]:
            matches = re.findall(r"(..../../..) 已死亡", info["extra"])
            if matches:
                info["dead"] = matches[0].replace("/", "-")
        result.update(info)
    else:
        result["dead"] = None

    # The group name is restored from its use below (d["PersonID"]).
    person_re = re.compile(
        r'id="NTUHWeb1_PatAccountListRecord1_personidHidden" value="(?P<PersonID>.*?)"'
    )
    found = [m.groupdict() for m in person_re.finditer(body)]
    if found:
        result["PersonID"] = found[0]["PersonID"]

    # Inpatient
    result["InPat"] = _collect_visit_rows(
        pqhtml,
        "#NTUHWeb1_PatAccountListRecord1_GridViewInPatRecord",
        7, "InDate", "OutDate", "dischargenotelist", AfterDate,
    )
    # Emergency
    result["Emergency"] = _collect_visit_rows(
        pqhtml,
        "#NTUHWeb1_PatAccountListRecord1_GridViewEmergencyContent",
        9, "ComeClinicDate", "DischargeDate", "emerrecordlist", AfterDate,
    )
    # Outpatient
    result["OutPat"] = _collect_visit_rows(
        pqhtml,
        "#NTUHWeb1_PatAccountListRecord1_GridViewOutPatRecord",
        5, "ComeClinicDate", "ComeClinicDate", "dischargenotelist", AfterDate,
        log_row=True,
    )
    return result


def _submit_op_schedule_query(SESSION, fields, button_selector):
    """Open the OP schedule page, fill *fields* and press the query button.

    *fields* is a sequence of ``(css_selector, text, clear)``.  A failing
    click is reported through PrintException and otherwise ignored.
    Returns the button element.
    """
    driver.get(_OP_SCHEDULE_URL % SESSION)
    for selector, text, clear in fields:
        _type_into(selector, text, clear=clear)
    button = driver.find_element(By.CSS_SELECTOR, button_selector)
    try:
        button.click()
    except Exception:
        PrintException()
    return button


def _scrape_op_schedule_rows(NoCheck, anes_key, report_click_error):
    """Parse the OP schedule grid of the current page into a list of dicts.

    For every operation that is not marked "完成" and whose id is not in
    *NoCheck*, the detail page is opened to read ``Remark`` and
    ``MainOpMode``.  *anes_key* names the scraped field copied to ``Anes``;
    *report_click_error* is called with the exception when opening a detail
    page fails.
    """
    today_year = datetime.date.today().year
    grid = pyquery.PyQuery(driver.page_source)(_OP_SCHEDULE_GRID)
    results = []
    for tr in grid("tr")[1:]:
        r = {}
        pqtr = pyquery.PyQuery(tr)
        for child in pqtr("span") + pqtr("a"):
            key = child.attrib["id"].split("_")[-1]
            r[key] = child.text
            if "title" in child.attrib:
                r[key + "Title"] = child.attrib["title"].strip()

        # Dates shown without a year get the current year prepended.
        if r["OPDateString"].count("/") == 1:
            r["OPDateString"] = "%s/%s" % (today_year, r["OPDateString"])
        r["Complete"] = r["CompleteStatueName"]
        r["OPDate"] = r["OPDateString"]
        r["OpRoomNo"] = r["OpRoomNoShow"]
        r["OpSeqNo"] = r["OpSeqNoshow"]
        r["PatName"] = r["LinkPatName"]
        r["PatWard"] = r["PopupPatWardInfoWindow"]
        r["Anes"] = r[anes_key]
        r["StartTime"] = r["EstStartTimeShortString"]
        r["SpendTime"] = r["EstSpendTime"]
        op_id = "%s-%s-%s-%s" % (
            r["OPDateString"],
            r["OpRoomNo"],
            r["OpSeqNo"],
            r["PatChartNo"],
        )

        if r["CompleteStatueName"] != "完成" and op_id not in NoCheck:
            first_link = pqtr("a")[0]
            r["lbnSelect"] = first_link.attrib["id"]
            r["javascript"] = first_link.attrib["href"].split(":")[1]
            try:
                driver.find_element(By.ID, r["lbnSelect"]).click()
            except Exception as e:
                report_click_error(e)
            time.sleep(1)
            detail = pyquery.PyQuery(driver.page_source)
            r["Remark"] = detail("#NTUHWeb1_QueryOpSchedule1_txbRemark").attr["value"]
            r["MainOpMode"] = detail(
                "#NTUHWeb1_QueryOpSchedule1_TextboxPrepareOPMode"
            ).attr["value"]
            driver.back()
        results.append(r)
    return results


def _log_click_failure_with_screenshot(exc):
    """Log *exc* with traceback and save a screenshot of the page."""
    logging.exception(exc)
    driver.save_screenshot("screenshot.png")


def _print_click_failure(exc):
    """Report the exception currently being handled via PrintException."""
    PrintException()


def SimpleQueryOpSchedule(DrCode, StartDate, EndDate, SESSION=None, NoCheck=()):
    """Return the operation schedule of attending doctor *DrCode*.

    *NoCheck* lists operation ids (``date-room-seq-chart``) whose detail page
    should not be opened.
    """
    if not SESSION:
        SESSION = Login()
    button = _submit_op_schedule_query(
        SESSION,
        (
            (_OP_QUERY_PREFIX + "txbStartDate", str(StartDate), True),
            (_OP_QUERY_PREFIX + "txbEndDate", str(EndDate), True),
            (_OP_QUERY_PREFIX + "QueryDrIDInfoByDrName1_EmpNoQueryInput", DrCode, True),
        ),
        _OP_QUERY_PREFIX + "QueryByMainDrCode",
    )
    logging.info(button)
    return _scrape_op_schedule_rows(
        NoCheck, "lbtPreAnesFormTitle", _log_click_failure_with_screenshot
    )


def SimpleQueryOpScheduleByChartNo(ChartNo, SESSION=None, NoCheck=()):
    """Return the operation schedule entries of the patient *ChartNo*."""
    if not SESSION:
        SESSION = Login()
    _submit_op_schedule_query(
        SESSION,
        ((_OP_QUERY_PREFIX + "QueryPersonIDByChartNo1_txbChartNoInput", ChartNo, False),),
        _OP_QUERY_PREFIX + "QueryPersonIDByChartNo1_btnQuery",
    )
    return _scrape_op_schedule_rows(
        NoCheck, "lbtPreAnesFormTitle", _print_click_failure
    )


# For resident
def SimpleQueryOpScheduleDRRS(DrCode, StartDate, EndDate, SESSION=None, NoCheck=()):
    """Return the operation schedule of *DrCode* using the resident query."""
    if not SESSION:
        SESSION = Login()
    _submit_op_schedule_query(
        SESSION,
        (
            (_OP_QUERY_PREFIX + "txbStartDate", str(StartDate), True),
            (_OP_QUERY_PREFIX + "txbEndDate", str(EndDate), True),
            (_OP_QUERY_PREFIX + "QueryDrIDInfoByDrName1_EmpNoQueryInput", DrCode, True),
        ),
        _OP_QUERY_PREFIX + "QueryFinishRecordByDRRS",
    )
    return _scrape_op_schedule_rows(NoCheck, "lbtPreAnesForm", _print_click_failure)


# ID is a chart number or a national ID number.
def ReportPathology(ID, SESSION=None):
    """Return the pathology reports listed for *ID*.

    Reports without a report date are skipped.  Returns ``[]`` when the
    listing has no pathology group.
    """
    if not SESSION:
        SESSION = Login()
    listing = _open_report_viewer(str(ID), SESSION)

    ancestors = listing('.groupHeader[reportgroup="病理"]').parents()
    if len(ancestors) == 0:
        return []
    group = pyquery.PyQuery(ancestors[-1])

    label_prefix = "#rReportTab_lsvReportBody_ctrl0_ctl00_lbl"
    label_names = (
        "PathCode",
        "SpecimenGetDate",
        "ReportDate",
        "SpecimenCode",
        "DepCode",
        "WardNoRoomCoBedNo",
        "Tissue",
        "BedDiagnosis",
        "Result",
        "ExamDoctorName",
        "InChargeDoctorName",
        "ReCheckDoctorName",
    )
    report = []
    for ri in group(".reportitem"):
        link_id = pyquery.PyQuery(ri)(".linkrptbt").attr.id
        logging.info(link_id)
        ifrmURL, parse, html = _fetch_report_frame(link_id)
        logging.info(parse)

        body = pyquery.PyQuery(html)
        item = {name: body(label_prefix + name).text() for name in label_names}
        # ignore item without ReportDate
        if item["ReportDate"] == "":
            continue
        item.update(
            {
                "url": ifrmURL,
                "html": html,
                "ChartNo": parse["ChartNo"][0],
                "ReportKey": parse["ReportKey"][0],
                "ReportCode": parse["ReportCode"][0],
                "PersonID": parse["PersonID"][0],
            }
        )
        report.append(item)
    return report


def QueryInPatientByDate(ID, begin, end, SESSION=None):
    """Ward operations: list the inpatients of user *ID* between two dates.

    *begin* and *end* must provide ``year``, ``month`` and ``day``.
    """
    if not SESSION:
        SESSION = Login()
    url = (
        "http://ihisaw.ntuh.gov.tw/WebApplication/InPatient/Ward/OpenWard.aspx?SESSION=%s"
        % SESSION
    )
    driver.get(url)

    prefix = "#NTUHWeb1_QueryInPatientPersonAccountControl1_"
    _type_into(prefix + "IDInputTextBox", ID)
    for date_ui, day in (
        ("DateTextBoxYearMonthDayInputUI1", begin),
        ("DateTextBoxYearMonthDayInputUI2", end),
    ):
        _type_into("%s%s_YearInput" % (prefix, date_ui), day.year)
        _type_into("%s%s_MonthInput" % (prefix, date_ui), day.month)
        _type_into("%s%s_DayInput" % (prefix, date_ui), day.day)
    driver.find_element(By.CSS_SELECTOR, prefix + "EmpNoCareQueryButton").click()

    # Keep un-ticking the checkbox until it reports unchecked; the element
    # is looked up again after every click.
    checkbox_selector = prefix + "CheckBoxShowDrMainColumn"
    checkbox = driver.find_element(By.CSS_SELECTOR, checkbox_selector)
    while checkbox.get_attribute("checked"):
        checkbox.click()
        time.sleep(1)
        checkbox = driver.find_element(By.CSS_SELECTOR, checkbox_selector)

    d = pyquery.PyQuery(driver.page_source)
    PatientList = []
    for tr in d(prefix + "DataGridAccountList > tbody")("tr")[1:]:
        row = pyquery.PyQuery(tr)
        spans = row("span")
        Pat = {}
        for node in spans + row("a"):
            if "id" in node.attrib:
                key = node.attrib["id"].split("_")[-1]
                Pat[key] = node.text
                if "title" in node.attrib:
                    Pat[key + "Title"] = node.attrib["title"]
        # Positional columns of the grid.
        Pat["主治"] = spans[7].text
        Pat["住院"] = spans[8].text
        Pat["科"] = spans[9].text
        Pat["入"] = spans[10].text
        Pat["住院總天數"] = spans[10].attrib["title"].split(":")[-1].strip()
        Pat["出"] = spans[11].text
        Pat["狀態"] = spans[12].text
        Pat["狀態2"] = spans[13].text
        Pat["id"] = "%s-%s" % (Pat["入"], Pat["PatChartNo"])
        PatientList.append(Pat)
    return PatientList


def QueryInPatientByMonth(ID, year, month, SESSION=None):
    """Ward operations: list the inpatients of user *ID* for a whole month."""
    if not SESSION:
        SESSION = Login()
    _, last_day = calendar.monthrange(year, month)
    return QueryInPatientByDate(
        ID, datetime.date(year, month, 1), datetime.date(year, month, last_day), SESSION
    )


def QueryDrIDInfoByID(ID, SESSION=None):
    """Look up a staff member by employee number.

    Returns a dict built from the ``key:value`` pairs shown in the result
    label.
    """
    if not SESSION:
        SESSION = Login()
    url = (
        "http://ihisaw.ntuh.gov.tw/WebApplication/InPatient/Ward/OpenWard.aspx?SESSION=%s&FromLogin=Y"
        % SESSION
    )
    driver.get(url)
    _type_into("#NTUHWeb1_QueryDrIDInfoByDrName1_EmpNoQueryInput", ID, clear=False)
    driver.find_element(
        By.CSS_SELECTOR, "#NTUHWeb1_QueryDrIDInfoByDrName1_QueryByID"
    ).click()
    try:
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located(
                (By.ID, "NTUHWeb1_QueryDrIDInfoByDrName1_EmpNoLabel")
            )
        )
    finally:
        time.sleep(1)

    d = pyquery.PyQuery(driver.page_source)
    # NOTE(review): the original call replaced " " with " ", which is a no-op
    # as visible here; a non-breaking space was presumably intended.
    string = (
        d("#NTUHWeb1_QueryDrIDInfoByDrName1_EmpNoLabel").text().replace("\u00a0", " ")
    )
    pattern = r"(\S+):(\S+)"
    return dict(re.findall(pattern, string))


# NOTE(review): this function continues beyond the reviewed part of the file;
# only the locator calls were migrated, everything else is unchanged.
def WardQueryUncompletedChart(SESSION=None, DEBUG=None):
    if not SESSION:
        SESSION = Login()
    url = (
        "http://ihisaw.ntuh.gov.tw/WebApplication/MedicalRecordManagement/WardQueryUncompletedChart.aspx?SESSION=%s"
        % SESSION
    )
    driver.get(url)
    elem = driver.find_element(By.CSS_SELECTOR, "#NTUHWeb1_RBDEPT")
    elem.click()
    try:
        element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "NTUHWeb1_ddlDeptCode"))
        )
    finally:
        time.sleep(1)
    elem = driver.find_element(By.CSS_SELECTOR, "#NTUHWeb1_ddlDeptCode")
    select = Select(elem)
    select.select_by_value("SURG")
    elem = driver.find_element(By.CSS_SELECTOR, "#NTUHWeb1_btn_QueryByDept")
    elem.click()
    try:
        element = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.ID, "NTUHWeb1_DeptDataList"))
        )
    finally:
        time.sleep(1)
    html = driver.page_source
    d = pyquery.PyQuery(html)
    Dr = {}
    for a in d("#Table1")("a"):
        id = a.attrib["id"]
        name = id.replace("DrID", "DrName")
        name = d("#%s" % name).text()
        Dr[id] = {"ID": a.text_content(), "name": name}
    ret = []
    for id in Dr:
        elem = driver.find_element(By.ID, id)
        elem.click()
        try:
            element = WebDriverWait(driver, 10).until(
                EC.text_to_be_present_in_element_value(
                    (By.ID, "NTUHWeb1_txtDrName"), Dr[id]["name"]
                )
            )
        except:
            continue
        finally:
            time.sleep(1)
        html = driver.page_source
        d = pyquery.PyQuery(html)
        RecordData = []
        overdue = 0
        DrType = None
        for tr in d("#NTUHWeb1_dgRecordData")("tr")[1:]:
            pqtr = pyquery.PyQuery(tr)
            td = pqtr("td")
            data = {}
            data["sequence"] = td[0].text
            data["MRN"] = td[1].text
            data["AccountIDSE"] = td[2].text_content().strip()
            data["unit"] = td[3].text
            data["start"] = td[4].text
            data["end"] = td[5].text
            data["name"] = td[6].text
            data["bed"] = td[7].text
            data["Type"] = td[8].text_content().strip()
            data["VS"] = td[9].text
            data["R"] = td[10].text
            data["note"] = td[11].text
data["overdue"] = td[12].text.replace(" ", "") if Dr[id]["name"] == data["VS"]: DrType = "VS" elif DrType is None and Dr[id]["name"] == data["R"]: DrType = "R" if data["overdue"]: overdue += 1 RecordData.append(data) # print Dr[id]['ID'], Dr[id]['name'], DrType, overdue if overdue: ret.append( { "id": Dr[id]["ID"], "name": Dr[id]["name"], "type": DrType, "overdue": RecordData, } ) if DEBUG and len(ret) > 2: return ret return ret def WardQueryUncompletedChartCost(SESSION=None, DEBUG=None): ret = [] for a in WardQueryUncompletedChart(SESSION, DEBUG): a.update(QueryDrIDInfoByID(a["id"], SESSION)) ret.append(a) return ret def BriefHistoryLink(PersonID, SESSION=None): """ 病患就診紀錄 健保卡 查詢病人過去報告及相關單純查詢資料 """ if not SESSION: SESSION = Login() url = ( "http://ihisaw.ntuh.gov.tw/WebApplication/OtherIndependentProj/PatientBasicInfoEdit/ReportResultQuery.aspx?SESSION=%s&PersonID=%s" % (SESSION, PersonID) ) driver.get(url) # elem = driver.find_element_by_css_selector("#BriefHistoryLink") elem = driver.find_element(By.CSS_SELECTOR, "#BriefHistoryLink") elem.click() html = driver.page_source pattern = "身高:(.*?)體重:(.*?)" m = re.search(pattern, html) r = {} r["Height"] = None r["Weight"] = None try: r["Height"] = m.group(1).strip() r["Weight"] = m.group(2).strip() except: pass return r NotDrug = (None, "商品名", "特殊醫囑:", "用藥原因:", "處方", "重複原因:") def SimpleInfoShowUsingPlaceHolderDrug(form_name, timeout=10): # print driver.window_handles # driver.save_screenshot('/tmp/screenshot1.png') data = {} form = driver.find_element_by_name(form_name) for input in form.find_elements_by_css_selector("input"): data[input.get_attribute("name")] = input.get_attribute("value") KeyCodeList = data["KeyCodeList"].split("|") KeyNameList = data["KeyNameList"].split("|") for i in range(len(KeyCodeList)): if KeyCodeList[i] == data["AccountIDSE"]: data["KeyCode"] = KeyCodeList[i] data["KeyName"] = KeyNameList[i] old_window = driver.window_handles[0] new_window = driver.window_handles[-1] 
driver.switch_to.window(new_window) body = driver.page_source pqhtml = pyquery.PyQuery(body) # driver.save_screenshot('/tmp/screenshot2.png') data["html"] = driver.page_source data["url"] = driver.current_url TreeViewItemList = [] for a in pqhtml("a"): if "id" in a.attrib: id = a.attrib["id"] if id.startswith("TreeViewItemt"): m = re.search("'TreeViewItem','s(.*)'", a.attrib["href"]) KeyCode = m.group(1) KeyName = a.text TreeViewItemList.append((id, KeyCode, KeyName)) ret = [] # driver.implicitly_wait(10) for id, KeyCode, KeyName in TreeViewItemList: elem = driver.find_element(By.ID, id) # ignored_exceptions=(NoSuchElementException,StaleElementReferenceException,) # elem = WebDriverWait(driver, 10 ,ignored_exceptions=ignored_exceptions).until(expected_conditions.presence_of_element_located((By.ID, id))) # wait = WebDriverWait(driver, 10) # elem = wait.until(EC.element_to_be_clickable((By.ID, id))) elem.click() elem = WebDriverWait(driver, 10).until( expected_conditions.text_to_be_present_in_element( (By.CSS_SELECTOR, "div.reportQuery"), KeyCode ) ) # reportQuery = '' # cycles = 0 # while KeyCode not in reportQuery: # time.sleep(.1) # body = driver.page_source # pqItem = pyquery.PyQuery(body) # reportQuery = unicode(pqItem('div.reportQuery')) # cycles += 1 # old_page = driver.find_element_by_tag_name('html') # print(old_page.id) # WebDriverWait(driver, timeout).until(EC.staleness_of(old_page)) body = driver.page_source pqItem = pyquery.PyQuery(body) if six.PY2: reportQuery = unicode(pqItem("div.reportQuery")) TableDrugData = unicode(pqItem("#TableDrugData")) else: reportQuery = pqItem("div.reportQuery") TableDrugData = pqItem("#TableDrugData") Drug = [] if TableDrugData: pqTableDrugData = pyquery.PyQuery(TableDrugData) for tr in pqTableDrugData("tr"): pqtr = pyquery.PyQuery(tr) d = pqtr("td")[0].text if d not in NotDrug: # m = re.search("([\w.-]+) ", d) # print(type(lxml.html.tostring(tr))) Drug.append(lxml.html.tostring(tr).decode("utf-8")) # print(id) # 
print(KeyCode) # print('*** %s ***'%KeyName) # print('\n'.join(Drug)) # print(reportQuery) # print(TableDrugData) # print(len(TableDrugData)) ret.append( { "id": id, "KeyCode": KeyCode, "KeyName": KeyName, "Drug": Drug, # 'Drug': sorted(Drug), } ) driver.close() driver.switch_to.window(old_window) # driver.save_screenshot('/tmp/screenshot3.png') driver.back() return ret def ShowMedicalRecordDrug(Chart, SESSION=None): # def PatientMedicalRecordListQuery(q, Chart, SESSION = None): if not SESSION: SESSION = Login() Chart = str(Chart) while len(Chart) < 7: Chart = "0" + Chart """ 病患就診紀錄 Show hospital visit AfterDate: Also fetch medical record after Date """ url = ( "http://ihisaw.ntuh.gov.tw/WebApplication/OtherIndependentProj/PatientBasicInfoEdit/PatientMedicalRecordListQuery.aspx?QueryBySelf=N&SESSION=%s" % SESSION ) # print url driver.get(url) elem = driver.find_element_by_css_selector("input.textBoxShort:nth-child(4)") elem.send_keys(Chart) elem = driver.find_element_by_css_selector( "#NTUHWeb1_PatientBasicInfoQueryByIDAndName1 > input:nth-child(13)" ) elem.click() body = driver.page_source pqhtml = pyquery.PyQuery(body) result = {} r = {} for input in pqhtml("input"): # id = input.attr.id name = input.attrib["name"] if name.endswith("ShowMedicalRecord"): elem = driver.find_element_by_name(name) elem.click() return SimpleInfoShowUsingPlaceHolderDrug("dischargenotelist") def OPNoteList(Chart, SESSION=None): # def PatientMedicalRecordListQuery(q, Chart, SESSION = None): if not SESSION: SESSION = Login() Chart = str(Chart) while len(Chart) < 7: Chart = "0" + Chart url = ( "http://ihisaw.ntuh.gov.tw/WebApplication/OtherIndependentProj/PatientBasicInfoEdit/PatientMedicalRecordListQuery.aspx?QueryBySelf=N&SESSION=%s" % SESSION ) # print url driver.get(url) elem = driver.find_element_by_css_selector("input.textBoxShort:nth-child(4)") elem.send_keys(Chart) elem = driver.find_element_by_css_selector( "#NTUHWeb1_PatientBasicInfoQueryByIDAndName1 > input:nth-child(13)" ) 
elem.click() driver.save_screenshot("old_window.png") logging.info('OPNoteList: old_window') elem = driver.find_element_by_css_selector('#NTUHWeb1_PatAccountListRecord1_ShowOperationList') elem.click() while True: old_window = driver.window_handles[0] new_window = driver.window_handles[-1] # logging.info((len(driver.window_handles), old_window, new_window)) if len(driver.window_handles) > 1: break time.sleep(1) driver.switch_to.window(new_window) driver.save_screenshot("new_window.png") body = driver.page_source pqhtml = pyquery.PyQuery(body) TreeViewItemList = [] for a in pqhtml("a"): if "id" in a.attrib: id = a.attrib["id"] if id.startswith("TreeViewItemt"): m = re.search("'TreeViewItem','s(.*)'", a.attrib["href"]) KeyCode = m.group(1) KeyName = a.text TreeViewItemList.append((id, KeyCode, KeyName)) ret = [] for id, KeyCode, KeyName in TreeViewItemList: logging.info((id, KeyCode, KeyName)) SurgDate = KeyName.split('_')[-1] RQ = None try : elem = driver.find_element(By.ID, id) elem.click() elem = WebDriverWait(driver, 10).until( expected_conditions.text_to_be_present_in_element( (By.CSS_SELECTOR, "div.reportQuery"), SurgDate ) ) except Exception as e: logging.exception(e) driver.save_screenshot("screenshot.png") continue body = driver.page_source pqItem = pyquery.PyQuery(body) reportQuery = pqItem("div.reportQuery") RQ = reportQuery.outerHtml() ret.append( { "id": id, "KeyCode": KeyCode, "KeyName": KeyName, "reportQuery": RQ, } ) logging.info(driver) driver.close() logging.info(driver) driver.switch_to.window(old_window) return ret def OpenClinics(date, AMPM, ClinicNo, Dept=None, SESSION=None): ''' 診間首頁選擇病人 ''' year, month, day = date.split('-') if not SESSION: SESSION = Login() url = ( "http://hisaw.ntuh.gov.tw/WebApplication/Clinics/OpenClinics.aspx?SESSION=%s" % SESSION ) # print url driver.get(url) if Dept: elem = driver.find_element_by_css_selector("#NTUHWeb1_DeptDropList") select = Select(elem) select.select_by_value(AMPM) elem = 
driver.find_element_by_css_selector("#NTUHWeb1_DateTextBoxYearMonthDayInputUI1_YearInput") elem.clear() elem.send_keys(year) elem = driver.find_element_by_css_selector("#NTUHWeb1_DateTextBoxYearMonthDayInputUI1_MonthInput") elem.clear() elem.send_keys(month) elem = driver.find_element_by_css_selector("#NTUHWeb1_DateTextBoxYearMonthDayInputUI1_DayInput") elem.clear() elem.send_keys(day) elem = driver.find_element_by_css_selector("#NTUHWeb1_AMPMDropList") select = Select(elem) select.select_by_value(AMPM) elem = driver.find_element_by_css_selector("#NTUHWeb1_ClinicNoInput") elem.clear() elem.send_keys(ClinicNo) driver.save_screenshot('screenshot.png') elem = driver.find_element_by_css_selector("#NTUHWeb1_QueryScheduleList") elem.click() elem = driver.find_element_by_css_selector("#NTUHWeb1_ShowDetailPatInfo") elem.click() body = driver.page_source pqBody = pyquery.PyQuery(body) span = (pqBody('#NTUHWeb1_PatientDefailInfoLabel')) # print(dir(span)) ret = [] for line in span.text().splitlines() : # print (line.split(' ')) ret.append(line.split(' ')[1]) # exit() return ret def lbl_value(tr, immutable=[]): r = {} d2 = pyquery.PyQuery(tr)("span") for lbl in d2: if "id" not in lbl.attrib: continue if "lbl" not in lbl.attrib["id"]: continue if "Title" in lbl.attrib["id"]: continue key = lbl.attrib["id"].split("_")[-1][3:] value = lbl.text # print(key,value) if (key not in r) or (key not in immutable): r[key] = value # print(r) return r # 健保審查系統 案件查詢 def QueryCase(NhiOrderCode='37029B', ApplySDate=None, SESSION=None): if not SESSION: SESSION = Login() url = ( "http://ahisaw.ntuh.gov.tw/WebApplication/Administration/NtuhACE/Pages/PreReview/QueryCase.aspx?SESSION=%s" % SESSION ) # print( url) # exit() driver.get(url) # 案件 不限 if ApplySDate is not None: elem = driver.find_element_by_css_selector("#txbApplySDate") elem.clear() elem.send_keys(ApplySDate) elem = driver.find_element_by_css_selector("#txbNhiOrderCode") elem.clear() elem.send_keys(NhiOrderCode) elem = 
driver.find_element_by_css_selector("#rdoBelongList_1") elem.click() elem = driver.find_element_by_css_selector("#btnQuery") elem.click() body = driver.page_source pqBody = pyquery.PyQuery(body) tbody = pqBody('#pnlCaseList > table:nth-child(6) > tbody:nth-child(1)') # for tr in pyquery.PyQuery(tbody)('tr'): # for i, tr in enumerate(tbody('tr').items()): ret = [] for i, tr in enumerate(tbody.children('tr')): if i == 0: continue # print(lbl_value(tr)) # exit() ret.append(lbl_value(tr, immutable=['PackageTime'])) # return ret # 個人案件 if ApplySDate is not None: elem = driver.find_element_by_css_selector("#txbApplySDate") elem.clear() elem.send_keys(ApplySDate) elem = driver.find_element_by_css_selector("#txbNhiOrderCode") elem.clear() elem.send_keys(NhiOrderCode) elem = driver.find_element_by_css_selector("#rdoBelongList_0") elem.click() elem = driver.find_element_by_css_selector("#btnQuery") elem.click() body = driver.page_source pqBody = pyquery.PyQuery(body) tbody = pqBody('#pnlCaseList > table:nth-child(6) > tbody:nth-child(1)') # for tr in pyquery.PyQuery(tbody)('tr'): # for i, tr in enumerate(tbody('tr').items()): lnkSelect = [] for a in tbody('a'): lnkSelect.append(a.attrib['id']) for id in lnkSelect: elem = driver.find_element(By.ID, id) elem.click() # print(driver.page_source) val = lbl_value(driver.page_source, immutable=['ApplyDate']) print(val) ret.append(val) driver.back() # break # exit() return ret def QueryCaseByPass(UserID, Pass, NhiOrderCode='37029B', ApplySDate=None): SESSION = Login(UserID, Pass, NewSession=True) url = ( "http://ahisaw.ntuh.gov.tw/WebApplication/Administration/NtuhACE/Pages/PreReview/QueryCase.aspx?SESSION=%s" % SESSION ) # print( url) # exit() driver.get(url) driver.get(url) # retry because the first will fail ret = [] # 個人案件 if ApplySDate is not None: elem = driver.find_element_by_css_selector("#txbApplySDate") elem.clear() elem.send_keys(ApplySDate) elem = driver.find_element_by_css_selector("#txbNhiOrderCode") elem.clear() 
elem.send_keys(NhiOrderCode) elem = driver.find_element_by_css_selector("#rdoBelongList_0") elem.click() elem = driver.find_element_by_css_selector("#btnQuery") elem.click() body = driver.page_source pqBody = pyquery.PyQuery(body) tbody = pqBody('#pnlCaseList > table:nth-child(6) > tbody:nth-child(1)') # for tr in pyquery.PyQuery(tbody)('tr'): # for i, tr in enumerate(tbody('tr').items()): lnkSelect = [] for a in tbody('a'): lnkSelect.append(a.attrib['id']) for id in lnkSelect: elem = driver.find_element(By.ID, id) elem.click() # print(driver.page_source) val = lbl_value(driver.page_source, immutable=['ApplyDate']) print(val) ret.append(val) driver.back() # break # exit() return ret def QueryCaseByChartNo(ChartNo, NhiOrderCode='37029B', ApplySDate='2017/01/01', SESSION=None): if not SESSION: SESSION = Login() url = ( "http://ahisaw.ntuh.gov.tw/WebApplication/Administration/NtuhACE/Pages/PreReview/QueryCase.aspx?SESSION=%s" % SESSION ) # print( url) # exit() driver.get(url) # elem = driver.find_element_by_css_selector("#txbChartNo") elem = driver.find_element(By.CSS_SELECTOR, "#txbChartNo") elem.clear() elem.send_keys(ChartNo) # elem = driver.find_element_by_css_selector("#txbApplySDate") elem = driver.find_element(By.CSS_SELECTOR, "#txbApplySDate") elem.clear() elem.send_keys(ApplySDate) # 案件 不限 elem = driver.find_element(By.CSS_SELECTOR, "#txbNhiOrderCode") elem.clear() elem.send_keys(NhiOrderCode) elem = driver.find_element(By.CSS_SELECTOR, "#rdoBelongList_1") elem.click() elem = driver.find_element(By.CSS_SELECTOR, "#btnQuery") elem.click() body = driver.page_source pqBody = pyquery.PyQuery(body) tbody = pqBody('#pnlCaseList > table:nth-child(6) > tbody:nth-child(1)') # for tr in pyquery.PyQuery(tbody)('tr'): # for i, tr in enumerate(tbody('tr').items()): ret = [] for i, tr in enumerate(tbody.children('tr')): if i == 0: continue # print(lbl_value(tr)) # exit() ret.append(lbl_value(tr, immutable=['PackageTime'])) # 個人案件 elem = driver.find_element(By.CSS_SELECTOR, 
"#txbNhiOrderCode") elem.clear() elem.send_keys(NhiOrderCode) elem = driver.find_element(By.CSS_SELECTOR, "#rdoBelongList_0") elem.click() elem = driver.find_element(By.CSS_SELECTOR, "#btnQuery") elem.click() body = driver.page_source pqBody = pyquery.PyQuery(body) tbody = pqBody('#pnlCaseList > table:nth-child(6) > tbody:nth-child(1)') # for tr in pyquery.PyQuery(tbody)('tr'): # for i, tr in enumerate(tbody('tr').items()): lnkSelect = [] for a in tbody('a'): lnkSelect.append(a.attrib['id']) for id in lnkSelect: elem = driver.find_element(By.ID, id) elem.click() # print(driver.page_source) val = lbl_value(driver.page_source, immutable=['ApplyDate']) # print(val) ret.append(val) driver.back() # break # exit() return ret if __name__ == "__main__": # a = Login() a = OpenClinics('2024-12-10', AMPM='2', ClinicNo=9) # a = QueryModifyPatBase({'ChartNo': '3009684'}) # a = QueryModifyPatBase({'ChartNo': '8052734'}) # a = ElectronicMedicalReportViewer('3009684', startswith='T0', excluded=['T0_20181011630_T0186538260']) # a = PatientMedicalRecordListQuery('6715701') # a = SimpleQueryOpSchedule('002867', '2018/05/01', '2018/05/31') # a = ReportPathology('2747688') # a = ReportPathology('6369113') # a = QueryInPatientByMonth('003160', 2017, 10) # a = QueryDrIDInfoByID(settings.USER_ID) # a = WardQueryUncompletedChart() # a = WardQueryUncompletedChartCost() # a = SimpleQueryOpScheduleByChartNo('3644408') # a = ShowMedicalRecordDrug("3228492") # a = ElectronicMedicalReportViewer('3009684') # a = OPNoteList('6033296') # a = QueryCase() # a = QueryCaseByChartNo('3906656') # a = QueryCaseByPass('018522','ah651223') MyPrettyPrinter().pprint(a) print(len(a)) # print(a[0].keys())