adm-ntuh-net/ntuh/research/x_classifier.py
2024-12-12 10:19:16 +08:00

93 lines
2.4 KiB
Python
Executable file

#!/usr/bin/python
# -*- coding: utf-8 -*-
# --- Django bootstrap -------------------------------------------------------
# The order of the statements below matters: the parent directory has to be on
# sys.path before `settings` is imported, and Django has to be configured
# before any model module is imported further down.
import os,sys,inspect
# Absolute directory of this script, derived from the file of the current frame.
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
# One level up; inserted at the front of sys.path so that `import settings`
# below resolves (presumably this is the Django project root -- confirm).
parentdir = os.path.dirname(currentdir)
sys.path.insert(0,parentdir)
# Configure Django from the project's `settings` module.
# NOTE(review): setup_environ and django.db.models.loading only exist in old
# Django releases -- confirm the target Django version before touching this.
from django.core.management import setup_environ
import settings
setup_environ(settings)
#from django.conf import settings
import os  # redundant: os is already imported at the top of this block
os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'
# Ask Django for all installed models; the result is kept in loaded_models but
# is not used anywhere else in the code shown here.
from django.db.models.loading import get_models
loaded_models = get_models()
########
import datetime
import re
from categories.models import *
from registry.models import *
import nltk
def write_mallet():
#for opn in OPNote.objects.extra(order_by = ['date']).all():
# print opn.date
pattern = u"([0-9][0-9]:[0-9][0-9])</TD><TD>手術開始"
file = open('/tmp/op.txt', 'w')
file_an = open('/tmp/op_an.txt', 'w')
#for ops in OPSchedule.objects.extra(order_by = ['-OPDate']).all():
for ops in OPSchedule.objects.extra(order_by = ['id']).all():
if not ops.category:
continue
start_date = ops.OPDate
end_date = ops.OPDate + datetime.timedelta(days=1)
opnlist = OPNote.objects.filter(PatChartNo=ops.PatChartNo,date__range=(start_date, end_date))
if len(opnlist) == 0:
continue
elif len(opnlist) == 1:
opn = opnlist[0]
else:
opn = None
for op in opnlist:
m = re.search(pattern, op.content)
# if m:
# print m.group(1)
if m and m.group(1) == ops.StartTime.strftime('%H:%M'):
opn = op
break
if not opn:
continue
cont_orig = nltk.clean_html(opn.content)
cont = re.sub('\s+', ' ', cont_orig)
print >>file, ops.id, 'C%02d'%ops.category.id, cont
print ops.id, 'C%02d'%ops.category.id, cont
cont = re.sub('\W', ' ', cont_orig)
cont = re.sub('\s+', ' ', cont)
print >>file_an, ops.id, 'C%02d'%ops.category.id, cont
# exit()
#
# print ops.id, ops.PatChartNo, ops.StartTime.strftime('%H:%M'), start_date, opn.date
def print_cat():
for cat in Category.objects.extra(order_by = ['id']).all():
print 'C%02d'%cat.id, cat
# Script entry: runs unconditionally at module level (there is no __main__
# guard), so importing this module also regenerates the /tmp/op*.txt files.
write_mallet()
# Uncomment to list the category labels instead.
#print_cat()