298 lines
12 KiB
Python
298 lines
12 KiB
Python
![]() |
|
||
|
import base64
|
||
|
import difflib
|
||
|
import hashlib
|
||
|
import os
|
||
|
import shelve
|
||
|
import shutil
|
||
|
import subprocess
|
||
|
import traceback
|
||
|
|
||
|
from platipy.dicom.io.rtdose_to_nifti import convert_rtdose
|
||
|
from platipy.dicom.io.rtstruct_to_nifti import convert_rtstruct
|
||
|
|
||
|
import lxml.etree
|
||
|
import lxml.objectify
|
||
|
import pydicom
|
||
|
import SimpleITK as sitk
|
||
|
|
||
|
# Directory scanned by main(); each sub-directory is passed to check().
PATIENTS_ROOT = '/mnt/SRS/NTUH/accuray/database/patients/'
# Root of the converted output tree.
OUT_ROOT = '/mnt/1220/Public/dataset2/G4'
# Path handed to shelve.open() to remember which patient directories are done.
SHELVE = os.path.join(OUT_ROOT, 'shelve')
|
||
|
|
||
|
def hashptid(mrn, hosp='NTUH'):
    """Derive a deterministic pseudonymous identifier from a patient MRN.

    The MRN and hospital code are concatenated, upper-cased (so the result
    is case-insensitive) and hashed with MD5.

    Args:
        mrn: Patient identifier string.
        hosp: Hospital code appended to the MRN before hashing.

    Returns:
        A ``(md5_hex, short_id)`` tuple: the 32-character hex digest and the
        first 8 characters of the base32-encoded digest (i.e. the first
        40 bits of the MD5).

    NOTE(review): the only "salt" is the hospital code, so the mapping can
    presumably be reversed by enumerating MRNs -- confirm this is acceptable
    for the intended de-identification level.
    """
    salted = (mrn + hosp).upper().encode()
    digest = hashlib.md5(salted)

    md5_hex = digest.hexdigest()
    # 8 base32 characters encode exactly the first 5 bytes of the digest.
    short_id = base64.b32encode(digest.digest())[:8].decode()

    return md5_hex, short_id
|
||
|
|
||
|
|
||
|
def _collect_roi_info(ds):
    """Index the ROIs of an RTSTRUCT dataset by ROI name.

    Each ``StructureSetROISequence`` item that has an ``ROINumber`` becomes a
    dict; the matching ``RTROIObservationsSequence`` item (joined through
    ``ReferencedROINumber``) is merged into that same dict.

    Returns:
        dict mapping ROI name -> dict of ROI / observation attributes.
    """
    roi_by_number = {}
    roi_by_name = {}

    for roi in ds.StructureSetROISequence:
        if 'ROINumber' not in roi:
            continue
        info = {
            'ROINumber': roi.ROINumber,
            'ReferencedFrameOfReferenceUID': roi.ReferencedFrameOfReferenceUID,
            'ROIName': roi.ROIName,
            'ROIGenerationAlgorithm': roi.ROIGenerationAlgorithm,
        }
        roi_by_name[roi.ROIName] = info
        roi_by_number[roi.ROINumber] = info

    for obs in ds.RTROIObservationsSequence:
        if 'ObservationNumber' not in obs:
            continue
        observation = {
            'ObservationNumber': obs.ObservationNumber,
            'ReferencedROINumber': obs.ReferencedROINumber,
            'RTROIInterpretedType': obs.RTROIInterpretedType,
            'ROIInterpreter': obs.ROIInterpreter,
        }
        roi_name = roi_by_number[obs.ReferencedROINumber]['ROIName']
        roi_by_name[roi_name].update(observation)

    return roi_by_name


def _convert_structure_set(ds, dcm_rt_file, output_dir, frames, rt_record):
    """Convert one RTSTRUCT file to NIfTI masks and sort them by ROI type.

    Masks whose ``RTROIInterpretedType`` contains ``'TV'`` are moved to
    ``<output_dir>/TV`` and merged (voxel-wise maximum) into a single
    ``TV-<SOPInstanceUID>.nii.gz``; every other mask is moved to a
    sub-directory named after its interpreted type.

    Args:
        ds: The already-read RTSTRUCT dataset.
        dcm_rt_file: Path of the RTSTRUCT file ``ds`` was read from.
        output_dir: Directory the masks are written to.
        frames: Mapping FrameOfReferenceUID -> {'root': image directory, ...}.
        rt_record: Per-plan bookkeeping dict, updated in place.
    """
    rt_record['StructureSetDate'] = ds.StructureSetDate

    if 'StructureSetROISequence' not in ds:
        return

    roi_by_name = _collect_roi_info(ds)
    roi_names = roi_by_name.keys()

    prefix = 'Struct_'
    ct_image = 'ct_image.nii.gz'

    # Without a converted image series in the same frame of reference there
    # is nothing to rasterise the contours onto.
    frame_uid = ds.ReferencedFrameOfReferenceSequence[0].FrameOfReferenceUID
    if frame_uid not in frames:
        return
    dcm_img = frames[frame_uid]['root']

    os.makedirs(output_dir, exist_ok=True)
    print(dcm_img, dcm_rt_file)
    try:
        convert_rtstruct(
            dcm_img,
            dcm_rt_file,
            prefix=prefix,
            output_dir=output_dir,
            output_img=ct_image,
            spacing=None,
            replace_slashes_with='',
        )
    except Exception:
        # Best effort: report the failure and move on to the next file.
        print(traceback.format_exc())
        return

    rt_record['structures'] = {}
    tv = None
    for e in sorted(os.scandir(output_dir), key=lambda e: e.name):
        if e.name.startswith(prefix):
            s = e.name[len(prefix):-len('.nii.gz')]
            # File names are derived from ROI names (slashes removed), so
            # map them back with a fuzzy match.
            matches = difflib.get_close_matches(s, roi_names)
            if not matches:
                # Previously an IndexError that aborted the whole run.
                print('no matching ROI for', e.path)
                continue
            interpreted_type = roi_by_name[matches[0]]['RTROIInterpretedType']

            if 'TV' in interpreted_type:
                dst = os.path.join(output_dir, 'TV')
                mask = sitk.ReadImage(e.path)
                tv = mask if tv is None else sitk.Maximum(tv, mask)
            else:
                dst = os.path.join(output_dir, interpreted_type)

            os.makedirs(dst, exist_ok=True)
            shutil.move(e.path, os.path.join(dst, e.name))

        if e.name == ct_image:
            rt_record['ct_image'] = sitk.ReadImage(e.path)

    # Only write a merged target volume produced by THIS structure set, so a
    # TV left over from an earlier file is never re-written under a new UID.
    # NOTE(review): placement of this write was reconstructed -- confirm.
    if tv is not None:
        rt_record['TV'] = tv
        sitk.WriteImage(tv, os.path.join(output_dir, 'TV-%s.nii.gz'%ds.SOPInstanceUID))


def check(epath):
    """Convert one patient directory (images, RT dose, RT structures) to NIfTI.

    Three passes are made:

    1. ``<epath>/ct/patient`` is walked; each directory containing a DICOM
       file with a ``PatientID`` is converted with ``dcm2niix`` and indexed
       by its ``FrameOfReferenceUID``.
    2. ``<epath>/tps`` is walked for plan XML files; directories holding a
       plan whose state is non-zero are kept.
    3. Every ``*dcm`` file under those plan directories is read; RTDOSE and
       RTSTRUCT objects are converted.

    Args:
        epath: Path of the patient directory.

    Returns:
        The last DICOM dataset that was read, or ``None`` if none was.

    NOTE(review): the returned dataset still carries ``PatientID``; the
    caller in this file stores it in a shelf under ``OUT_ROOT`` -- confirm
    that is intended.
    NOTE(review): nesting below (in particular the ``break``) was
    reconstructed from a copy without indentation -- confirm.
    """
    ds = None
    frames = {}  # FrameOfReferenceUID -> {'files', 'FrameOfReferenceUID', 'root'}

    # --- Pass 1: image series (CT / MR / others) ---
    for root, dirs, files in os.walk(os.path.join(epath, 'ct/patient')):
        dirs.sort()
        for f in sorted(files):
            if f.startswith(('RT', 'SR')) or f == 'DICOMDIR':
                continue
            print(root, f)
            ds = pydicom.dcmread(os.path.join(root, f), force=True)

            if 'PatientID' not in ds:
                continue

            _, pt_hash = hashptid(ds.PatientID)
            output = os.path.join(OUT_ROOT, pt_hash, ds.StudyDate, ds.Modality)
            os.makedirs(output, exist_ok=True)
            subprocess.run([
                "dcm2niix",
                "-f", '%p_%t_%s',
                "-o", output,
                "-w", "0",
                "-z", "y",
                root,
            ])

            if 'FrameOfReferenceUID' in ds:
                uid = ds.FrameOfReferenceUID
                # Keep the directory with the most files per frame of reference.
                if uid not in frames or frames[uid]['files'] < len(files):
                    frames[uid] = {
                        'files': len(files),
                        'FrameOfReferenceUID': uid,
                        'root': root,
                    }

            # dcm2niix was given the whole directory; one file is enough.
            break

    # --- Pass 2: find deliverable plans ---
    VOI = {}  # VOI name -> type; collected but not consumed below
    PLAN_DIRS = []
    for root, dirs, files in os.walk(os.path.join(epath, 'tps')):
        dirs.sort()
        for f in sorted(files):
            if not f.endswith('xml'):
                continue
            fileobject = os.path.join(root, f)
            print(fileobject)

            try:
                tree = lxml.objectify.parse(fileobject)
            except Exception:
                # Unparseable XML: report and skip this file.
                print(traceback.format_exc())
                continue

            plan = tree.getroot()

            PLAN_STATE = 0
            if hasattr(plan, 'PLAN_PROFILE'):  # 1 DELIVERABLE
                if hasattr(plan.PLAN_PROFILE, 'PLAN_STATE'):
                    PLAN_STATE = int(plan.PLAN_PROFILE.PLAN_STATE)
                else:
                    PLAN_STATE = int(plan.PLAN_PROFILE.DELIVERABLE_PLAN)
            if PLAN_STATE == 0:
                continue

            # Several deliverable XML files may share a directory; process
            # that directory only once.
            if root not in PLAN_DIRS:
                PLAN_DIRS.append(root)
            if hasattr(plan, 'VOISET'):
                for v in plan.VOISET.VOI:  # 0 TV, 1 ORGAN
                    VOI[str(v.NAME)] = v.TYPE

    # --- Pass 3: RT dose / RT structure sets of the kept plans ---
    for d in PLAN_DIRS:
        RT = {}
        for root, dirs, files in os.walk(d):
            dirs.sort()
            for f in sorted(files):
                if not f.endswith('dcm'):
                    continue

                dcm_rt_file = os.path.join(root, f)
                ds = pydicom.dcmread(dcm_rt_file)
                RT['PatientID'] = ds.PatientID
                RT['StudyDate'] = ds.StudyDate
                _, pt_hash = hashptid(RT['PatientID'])
                RT['output_dir'] = os.path.join(OUT_ROOT, pt_hash, ds.StudyDate)
                output_dir = os.path.join(RT['output_dir'], 'RT')
                print(ds.Modality, dcm_rt_file, output_dir)

                if ds.Modality == 'RTDOSE':
                    os.makedirs(output_dir, exist_ok=True)
                    RT['dose'] = convert_rtdose(
                        dcm_rt_file,
                        force=False,
                        dose_output_path=os.path.join(output_dir, 'dose-%s.nii.gz'%ds.SOPInstanceUID),
                    )
                elif ds.Modality == 'RTSTRUCT':
                    _convert_structure_set(ds, dcm_rt_file, output_dir, frames, RT)

    return ds
|
||
|
|
||
|
|
||
|
|
||
|
def main():
    """Run check() on every patient directory that has not been done yet.

    Sub-directories of ``PATIENTS_ROOT`` are visited in name order. The
    result of each ``check()`` call is stored in a shelf keyed by directory
    name; names already present in the shelf are skipped, so an interrupted
    run can be resumed.
    """
    os.makedirs(OUT_ROOT, exist_ok=True)

    # The low-level dbm backend may add a suffix to the shelf file name.
    # The context manager closes the shelf even if check() raises.
    with shelve.open(SHELVE) as done:
        for e in sorted(os.scandir(PATIENTS_ROOT), key=lambda e: e.name):
            if not e.is_dir():
                continue
            if e.name in done:
                print('skip', e.name)
                continue
            done[e.name] = check(e.path)
            # Flush after every patient so progress survives a crash.
            done.sync()
|
||
|
|
||
|
# Run the conversion only when executed as a script, not on import.
if __name__ == '__main__':
    main()
|