"""Batch-convert patient folders (DICOM images + TPS plans) to NIfTI.

For every patient directory below ``PATIENTS_ROOT`` the script

1. runs ``dcm2niix`` on each image directory found under ``ct/patient``,
2. looks under ``tps`` for plan XML files whose plan state is non-zero,
3. converts the RTDOSE / RTSTRUCT DICOM files in those plan directories
   with platipy and sorts the structure masks by their interpreted type.

Processed patient directory names are remembered in a ``shelve`` database
at ``SHELVE`` so that a re-run skips them.
"""
import base64
import difflib
import hashlib
import os
import shelve
import shutil
import subprocess
import traceback

from platipy.dicom.io.rtdose_to_nifti import convert_rtdose
from platipy.dicom.io.rtstruct_to_nifti import convert_rtstruct
import lxml.etree
import lxml.objectify
import pydicom
import SimpleITK as sitk

PATIENTS_ROOT = '/mnt/SRS/NTUH/accuray/database/patients/'
OUT_ROOT = '/mnt/1220/Public/dataset2/G4'
SHELVE = os.path.join(OUT_ROOT, 'shelve')


def hashptid(mrn, hosp='NTUH'):
    """Return pseudonymous identifiers derived from a patient id.

    Args:
        mrn: Patient identifier string (``check`` passes the DICOM
            ``PatientID``).
        hosp: Hospital code appended to ``mrn`` before hashing.

    Returns:
        A ``(md5, short_id)`` tuple: the MD5 hex digest of the upper-cased
        ``mrn + hosp`` and the first 8 characters of the base32 encoding of
        the same digest.
    """
    digest = hashlib.md5((mrn + hosp).upper().encode())
    short_id = base64.b32encode(digest.digest())[:8].decode()
    return digest.hexdigest(), short_id


def _convert_image_series(epath):
    """Run dcm2niix on each image directory below ``<epath>/ct/patient``.

    Only the first usable file of a directory is read; dcm2niix is then
    pointed at the whole directory.

    Returns:
        ``(frames, last_ds)`` where ``frames`` maps a FrameOfReferenceUID to
        a dict with keys ``files``, ``FrameOfReferenceUID`` and ``root`` (the
        directory with the most files wins), and ``last_ds`` is the last
        dataset read, or ``None`` if no file was read.
    """
    frames = {}
    last_ds = None
    for root, dirs, files in os.walk(os.path.join(epath, 'ct/patient')):
        dirs.sort()
        for fname in sorted(files):
            if fname.startswith(('RT', 'SR')) or fname == 'DICOMDIR':
                continue
            print(root, fname)
            last_ds = pydicom.dcmread(os.path.join(root, fname), force=True)
            if 'PatientID' not in last_ds:
                continue
            _, short_id = hashptid(last_ds.PatientID)
            output = os.path.join(
                OUT_ROOT, short_id, last_ds.StudyDate, last_ds.Modality)
            os.makedirs(output, exist_ok=True)
            result = subprocess.run([
                "dcm2niix",
                "-f", '%p_%t_%s',
                "-o", output,
                "-w", "0",
                "-z", "y",
                root,
            ])
            if result.returncode != 0:
                # Best effort: report the failure but keep processing.
                print('dcm2niix failed', result.returncode, root)
            if 'FrameOfReferenceUID' in last_ds:
                uid = last_ds.FrameOfReferenceUID
                if uid not in frames or frames[uid]['files'] < len(files):
                    frames[uid] = {
                        'files': len(files),
                        'FrameOfReferenceUID': uid,
                        'root': root,
                    }
            # One representative file per directory is enough.
            break
    return frames, last_ds


def _find_deliverable_plan_dirs(epath):
    """Return directories below ``<epath>/tps`` holding a non-zero-state plan.

    A plan XML counts when ``PLAN_PROFILE.PLAN_STATE`` (or, if that element
    is absent, ``PLAN_PROFILE.DELIVERABLE_PLAN``) converts to a non-zero
    int.  XML files that fail to parse are reported and skipped.  Each
    directory is listed once, in discovery order.
    """
    plan_dirs = []
    for root, dirs, files in os.walk(os.path.join(epath, 'tps')):
        dirs.sort()
        for fname in sorted(files):
            if not fname.endswith('xml'):
                continue
            xml_path = os.path.join(root, fname)
            print(xml_path)
            try:
                tree = lxml.objectify.parse(xml_path)
            except Exception:
                # Malformed plan files are expected; keep Ctrl-C working.
                print(traceback.format_exc())
                continue
            plan = tree.getroot()
            plan_state = 0
            if hasattr(plan, 'PLAN_PROFILE'):
                profile = plan.PLAN_PROFILE
                if hasattr(profile, 'PLAN_STATE'):
                    plan_state = int(profile.PLAN_STATE)
                else:
                    plan_state = int(profile.DELIVERABLE_PLAN)
            if plan_state == 0:
                continue
            if root not in plan_dirs:
                plan_dirs.append(root)
    return plan_dirs


def _index_rois(ds):
    """Map ROI name -> ROI info merged with its observation info.

    Reads ``StructureSetROISequence`` and ``RTROIObservationsSequence`` of
    the RTSTRUCT dataset ``ds``.
    """
    by_number = {}
    by_name = {}
    for item in ds.StructureSetROISequence:
        if 'ROINumber' in item:
            info = {
                'ROINumber': item.ROINumber,
                'ReferencedFrameOfReferenceUID':
                    item.ReferencedFrameOfReferenceUID,
                'ROIName': item.ROIName,
                'ROIGenerationAlgorithm': item.ROIGenerationAlgorithm,
            }
            by_name[item.ROIName] = info
            by_number[item.ROINumber] = info
    for item in ds.RTROIObservationsSequence:
        if 'ObservationNumber' in item:
            observation = {
                'ObservationNumber': item.ObservationNumber,
                'ReferencedROINumber': item.ReferencedROINumber,
                'RTROIInterpretedType': item.RTROIInterpretedType,
                'ROIInterpreter': item.ROIInterpreter,
            }
            roi_name = by_number[item.ReferencedROINumber]['ROIName']
            by_name[roi_name].update(observation)
    return by_name


def _sort_structure_masks(output_dir, prefix, ct_image, rois, RT):
    """Move ``<prefix>*.nii.gz`` masks into per-type sub-directories.

    Masks whose interpreted type contains ``'TV'`` go to ``TV/`` and are
    combined with ``sitk.Maximum``; all others go to a directory named
    after their interpreted type.  Masks that cannot be matched to an ROI,
    or whose ROI has no interpreted type, are reported and left in place.

    Returns:
        The combined target-volume image, or ``None`` if there was none.
    """
    roi_names = list(rois)
    target = None
    for entry in sorted(os.scandir(output_dir), key=lambda x: x.name):
        if entry.name.startswith(prefix):
            stem = entry.name[len(prefix):-len('.nii.gz')]
            # File names were sanitised by the converter, so match fuzzily.
            matches = difflib.get_close_matches(stem, roi_names)
            if not matches:
                print('no ROI matches', entry.name)
                continue
            interpreted = rois[matches[0]].get('RTROIInterpretedType')
            if interpreted is None:
                print('no RTROIInterpretedType for', matches[0])
                continue
            if 'TV' in interpreted:
                dst = os.path.join(output_dir, 'TV')
                mask = sitk.ReadImage(entry.path)
                target = mask if target is None else sitk.Maximum(target, mask)
            else:
                dst = os.path.join(output_dir, interpreted)
            os.makedirs(dst, exist_ok=True)
            shutil.move(entry.path, os.path.join(dst, entry.name))
        if entry.name == ct_image:
            RT['ct_image'] = sitk.ReadImage(entry.path)
    return target


def _convert_structure_set(ds, dcm_rt_file, output_dir, frames, RT):
    """Convert one RTSTRUCT file to masks and record results in ``RT``.

    Does nothing further when the dataset has no ROI sequence, when its
    frame of reference was not seen among the converted image series, or
    when ``convert_rtstruct`` raises (the traceback is printed).
    """
    RT['StructureSetDate'] = ds.StructureSetDate
    if 'StructureSetROISequence' not in ds:
        return
    rois = _index_rois(ds)
    prefix = 'Struct_'
    ct_image = 'ct_image.nii.gz'
    frame_uid = ds.ReferencedFrameOfReferenceSequence[0].FrameOfReferenceUID
    if frame_uid not in frames:
        return
    dcm_img = frames[frame_uid]['root']
    os.makedirs(output_dir, exist_ok=True)
    print(dcm_img, dcm_rt_file)
    try:
        convert_rtstruct(
            dcm_img, dcm_rt_file, prefix=prefix, output_dir=output_dir,
            output_img=ct_image, spacing=None, replace_slashes_with='')
    except Exception:
        # Best effort per structure set; keep Ctrl-C working.
        print(traceback.format_exc())
        return
    RT['structures'] = {}
    target = _sort_structure_masks(output_dir, prefix, ct_image, rois, RT)
    if target is not None:
        RT['TV'] = target


def check(epath):
    """Convert one patient directory.

    Args:
        epath: Path of the patient directory; its ``ct/patient`` and ``tps``
            sub-directories are scanned.

    Returns:
        The last DICOM dataset that was read, or ``None`` if none was read.
    """
    frames, ds = _convert_image_series(epath)

    for plan_dir in _find_deliverable_plan_dirs(epath):
        # NOTE(review): apart from 'TV', the entries of RT are written but
        # not read anywhere in this file.
        RT = {}
        for root, dirs, files in os.walk(plan_dir):
            dirs.sort()
            for fname in sorted(files):
                if not fname.endswith('dcm'):
                    continue
                dcm_rt_file = os.path.join(root, fname)
                ds = pydicom.dcmread(dcm_rt_file)
                RT['PatientID'] = ds.PatientID
                RT['StudyDate'] = ds.StudyDate
                _, short_id = hashptid(RT['PatientID'])
                RT['output_dir'] = os.path.join(
                    OUT_ROOT, short_id, ds.StudyDate)
                output_dir = os.path.join(RT['output_dir'], 'RT')
                print(ds.Modality, dcm_rt_file, output_dir)
                if ds.Modality == 'RTDOSE':
                    os.makedirs(output_dir, exist_ok=True)
                    RT['dose'] = convert_rtdose(
                        dcm_rt_file, force=False,
                        dose_output_path=os.path.join(
                            output_dir,
                            'dose-%s.nii.gz' % ds.SOPInstanceUID))
                elif ds.Modality == 'RTSTRUCT':
                    _convert_structure_set(
                        ds, dcm_rt_file, output_dir, frames, RT)
        if 'TV' in RT:
            # NOTE(review): the file is named after the last DICOM file read
            # in this plan directory, which is not necessarily the RTSTRUCT
            # the mask came from - confirm this is intended.
            sitk.WriteImage(
                RT['TV'],
                os.path.join(
                    output_dir, 'TV-%s.nii.gz' % ds.SOPInstanceUID))
    return ds


def main():
    """Process every not-yet-recorded patient directory in PATIENTS_ROOT."""
    os.makedirs(OUT_ROOT, exist_ok=True)
    # The context manager closes the shelf even if check() raises.
    with shelve.open(SHELVE) as done:
        for entry in sorted(os.scandir(PATIENTS_ROOT), key=lambda x: x.name):
            if not entry.is_dir():
                continue
            if entry.name in done:
                print('skip', entry.name)
                continue
            done[entry.name] = check(entry.path)
            done.sync()


if __name__ == '__main__':
    main()