ck-preprocess/accuray.py

298 lines
12 KiB
Python
Raw Normal View History

2025-02-01 22:30:53 +00:00
import base64
import difflib
import hashlib
import os
import shelve
import shutil
import subprocess
import traceback
from platipy.dicom.io.rtdose_to_nifti import convert_rtdose
from platipy.dicom.io.rtstruct_to_nifti import convert_rtstruct
import lxml.etree
import lxml.objectify
import pydicom
import SimpleITK as sitk
# Input: directory whose sub-directories are scanned one by one by main().
PATIENTS_ROOT = "/mnt/SRS/NTUH/accuray/database/patients/"
# Output: root directory below which all converted files are written.
OUT_ROOT = "/mnt/1220/Public/dataset2/G4"
# Shelf file in which main() records the directories it has already handled.
SHELVE = os.path.join(OUT_ROOT, "shelve")
def hashptid(mrn, hosp='NTUH'):
    """Derive a pseudonymous identifier pair from a patient identifier.

    ``mrn`` and ``hosp`` are concatenated, upper-cased, encoded and hashed
    with MD5, so the result is case-insensitive in both arguments.

    Args:
        mrn: Patient identifier string (callers pass the DICOM ``PatientID``).
        hosp: String appended to ``mrn`` before hashing.

    Returns:
        A ``(md5_hex, short_id)`` tuple: the hexadecimal MD5 digest and the
        first eight characters of the Base32 encoding of the raw digest.
    """
    salted_id = (mrn + hosp).upper().encode()
    digest = hashlib.md5(salted_id)
    # Base32 output is pure ASCII, so decoding before slicing is equivalent
    # to slicing the bytes first.
    short_id = base64.b32encode(digest.digest()).decode()[:8]
    return digest.hexdigest(), short_id
def check(epath):
# Check CT or MR or others
ds = None
FrameOfReferenceUID = {}
for root, dirs, files in os.walk(os.path.join(epath, 'ct/patient')):
dirs.sort()
for f in sorted(files):
if f.startswith('RT'):
continue
if f.startswith('SR'):
continue
if f=='DICOMDIR':
continue
print(root, f)
dcm_file = os.path.join(root, f)
ds = pydicom.dcmread(dcm_file, force=True)
# print(dir(ds))
# exit()
if 'PatientID' not in ds:
continue
# print(ds, file=open('%s.txt'%f, 'w'))
# exit()
# print(ds.PatientID)
# exit()
md5, hash = hashptid(ds.PatientID)
output = os.path.join(OUT_ROOT, hash, ds.StudyDate, ds.Modality)
os.makedirs(output, exist_ok=True)
subprocess.run([
"dcm2niix",
"-f", '%p_%t_%s',
"-o", output,
"-w", "0",
"-z", "y",
root,
# epath,
])
if 'FrameOfReferenceUID' in ds:
frame = {
'files': len(files),
'FrameOfReferenceUID': ds.FrameOfReferenceUID,
'root': root,
}
# print(frame)
if (ds.FrameOfReferenceUID not in FrameOfReferenceUID) or (FrameOfReferenceUID[ds.FrameOfReferenceUID]['files'] < len(files)):
FrameOfReferenceUID[ds.FrameOfReferenceUID] = frame
break
# if ds is not None:
# break
# print(FrameOfReferenceUID)
# exit()
### Check RT
VOI = {}
PLAN_DIRS = []
for root, dirs, files in os.walk(os.path.join(epath, 'tps')):
dirs.sort()
for f in sorted(files):
if f.endswith('xml'):
fileobject = os.path.join(root, f)
print(fileobject)
# shutil.copy(os.path.join(root, f), 'test.xml')
# tree = lxml.objectify.parse(fileobject)
try:
tree = lxml.objectify.parse(fileobject)
except:
print(traceback.format_exc())
continue
# parser = lxml.etree.XMLParser(recover=True)
# tree = lxml.objectify.parse(fileobject, parser=parser)
plan = tree.getroot()
# print(plan.PLAN_PROFILE)
# print(dir(plan))
# print(plan.keys())
# print(dir(plan.VOISET))
# print(plan.VOISET.VOI[0].NAME)
# print(plan.VOISET.VOI[1].NAME)
# exit()
PLAN_STATE = 0
if hasattr(plan, 'PLAN_PROFILE'): # 1 DELIVERABLE
if hasattr(plan.PLAN_PROFILE, 'PLAN_STATE'):
PLAN_STATE = int(plan.PLAN_PROFILE.PLAN_STATE)
else:
PLAN_STATE = int(plan.PLAN_PROFILE.DELIVERABLE_PLAN)
if PLAN_STATE == 0:
continue
# print(PLAN_STATE, f, root)
PLAN_DIRS.append(root)
if hasattr(plan, 'VOISET'):
for v in plan.VOISET.VOI: # 0 TV, 1 ORGAN
VOI[str(v.NAME)] = v.TYPE
# print(v.TYPE, v.NAME, type(v.NAME))
# exit()
# print(PLAN_DIRS)
# print(VOI)
# exit()
for d in PLAN_DIRS:
RT={}
for root, dirs, files in os.walk(d):
dirs.sort()
for f in sorted(files):
if f.endswith('dcm'):
dcm_rt_file = os.path.join(root, f)
ds = pydicom.dcmread(dcm_rt_file)
RT['PatientID'] = ds.PatientID
RT['StudyDate'] = ds.StudyDate
md5, hash = hashptid(RT['PatientID'])
RT['output_dir'] = os.path.join(OUT_ROOT, hash, ds.StudyDate)
output_dir = os.path.join(RT['output_dir'], 'RT')
print(ds.Modality, dcm_rt_file, output_dir)
match ds.Modality:
# if f.endswith('rtdose.dcm'):
case 'RTDOSE': #RTDOSE
os.makedirs(output_dir, exist_ok=True)
RT['dose'] = convert_rtdose(dcm_rt_file, force=False, dose_output_path=os.path.join(output_dir, 'dose-%s.nii.gz'%ds.SOPInstanceUID))
# elif f.endswith('rtss.dcm'):
case 'RTSTRUCT': #RTSS
RT['StructureSetDate'] = ds.StructureSetDate
# print(ds)
ROIbyNumber={}
ROIbyName={}
# print(ds, file=open('%s.txt'%f, 'w'))
if 'StructureSetROISequence' not in ds:
continue
for ds2 in ds.StructureSetROISequence:
if 'ROINumber' in ds2:
seq = {
'ROINumber': ds2.ROINumber,
'ReferencedFrameOfReferenceUID': ds2.ReferencedFrameOfReferenceUID,
'ROIName': ds2.ROIName,
'ROIGenerationAlgorithm': ds2.ROIGenerationAlgorithm,
}
# print(seq)
ROIbyName[ds2.ROIName] = seq
ROIbyNumber[ds2.ROINumber] = seq
for ds2 in ds.RTROIObservationsSequence:
if 'ObservationNumber' in ds2:
seq = {
'ObservationNumber': ds2.ObservationNumber,
'ReferencedROINumber': ds2.ReferencedROINumber,
# 'ROIObservationLabel': ds2.ROIObservationLabel,
'RTROIInterpretedType': ds2.RTROIInterpretedType,
'ROIInterpreter': ds2.ROIInterpreter,
}
ROIName = ROIbyNumber[ds2.ReferencedROINumber]['ROIName']
ROIbyName[ROIName].update(seq)
ROINames = ROIbyName.keys()
# ROINames = VOI.keys()
prefix = 'Struct_'
ct_image = 'ct_image.nii.gz'
# print(dir(ds.ReferencedFrameOfReferenceSequence[0]))
if ds.ReferencedFrameOfReferenceSequence[0].FrameOfReferenceUID not in FrameOfReferenceUID:
continue
dcm_img = FrameOfReferenceUID[ds.ReferencedFrameOfReferenceSequence[0].FrameOfReferenceUID]['root']
# dcm_img = FrameOfReferenceUID[ds.FrameOfReferenceUID]
os.makedirs(output_dir, exist_ok=True)
print(dcm_img, dcm_rt_file)
try:
convert_rtstruct(dcm_img, dcm_rt_file, prefix=prefix, output_dir=output_dir, output_img=ct_image, spacing=None, replace_slashes_with='')
except:
print(traceback.format_exc())
continue
RT['structures'] = {}
TV = None
for e in sorted(os.scandir(output_dir), key=lambda e: e.name):
if e.name.startswith(prefix):
s = e.name[len(prefix):-len('.nii.gz')]
# print(e.name, s, ROINames)
# print(difflib.get_close_matches(s, ROINames))
ROIName = difflib.get_close_matches(s, ROINames)[0]
RTROIInterpretedType = ROIbyName[ROIName]['RTROIInterpretedType']
if 'TV' in RTROIInterpretedType:
dst = os.path.join(output_dir, 'TV')
if TV is None:
TV = sitk.ReadImage(e.path)
else:
TV = sitk.Maximum(TV, sitk.ReadImage(e.path))
else:
dst = os.path.join(output_dir, RTROIInterpretedType)
# if VOI[ROIName] == 0:
# dst = os.path.join(output_dir, 'TV')
# if TV is None:
# TV = sitk.ReadImage(e.path)
# else:
# TV = sitk.Maximum(TV, sitk.ReadImage(e.path))
# else:
# dst = os.path.join(output_dir, 'ORGAN')
os.makedirs(dst, exist_ok=True)
shutil.move(e.path, os.path.join(dst, e.name))
# RT['structures'][s] = sitk.ReadImage(e.path)
if e.name == ct_image:
RT['ct_image'] = sitk.ReadImage(e.path)
if TV is not None:
RT['TV'] = TV
if 'TV' in RT:
sitk.WriteImage(RT['TV'], os.path.join(output_dir, 'TV-%s.nii.gz'%ds.SOPInstanceUID))
# exit()
return ds
def main():
    """Run :func:`check` on every not-yet-processed directory in PATIENTS_ROOT.

    The names of processed directories are kept in the shelf at ``SHELVE``
    (key: directory name, value: whatever ``check`` returned), so a rerun
    skips the directories that are already recorded there. The shelf is
    synced after every directory.
    """
    os.makedirs(OUT_ROOT, exist_ok=True)
    # The context manager closes the shelf even when check() raises, so the
    # progress recorded so far is not left in an unclosed database.
    with shelve.open(SHELVE) as done:
        for entry in sorted(os.scandir(PATIENTS_ROOT), key=lambda e: e.name):
            if not entry.is_dir():
                continue
            if entry.name in done:
                print('skip', entry.name)
                continue
            done[entry.name] = check(entry.path)
            done.sync()
# Run the batch conversion only when executed as a script, not on import.
if __name__ == "__main__":
    main()