diff --git a/src/m62nii.py b/src/m62nii.py new file mode 100644 index 0000000..d667295 --- /dev/null +++ b/src/m62nii.py @@ -0,0 +1,340 @@ + +import base64 +import difflib +import hashlib +import os +import shelve +import shutil +import subprocess + +from platipy.dicom.io.rtdose_to_nifti import convert_rtdose +from platipy.dicom.io.rtstruct_to_nifti import convert_rtstruct +from platipy.imaging.dose.dvh import calculate_dvh_for_labels, calculate_d_x, calculate_v_x +from platipy.imaging.visualisation.dose import visualise_dose + +import matplotlib +import matplotlib.pyplot as plt +import pydicom +import SimpleITK as sitk + +GENERAL_DIR = '/mnt/pve/NAS57BEA1_Public/DicomDatabase/GENERAL/' +OUT_ROOT = '/mnt/pve/WORKSPACE/M6/nii' +SHELVE = os.path.join(OUT_ROOT, 'shelve') + +def plot_dvh(dvh): + # Reshape the DVH + plt_dvh = dvh + plt_dvh = plt_dvh.set_index("label") + plt_dvh = plt_dvh.iloc[:,3:].transpose() + + # Plot the DVH + fig, ax = plt.subplots() + plt_dvh.plot(ax=ax, kind="line", colormap=matplotlib.colormaps.get_cmap("rainbow"), legend=False) + + # Add labels and show plot + plt.legend(loc='best') + plt.xlabel("Dose (Gy)") + plt.ylabel("Frequency") + plt.title("Dose Volume Histogram (DVH)") + # plt.show() + plt.savefig('dvh.png') + + +def visualise(ct_image, dose, structures): + fig, df_metrics = visualise_dose( + ct_image, + dose, + structures, + # dvh=dvh, + # d_points=[0, 95], + # v_points=[5], + # d_cc_points=[10], + # structure_for_limits=dose>5, + # expansion_for_limits=40, + # contour_cmap=matplotlib.colormaps.get_cmap("rainbow"), + # dose_cmap=matplotlib.colormaps.get_cmap("inferno"), + # title="TCGA_CV_5977 Dose Metrics", + ) + + + plt.savefig('visualise_dose.png') + +def hashptid(mrn, hosp='NTUH'): + + ptsalt = (mrn+hosp).upper().encode() + hash_in_bytes = hashlib.md5(ptsalt) + + md5 = hash_in_bytes.hexdigest() + hash = base64.b32encode(hash_in_bytes.digest())[:8].decode() + + + # hash32 = base64.b32encode(hash_in_bytes)[:8].decode() + # hash10 = str(int(hashlib.md5(ptsalt).hexdigest(), 16))[-8:] + + return md5, hash + + +def check(epath): + + # Check CT or MR or others + + ds = None + FrameOfReferenceUID = {} + + for root, dirs, files in os.walk(epath): + dirs.sort() + for f in sorted(files): + if f.startswith('RT'): + continue + if f.startswith('SR'): + continue + if f=='DICOMDIR': + continue + print(root, f) + dcm_file = os.path.join(root, f) + ds = pydicom.dcmread(dcm_file) + # print(ds.FrameOfReferenceUID) + # print(dir(ds)) + # exit() + if 'FrameOfReferenceUID' in ds: + frame = { + 'files': len(files), + 'FrameOfReferenceUID': ds.FrameOfReferenceUID, + 'root': root, + } + # print(frame) + if (ds.FrameOfReferenceUID not in FrameOfReferenceUID) or (FrameOfReferenceUID[ds.FrameOfReferenceUID]['files'] < len(files)): + FrameOfReferenceUID[ds.FrameOfReferenceUID] = frame + + break + # if ds is not None: + # break + + # print(FrameOfReferenceUID) + # exit() + + if ds is not None: + + print(ds.PatientID) + print(ds.StudyDate) + print(ds.Modality) + + # in_folder = os.path.os.path.dirname(root) + md5, hash = hashptid(ds.PatientID) + output = os.path.join(OUT_ROOT, hash, ds.StudyDate, ds.Modality) + os.makedirs(output, exist_ok=True) + subprocess.run([ + "dcm2niix", + "-f", '%p_%t_%s', + "-o", output, + "-w", "0", + "-z", "y", + # in_folder, + epath, + ]) + # exit() + + ### Check RT + + RT = {} + for root, dirs, files in os.walk(epath): + dirs.sort() + for f in sorted(files): + if f.startswith('RT'): + + dcm_rt_file = os.path.join(root, f) + ds = pydicom.dcmread(dcm_rt_file) + RT['PatientID'] = ds.PatientID + RT['StudyDate'] = ds.StudyDate + md5, hash = hashptid(RT['PatientID']) + RT['output_dir'] = os.path.join(OUT_ROOT, hash, ds.StudyDate) + # output_dir = os.path.join(OUT_ROOT, ds.PatientID, ds.StudyDate, 'RT') + # output_dir = os.path.join(OUT_ROOT, hash, ds.StudyDate, 'RT') + output_dir = os.path.join(RT['output_dir'], 'RT') + print(ds.Modality, dcm_rt_file, output_dir) + # os.makedirs(output_dir, exist_ok=True) + # print(ds) + # print(dir(ds)) + # print(RT) + # exit() + + # match ds.SOPClassUID: + match ds.Modality: + + # case '1.2.840.10008.5.1.4.1.1.481.2': #RTDOSE + case 'RTDOSE': #RTDOSE + # continue + # platipy.dicom.io.rtdose_to_nifti.convert_rtdose(dcm_dose, force=False, dose_output_path=None) + os.makedirs(output_dir, exist_ok=True) + RT['dose'] = convert_rtdose(dcm_rt_file, force=False, dose_output_path=os.path.join(output_dir, 'dose-%s.nii.gz'%ds.SOPInstanceUID)) + + # case '1.2.840.10008.5.1.4.1.1.481.3': #RTSTRUCT + case 'RTSTRUCT': #RTSS + # print(ds) + # exit() + # continue + RT['StructureSetDate'] = ds.StructureSetDate + +# Structure Set ROI Sequence +# RT ROI Observations Sequence + + ROIbyNumber={} + ROIbyName={} + + # print(type(ds.StructureSetROISequence)) + # print(ds.StructureSetROISequence) + for ds2 in ds.StructureSetROISequence: + if 'ROINumber' in ds2: + seq = { + 'ROINumber': ds2.ROINumber, + 'ReferencedFrameOfReferenceUID': ds2.ReferencedFrameOfReferenceUID, + 'ROIName': ds2.ROIName, + 'ROIGenerationAlgorithm': ds2.ROIGenerationAlgorithm, + } + # print(seq) + ROIbyName[ds2.ROIName] = seq + ROIbyNumber[ds2.ROINumber] = seq + # print(ROIbyNumber) + # exit() + # print(type(ds.RTROIObservationsSequence)) + for ds2 in ds.RTROIObservationsSequence: + if 'ObservationNumber' in ds2: + seq = { + 'ObservationNumber': ds2.ObservationNumber, + 'ReferencedROINumber': ds2.ReferencedROINumber, + # 'ROIObservationLabel': ds2.ROIObservationLabel, + 'RTROIInterpretedType': ds2.RTROIInterpretedType, + 'ROIInterpreter': ds2.ROIInterpreter, + } + ROIName = ROIbyNumber[ds2.ReferencedROINumber]['ROIName'] + ROIbyName[ROIName].update(seq) + + ROINames = ROIbyName.keys() + + # prefix = "Struct_%s_"%ds.StudyDate + prefix = 'Struct_' + ct_image = 'ct_image.nii.gz' + + # continue + # dcm_img = os.path.join(root, 'SRS00000') + + # dcm_img = FrameOfReferenceUID[ds.FrameOfReferenceUID]['root'] # not always work + # print(FrameOfReferenceUID) + # print(ds.ReferencedFrameOfReferenceSequence[0].FrameOfReferenceUID) + if ds.ReferencedFrameOfReferenceSequence[0].FrameOfReferenceUID not in FrameOfReferenceUID: + continue + dcm_img = FrameOfReferenceUID[ds.ReferencedFrameOfReferenceSequence[0].FrameOfReferenceUID]['root'] + + # platipy.dicom.io.rtstruct_to_nifti.convert_rtstruct(dcm_img, dcm_rt_file, prefix='Struct_', output_dir='.', output_img=None, spacing=None, replace_slashes_with='') + os.makedirs(output_dir, exist_ok=True) + convert_rtstruct(dcm_img, dcm_rt_file, prefix=prefix, output_dir=output_dir, output_img=ct_image, spacing=None, replace_slashes_with='') + RT['structures'] = {} + TV = None + for e in sorted(os.scandir(output_dir), key=lambda e: e.name): + if e.name.startswith(prefix): + s = e.name[len(prefix):-len('.nii.gz')] + matches = difflib.get_close_matches(s, ROINames) + # print(s, ROINames, matches) + if not matches: + continue + ROIName = matches[0] + RTROIInterpretedType = ROIbyName[ROIName]['RTROIInterpretedType'] + # print(s, ROIName, ROIbyName[ROIName]['RTROIInterpretedType']) + + if 'TV' in RTROIInterpretedType: + dst = os.path.join(output_dir, 'TV') + if TV is None: + TV = sitk.ReadImage(e.path) + else: + TV = sitk.Maximum(TV, sitk.ReadImage(e.path)) + else: + dst = os.path.join(output_dir, RTROIInterpretedType) + + os.makedirs(dst, exist_ok=True) + shutil.move(e.path, os.path.join(dst, e.name)) + + + # RT['structures'][s] = sitk.ReadImage(e.path) + if e.name == ct_image: + RT['ct_image'] = sitk.ReadImage(e.path) + + if TV is not None: + RT['TV'] = TV + if 'TV' in RT: + sitk.WriteImage(RT['TV'], os.path.join(output_dir, 'TV-%s.nii.gz'%ds.SOPInstanceUID)) + + # print(RT['structures']) + # exit(0) + + # case '1.2.840.10008.5.1.4.1.1.481.4': #RTRECORD + case 'RTRECORD': #RTRECORD + continue + print(dir(ds)) + print(ds.Manufacturer) + print(ds.ManufacturerModelName) + print(ds.Modality) + exit() + + # case '1.2.840.10008.5.1.4.1.1.481.5': #RTPLAN + case 'RTPLAN': #RTPLAN + continue + # 'PlanIntent', 'PositionReferenceIndicator', 'PrescriptionDescription', 'RTPlanDate', 'RTPlanDescription', 'RTPlanGeometry', 'RTPlanLabel', 'RTPlanName', 'RTPlanTime' + print(dir(ds)) + print(ds.PrescriptionDescription) + print(ds.PlanIntent) + exit() + + + # if 'dose' in RT and 'structures' in RT: + # dvh = calculate_dvh_for_labels(RT['dose'], RT['structures']) + # # print(dvh) + # # print(type(dvh)) + # dvh.to_excel('dvh.xlsx') + # # plot_dvh(dvh) + # # visualise(RT['ct_image'], RT['dose'], RT['structures']) + # exit() + + return ds + + + +def main(): + + # check('/mnt/pve/NAS57BEA1_Public/DicomDatabase/GENERAL/1.2.528.1.1001.200.10.1201.1113.347217019.20221027033523170') + # check('/mnt/pve/NAS57BEA1_Public/DicomDatabase/GENERAL/1.2.528.1.1001.200.10.1201.1113.347217019.20221208063021604') + # check('/mnt/pve/NAS57BEA1_Public/DicomDatabase/GENERAL/1.2.528.1.1001.200.10.1273.4181.347217019.20210716082047818') + # check('/mnt/pve/NAS57BEA1_Public/DicomDatabase/GENERAL/1.2.528.1.1001.200.10.2017.2749.347217019.20220907095200360') + # check('/mnt/pve/NAS57BEA1_Public/DicomDatabase/GENERAL/1.2.528.1.1001.200.10.4777.693.347217019.20230721034215278') + # exit() + + os.makedirs(OUT_ROOT, exist_ok=True) + d = shelve.open(SHELVE) # open -- file may get suffix added by low-level + + for e in sorted(os.scandir(GENERAL_DIR), key=lambda e: e.name): + if e.is_dir(): + if e.name in d: + print('skip', e.name) + continue + ret = check(e.path) + d[e.name] = ret + d.sync() + + + +if __name__ == '__main__': + main() + + + +''' +SOP UID SOP name +1.2.840.10008.5.1.4.1.1.481.1 Radiation Therapy Image Storage +1.2.840.10008.5.1.4.1.1.481.2 Radiation Therapy Dose Storage +1.2.840.10008.5.1.4.1.1.481.3 Radiation Therapy Structure Set Storage +1.2.840.10008.5.1.4.1.1.481.4 Radiation Therapy Beams Treatment Record Storage +1.2.840.10008.5.1.4.1.1.481.5 Radiation Therapy Plan Storage +1.2.840.10008.5.1.4.1.1.481.6 Radiation Therapy Brachy Treatment Record Storage +1.2.840.10008.5.1.4.1.1.481.7 Radiation Therapy Treatment Summary Record Storage +1.2.840.10008.5.1.4.1.1.481.8 Radiation Therapy Ion Plan Storage +1.2.840.10008.5.1.4.1.1.481.9 Radiation Therapy Ion Beams Treatment Record Storage +'''