| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242 |
- """
- Convert LiTS 2017 (Liver Tumor Segmentation) data into UNet3+ data format
- LiTS: https://competitions.codalab.org/competitions/17094
- """
- import os
- import sys
- from glob import glob
- from pathlib import Path
- from tqdm import tqdm
- import numpy as np
- import multiprocessing as mp
- import cv2
- import nibabel as nib
- import hydra
- from omegaconf import DictConfig
- sys.path.append(os.path.abspath("./"))
- from utils.general_utils import create_directory, join_paths
- from utils.images_utils import resize_image
def read_nii(filepath):
    """
    Load a .nii scan from disk and return its voxel data as a NumPy array.

    The volume is rotated 90 degrees (np.rot90) before being returned.
    """
    volume = nib.load(filepath).get_fdata()
    # TODO: Verify images orientation
    # in both train and test set, especially on train scan 130
    return np.rot90(np.array(volume))
def crop_center(img, croph, cropw):
    """
    Center-crop an image to (croph, cropw) on its first two axes.

    Improvements over the original:
    - works for 2-D (H, W) arrays as well as 3-D+ (H, W, ...) arrays,
      by using an Ellipsis instead of a hard-coded third axis;
    - clamps the start indices to 0 so a crop larger than the image
      returns the full axis instead of silently slicing from the end
      (a negative start index wraps around in NumPy).

    No padding is added when the requested crop exceeds the image size.
    """
    height, width = img.shape[:2]
    starth = max(height // 2 - (croph // 2), 0)
    startw = max(width // 2 - (cropw // 2), 0)
    return img[starth:starth + croph, startw:startw + cropw, ...]
def linear_scale(img):
    """
    Min-max normalize each channel of *img* to [0, 1], then scale to [0, 255].

    The reduction is over axes (0, 1), so every slice along the last axis
    is normalized with its own min/max.

    Bug fix: a constant channel makes (max - min) zero, and the original
    divided by it, producing NaNs. The denominator is now replaced with 1
    for such channels, so they map to all zeros instead.
    """
    low = img.min(axis=(0, 1))
    high = img.max(axis=(0, 1))
    span = high - low
    # Guard against 0/0 -> NaN for channels with a single constant value.
    span = np.where(span == 0, 1, span)
    return (img - low) / span * 255
def clip_scan(img, min_value, max_value):
    """
    Bound every intensity of *img* to the closed range [min_value, max_value].
    """
    return np.clip(img, a_min=min_value, a_max=max_value)
def resize_scan(scan, new_height, new_width, scan_type):
    """
    Resize a CT volume of shape (H, W, depth) to (new_height, new_width, depth).

    Images are interpolated with INTER_CUBIC; masks use INTER_NEAREST so
    label values are never blended. Slices are processed in chunks of at
    most scan.shape[1] along the depth axis — presumably to stay under a
    per-call channel limit of the underlying resize; TODO confirm.
    """
    depth = scan.shape[2]
    chunk = scan.shape[1]
    interpolation = cv2.INTER_CUBIC if scan_type == "image" else cv2.INTER_NEAREST
    resized = np.zeros((new_height, new_width, depth), dtype=scan.dtype)
    for begin in range(0, depth, chunk):
        stop = min(begin + chunk, depth)
        resized[:, :, begin:stop] = resize_image(
            scan[:, :, begin:stop],
            new_height,
            new_width,
            interpolation,
        )
    return resized
def save_images(scan, save_path, img_index):
    """
    Write every depth slice of *scan* to disk as a 3-channel .png.

    Per the UNet3+ requirement — "input image had three channels, including
    the slice to be segmented and the upper and lower slices, which was
    cropped to 320×320" — each output stacks the previous, current and next
    slices; the first and last slices reuse themselves as neighbours.
    """
    depth = scan.shape[-1]
    for slice_no in range(depth):
        prev_no = slice_no - 1 if (slice_no - 1) > 0 else 0
        next_no = slice_no + 1 if (slice_no + 1) < depth else depth - 1
        stacked = np.stack(
            (
                scan[:, :, prev_no],
                scan[:, :, slice_no],
                scan[:, :, next_no],
            ),
            axis=-1,
        )
        # cv2.imwrite expects BGR channel order, so flip from RGB first.
        stacked = cv2.cvtColor(stacked, cv2.COLOR_RGB2BGR)
        out_path = join_paths(save_path, f"image_{img_index}_{slice_no}.png")
        cv2.imwrite(out_path, stacked)
def save_mask(scan, save_path, mask_index):
    """
    Write each depth slice of *scan* to disk as a single-channel
    (grayscale) .png mask.
    """
    depth = scan.shape[-1]
    for slice_no in range(depth):
        out_path = join_paths(save_path, f"mask_{mask_index}_{slice_no}.png")
        cv2.imwrite(out_path, scan[:, :, slice_no])
def extract_image(cfg, image_path, save_path, scan_type="image"):
    """
    Convert a single .nii scan into per-slice .png files.

    The scan index is taken from the file stem (e.g. "volume-42" -> "42").
    Images are resized, clipped to the configured intensity window,
    min-max scaled to 0-255 and written as 3-slice pngs; masks are
    resized and binarized before being written as grayscale pngs.
    """
    _, index = str(Path(image_path).stem).split("-")
    scan = read_nii(image_path)
    scan = resize_scan(
        scan,
        cfg.DATA_PREPARATION.RESIZED_HEIGHT,
        cfg.DATA_PREPARATION.RESIZED_WIDTH,
        scan_type,
    )
    if scan_type == "image":
        scan = clip_scan(
            scan,
            cfg.DATA_PREPARATION.SCAN_MIN_VALUE,
            cfg.DATA_PREPARATION.SCAN_MAX_VALUE,
        )
        scan = np.uint8(linear_scale(scan))
        save_images(scan, save_path, index)
    else:
        # Labels: 0 background/non-lesion, 1 liver, 2 lesion/tumor.
        # The tumor is part of the liver, so every non-zero label is
        # folded into label 1.
        scan = np.uint8(np.where(scan != 0, 1, scan))
        save_mask(scan, save_path, index)
def extract_images(cfg, images_path, save_path, scan_type="image"):
    """
    Process every scan path in *images_path* in parallel through
    extract_image.

    Bug fix: the original submitted tasks with apply_async and never
    collected the results, so any exception raised inside a worker was
    silently swallowed, and tqdm only measured task *submission*. Each
    AsyncResult is now kept and .get() is called on it, which re-raises
    worker errors and makes the progress bar reflect completed work.
    """
    # Leave two cores free for the OS, but never use fewer than 1
    # or more than 20 workers.
    process_count = int(np.clip(mp.cpu_count() - 2, 1, 20))
    with mp.Pool(process_count) as pool:
        results = [
            pool.apply_async(
                extract_image,
                args=(cfg, image_path, save_path, scan_type),
            )
            for image_path in images_path
        ]
        for result in tqdm(results):
            result.get()  # re-raises any exception from the worker
def _collect_scan_paths(cfg, scans_dir, split_name):
    """
    Glob the "volume-*.nii" and "segmentation-*.nii" files under
    *scans_dir* (relative to cfg.WORK_DIR) and return them as two
    sorted lists. Raises AssertionError if the counts differ.
    """
    volumes = sorted(
        glob(join_paths(cfg.WORK_DIR, scans_dir, "volume-*.nii"))
    )
    masks = sorted(
        glob(join_paths(cfg.WORK_DIR, scans_dir, "segmentation-*.nii"))
    )
    assert len(volumes) == len(masks), \
        f"{split_name} volumes and segmentations are not same in length"
    return volumes, masks


@hydra.main(version_base=None, config_path="../configs", config_name="config")
def preprocess_lits_data(cfg: DictConfig):
    """
    Preprocess LiTS 2017 (Liver Tumor Segmentation) data by extracting
    images and masks into UNet3+ data format.

    The train/val glob + length-check logic was duplicated verbatim in
    the original; it now lives in _collect_scan_paths.
    """
    train_images_names, train_mask_names = _collect_scan_paths(
        cfg, cfg.DATA_PREPARATION.SCANS_TRAIN_DATA_PATH, "Train"
    )
    val_images_names, val_mask_names = _collect_scan_paths(
        cfg, cfg.DATA_PREPARATION.SCANS_VAL_DATA_PATH, "Validation"
    )

    train_images_path = join_paths(cfg.WORK_DIR, cfg.DATASET.TRAIN.IMAGES_PATH)
    train_mask_path = join_paths(cfg.WORK_DIR, cfg.DATASET.TRAIN.MASK_PATH)
    val_images_path = join_paths(cfg.WORK_DIR, cfg.DATASET.VAL.IMAGES_PATH)
    val_mask_path = join_paths(cfg.WORK_DIR, cfg.DATASET.VAL.MASK_PATH)

    for output_dir in (
        train_images_path, train_mask_path, val_images_path, val_mask_path
    ):
        create_directory(output_dir)

    print("\nExtracting train images")
    extract_images(
        cfg, train_images_names, train_images_path, scan_type="image"
    )
    print("\nExtracting train mask")
    extract_images(
        cfg, train_mask_names, train_mask_path, scan_type="mask"
    )
    print("\nExtracting val images")
    extract_images(
        cfg, val_images_names, val_images_path, scan_type="image"
    )
    print("\nExtracting val mask")
    extract_images(
        cfg, val_mask_names, val_mask_path, scan_type="mask"
    )
if __name__ == '__main__':
    # Script entry point: hydra composes the config from ../configs/config
    # (plus any CLI overrides) and injects it as `cfg`.
    preprocess_lits_data()
|