diff --git a/build_models/evaluation.py b/build_models/evaluation.py
new file mode 100644
index 0000000000000000000000000000000000000000..b6a49fadee2b6cb35b2cb298ffb4319388dc83b0
--- /dev/null
+++ b/build_models/evaluation.py
@@ -0,0 +1,121 @@
+import numpy as np
+import json
+from sklearn.metrics import confusion_matrix
+import seaborn as sns
+import matplotlib.pyplot as plt
+import itertools
+
+from parameters import SAVE_MISCLASSIFIED_PATH, SAVE_CONFUSIONM_PATH, SAVE_PATH_LEARNING_CURVES, MODEL_NAME
+
+def get_learning_curves(train_losses, val_losses, train_accuracies, val_accuracies, savename = None):
+    epochs = range(1, len(train_losses) + 1)
+
+    plt.figure(figsize=(15,6))
+    plt.subplot(1,2,1)
+    plt.plot(epochs, train_losses, '-o', label='Training loss')
+    plt.plot(epochs, val_losses, '-o', label='Validation loss')
+    plt.legend()
+    plt.title('Learning curves - loss')
+    plt.xlabel('Epoch')
+    plt.ylabel('Loss')
+    plt.xticks(epochs[::2], rotation=45)
+    plt.subplot(1,2,2)
+    plt.plot(epochs, train_accuracies, '-o', label='Training accuracy')
+    plt.plot(epochs, val_accuracies, '-o', label='Validation accuracy')
+    plt.legend()
+    plt.title('Learning curves - accuracy')
+    plt.xlabel('Epoch')
+    plt.ylabel('Accuracy')
+    plt.xticks(epochs[::2], rotation=45)
+    if savename is not None:
+        plt.savefig(SAVE_PATH_LEARNING_CURVES + savename + ".png", dpi=300, bbox_inches='tight')
+    else:
+        plt.savefig(SAVE_PATH_LEARNING_CURVES + ".png", dpi=300, bbox_inches='tight')
+    # close the figure so repeated calls (one per ensemble member) do not pile up open figures
+    plt.close()
+
+
+def averaged_misclassified_data(final_predictions, ground_truth_dict, grouped_predictions):
+    y_true = []
+    y_pred_major = []
+    misclassified_path = []
+    misclassified_data = {}
+
+    for base_name, predicted_class in final_predictions.items():
+        y_true.append(ground_truth_dict[base_name])
+        y_pred_major.append(predicted_class)
+
+        if predicted_class != ground_truth_dict[base_name]:
+            misclassified_path.append(base_name)
+            n_views = len(grouped_predictions[base_name])
+            # cast numpy types to plain Python ints/floats so json.dump can serialize them
+            misclassified_data[base_name] = {
+                "gt": int(ground_truth_dict[base_name]),
+                "each_pred": [int(np.argmax(arr)) for arr in grouped_predictions[base_name]],
+                "each_conf": [float(np.max(arr)) for arr in grouped_predictions[base_name]],
+                "final_pred": int(final_predictions[base_name]),
+                # confidence of the averaged prediction (mean probability over the grouped views)
+                "confidence": float(np.max(np.sum(np.array(grouped_predictions[base_name]), axis=0) / n_views))
+            }
+
+    y_true = np.array(y_true)
+    y_pred_major = np.array(y_pred_major)
+
+    # Save the mislabelled dictionary to a JSON file
+    SAVE_MISCLASSIFIED_PATH_averaging = SAVE_MISCLASSIFIED_PATH + "averaging.json"
+    with open(SAVE_MISCLASSIFIED_PATH_averaging, 'w') as json_file:
+        json.dump(misclassified_data, json_file, indent=4) # indent for pretty printing
+
+    return y_true, y_pred_major
+
+def different_anatomical_plane_misclassified_data(idx_pred, y_true, prob_pred, gt_labels_conf, test_image_paths, test_labels):
+    error_indicator = idx_pred != y_true
+    idxs = [i for i, x in enumerate(error_indicator) if x]
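+    # itertools.compress keeps only the entries whose error_indicator flag is True,
+    # i.e. the probabilities, ground-truth confidences and paths of the misclassified samples
+    misclassified_prob = 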
list(itertools.compress(prob_pred, error_indicator)) + misclassified_gt_prob = list(itertools.compress(gt_labels_conf, error_indicator)) + misclassified_path = list(itertools.compress(test_image_paths, error_indicator)) + mislabelled = {} + + for i in range(0, len(idxs)): # Loop through indexes directly + true_label = test_labels[y_true[idxs[i]]] # Get the true label once + image_path = misclassified_path[i][0].rsplit("_", 2)[0] # Get the wrong path once + model_name_entry = mislabelled.setdefault(true_label, {}).setdefault(image_path, {}).setdefault(MODEL_NAME, {}) + + # Assign predicted layer and confidence + model_name_entry['predicted_layer'] = test_labels[idx_pred[idxs[i]]] + model_name_entry['confidence'] = float(misclassified_prob[i]) + model_name_entry['gt_confidence'] = float(misclassified_gt_prob[i]) + + # Save the mislabelled dictionary to a JSON file + with open(SAVE_MISCLASSIFIED_PATH, 'w') as json_file: + json.dump(mislabelled, json_file, indent=4) # indent for pretty printing + +def get_confusionM(y_true, y_pred_major, labels, test_set_len, averaging = False): + num_errors = np.sum(y_true != y_pred_major) + + conf_matrix = confusion_matrix(y_true, y_pred_major) + TP = conf_matrix.diagonal() + P = conf_matrix.sum(axis=1) + + # Calculate balanced accuracy + balanced_accuracy = sum(TP / P) / len(P) + + print(f'Test errors {num_errors} (out of {test_set_len:.0f}) {num_errors/test_set_len*100:0.2f}%') + print(f'Test accuracy {100-num_errors/test_set_len*100:0.2f}%') + print(f'Balanced accuracy {balanced_accuracy*100:0.2f}%') + + plt.figure(figsize=(8, 6)) + sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', + xticklabels=labels.values(), + yticklabels=labels.values()) + plt.xlabel('Predicted Labels') + plt.ylabel('True Labels') + plt.title('Confusion Matrix') + if averaging: + plt.savefig(SAVE_CONFUSIONM_PATH + "averaging.png", dpi=300, bbox_inches='tight') + else: + plt.savefig(SAVE_CONFUSIONM_PATH, dpi=300, bbox_inches='tight') + plt.show() \ No newline at end of file diff --git a/build_models/main.py b/build_models/main.py new file mode 100644 index 0000000000000000000000000000000000000000..db10ef7a1c4388facdb1ee58d31ac98202c8bbdd --- /dev/null +++ b/build_models/main.py @@ -0,0 +1,277 @@ +import numpy as np +import random +from pprint import pprint +import os +import timm +from collections import Counter +import sys +import time + +import torch +import torchvision.transforms as transforms +from torchvision.datasets import ImageFolder +from torch.utils.data import DataLoader +import torch.nn as nn +import torch.optim as optim + +from parameters import (PIPELINE, TRAIN_FOLDER, VAL_FOLDER, TEST_FOLDER, DEVICE, BATCH_SIZE, WORKERS, MODEL_NAME, EPOCHS, + CHECKPOINTS_FOLDER, SAVE_PREDICTION_PATH, LOG_PATH) +from own_dataloader import (different_anatomical_plane_in_rgb, same_anatomical_plane_in_rgb, different_anatomical_plane_ensemble, + ImageTripletDataset_to_one, same_anatomical_planes_in_RGB_ensemble, ImageTripletDataset_to_three, + visualize_RGB_image_from_dataloader, get_data_distribution,visualize_ENSEMBLE_image_from_dataloader, RandomResample) +from train_functions import train_with_scaler +from test_functions import predict, averaging_prediction, predict_ENSEMBLE_with_confidence +from evaluation import get_learning_curves, averaged_misclassified_data, different_anatomical_plane_misclassified_data, get_confusionM + +# FIX THE SEEDS +random.seed(42) +torch.manual_seed(42) +np.random.seed(42) + +torch.backends.cudnn.deterministic = True +torch.backends.cudnn.benchmark 
= False + +if torch.cuda.is_available(): + torch.cuda.manual_seed_all(42) + +os.environ['PYTHONHASHSEED'] = '42' + +log_file = open(LOG_PATH, "w") +sys.stdout = log_file + +# LOAD DATA +num_classes = get_data_distribution() + +train_transform = transforms.Compose([ + RandomResample(scale_factor=2), + transforms.ToTensor(), + transforms.RandomHorizontalFlip(p=0.5), + transforms.RandomVerticalFlip(p=0.5), + transforms.RandomRotation(degrees=90, expand=True), + + transforms.Resize(224) +]) +valid_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Resize(224) + +]) +if PIPELINE == "basic": + train_set = ImageFolder(TRAIN_FOLDER, transform = train_transform) + val_set = ImageFolder(VAL_FOLDER, transform = valid_transform) + test_set = ImageFolder(TEST_FOLDER, transform = valid_transform) +elif PIPELINE == "different_anatomical_plane_in_rgb": + train_set = different_anatomical_plane_in_rgb(TRAIN_FOLDER, transform = train_transform) + val_set = different_anatomical_plane_in_rgb(VAL_FOLDER, transform = valid_transform) + test_set = different_anatomical_plane_in_rgb(TEST_FOLDER, transform = valid_transform) +elif PIPELINE == "same_anatomical_plane_in_rgb": + train_set = same_anatomical_plane_in_rgb(TRAIN_FOLDER, transform = train_transform) + val_set = same_anatomical_plane_in_rgb(VAL_FOLDER, transform = valid_transform) + test_set = same_anatomical_plane_in_rgb(TEST_FOLDER, transform = valid_transform) +elif PIPELINE == "different_anatomical_plane_ensemble": + train_set = different_anatomical_plane_ensemble(TRAIN_FOLDER, transform = train_transform) + datasetAxial, datasetCoronial, datasetSagittal = different_anatomical_plane_ensemble.split_dataset_based_on_number(train_set.samples) + train_datasetAxial = ImageTripletDataset_to_one(datasetAxial, transform = train_transform) + train_datasetCoronial = ImageTripletDataset_to_one(datasetCoronial, transform = train_transform) + train_datasetSagittal = ImageTripletDataset_to_one(datasetSagittal, transform = train_transform) + + val_set = different_anatomical_plane_ensemble(VAL_FOLDER, transform = valid_transform) + datasetAxial, datasetCoronial, datasetSagittal = different_anatomical_plane_ensemble.split_dataset_based_on_number(val_set.samples) + val_datasetAxial = ImageTripletDataset_to_one(datasetAxial, transform = valid_transform) + val_datasetCoronial = ImageTripletDataset_to_one(datasetCoronial, transform = valid_transform) + val_datasetSagittal = ImageTripletDataset_to_one(datasetSagittal, transform = valid_transform) + + test_set = different_anatomical_plane_ensemble(TEST_FOLDER, transform = valid_transform) + datasetAxial, datasetCoronial, datasetSagittal = different_anatomical_plane_ensemble.split_dataset_based_on_number(test_set.samples) + test_datasetAxial = ImageTripletDataset_to_one(datasetAxial, transform = valid_transform) + test_datasetCoronial = ImageTripletDataset_to_one(datasetCoronial, transform = valid_transform) + test_datasetSagittal = ImageTripletDataset_to_one(datasetSagittal, transform = valid_transform) +elif PIPELINE == "ensemble_same_anatomical_planes_in_RGB": + train_set = same_anatomical_planes_in_RGB_ensemble(TRAIN_FOLDER, transform = train_transform) + datasetAxial, datasetCoronial, datasetSagittal = same_anatomical_planes_in_RGB_ensemble.split_dataset_based_on_number(train_set.samples) + train_datasetAxial = ImageTripletDataset_to_three(datasetAxial, transform = train_transform) + train_datasetCoronial = ImageTripletDataset_to_three(datasetCoronial, transform = train_transform) + 
train_datasetSagittal = ImageTripletDataset_to_three(datasetSagittal, transform = train_transform) + + val_set = same_anatomical_planes_in_RGB_ensemble(VAL_FOLDER, transform = valid_transform) + datasetAxial, datasetCoronial, datasetSagittal = same_anatomical_planes_in_RGB_ensemble.split_dataset_based_on_number(val_set.samples) + val_datasetAxial = ImageTripletDataset_to_three(datasetAxial, transform = valid_transform) + val_datasetCoronial = ImageTripletDataset_to_three(datasetCoronial, transform = valid_transform) + val_datasetSagittal = ImageTripletDataset_to_three(datasetSagittal, transform = valid_transform) + + test_set = same_anatomical_planes_in_RGB_ensemble(TEST_FOLDER, transform = valid_transform) + datasetAxial, datasetCoronial, datasetSagittal = same_anatomical_planes_in_RGB_ensemble.split_dataset_based_on_number(test_set.samples) + test_datasetAxial = ImageTripletDataset_to_three(datasetAxial, transform = valid_transform) + test_datasetCoronial = ImageTripletDataset_to_three(datasetCoronial, transform = valid_transform) + test_datasetSagittal = ImageTripletDataset_to_three(datasetSagittal, transform = valid_transform) + +if PIPELINE == "basic" or PIPELINE == "different_anatomical_plane_in_rgb" or PIPELINE == "same_anatomical_plane_in_rgb": + train_loader = DataLoader(train_set, batch_size = BATCH_SIZE, shuffle = True, num_workers=WORKERS, drop_last=True) + val_loader = DataLoader(val_set, batch_size = BATCH_SIZE, shuffle = False, num_workers=WORKERS) + test_loader = DataLoader(test_set, batch_size = 1, shuffle = False, num_workers=WORKERS) + + labels = [label for _, label in train_set.samples] + test_image_paths = [path for path, _ in test_set.samples] + print(f'test samples: {len(test_set)}') + + test_labels = {v: k for k, v in test_set.class_to_idx.items()} + print(f"Len of test labels: {len(test_labels)}, labels: {test_labels}") + + visualize_RGB_image_from_dataloader(train_loader) + +elif PIPELINE == "different_anatomical_plane_ensemble" or PIPELINE == "ensemble_same_anatomical_planes_in_RGB": + train_axial_loader = DataLoader(train_datasetAxial, batch_size = BATCH_SIZE, shuffle = True, num_workers=WORKERS, drop_last=True) + train_coronial_loader = DataLoader(train_datasetCoronial, batch_size = BATCH_SIZE, shuffle = True, num_workers=WORKERS, drop_last=True) + train_sagittal_loader = DataLoader(train_datasetSagittal, batch_size = BATCH_SIZE, shuffle = True, num_workers=WORKERS, drop_last=True) + + val_axial_loader = DataLoader(val_datasetAxial, batch_size = BATCH_SIZE, shuffle = False, num_workers=WORKERS) + val_coronial_loader = DataLoader(val_datasetCoronial, batch_size = BATCH_SIZE, shuffle = False, num_workers=WORKERS) + val_sagittal_loader = DataLoader(val_datasetSagittal, batch_size = BATCH_SIZE, shuffle = False, num_workers=WORKERS) + + test_axial_loader = DataLoader(test_datasetAxial, batch_size = 1, shuffle = False, num_workers=WORKERS) + test_coronial_loader = DataLoader(test_datasetCoronial, batch_size = 1, shuffle = False, num_workers=WORKERS) + test_sagittal_loader = DataLoader(test_datasetSagittal, batch_size = 1, shuffle = False, num_workers=WORKERS) + + labels = [label for _, label in train_datasetAxial.samples] + test_image_paths = [path for path, _ in test_datasetAxial.samples] + print(f'test samples: {len(test_datasetAxial)}') + test_labels = {v: k for k, v in test_datasetAxial.class_to_idx.items()} + print(f"Len of test labels: {len(test_labels)}, labels: {test_labels}") + if PIPELINE == "different_anatomical_plane_ensemble": + 
visualize_ENSEMBLE_image_from_dataloader(train_axial_loader, train_coronial_loader, train_sagittal_loader) + else: + visualize_RGB_image_from_dataloader(train_axial_loader, "_model1") + visualize_RGB_image_from_dataloader(train_coronial_loader, "_model2") + visualize_RGB_image_from_dataloader(train_sagittal_loader, "_model3") + +num_in_class_dict = dict(Counter(labels)) +num_in_class = np.zeros([1,len(num_in_class_dict)]) +for i in range(0, len(num_in_class_dict)): + num_in_class[0, i] = num_in_class_dict[i] + +class_weights = 1-(num_in_class/num_in_class.sum()).squeeze() +class_weights_tensor = torch.Tensor(class_weights).to(DEVICE) + +print("Class distribution (number of samples per class):", num_in_class_dict) +print("Class distribution as numpy array:", num_in_class) +print("Class weights:", class_weights_tensor) + +torch.cuda.is_available() + +# TRAINING +start_time = time.time() +if PIPELINE == "basic" or PIPELINE == "different_anatomical_plane_in_rgb" or PIPELINE == "same_anatomical_plane_in_rgb": + model = timm.create_model(MODEL_NAME, pretrained=True, num_classes=num_classes) + model.to(DEVICE) + criterion_balanced = nn.CrossEntropyLoss(weight = class_weights_tensor) + optimizer_Adam = optim.Adam(model.parameters(), 1e-3) + scaler = torch.amp.GradScaler(DEVICE) + + if not os.path.exists(CHECKPOINTS_FOLDER): + os.mkdir(CHECKPOINTS_FOLDER) + + train_losses, val_losses, train_accuracies, val_accuracies, BEST_EPOCH = train_with_scaler(model, train_loader, val_loader, optimizer_Adam, criterion_balanced, + EPOCHS, scaler, DEVICE, checkpoints_foler=CHECKPOINTS_FOLDER) + + get_learning_curves(train_losses, val_losses, train_accuracies, val_accuracies) + print(f"Best epoch: {BEST_EPOCH}") + print(f"Best val_losses: {val_losses[BEST_EPOCH-1]}, Best val_accuracies: {val_accuracies[BEST_EPOCH-1]}") + +elif PIPELINE == "different_anatomical_plane_ensemble" or PIPELINE == "ensemble_same_anatomical_planes_in_RGB": + model1 = timm.create_model(MODEL_NAME, pretrained=True, num_classes=num_classes) + model1.to(DEVICE) + criterion_balanced1 = nn.CrossEntropyLoss(weight = class_weights_tensor) + optimizer_Adam1 = optim.Adam(model1.parameters(), 1e-3) + scaler1 = torch.amp.GradScaler('cuda') + checkpoints_folder1 = CHECKPOINTS_FOLDER + '_ensemble_1_checkpoints' + if not os.path.exists(checkpoints_folder1): + os.mkdir(checkpoints_folder1) + + + train_losses1, val_losses1, train_accuracies1, val_accuracies1, BEST_EPOCH1 = train_with_scaler(model1, train_axial_loader, val_axial_loader, + optimizer_Adam1, criterion_balanced1, EPOCHS, scaler1, + DEVICE, checkpoints_foler=checkpoints_folder1) + + model2 = timm.create_model(MODEL_NAME, pretrained=True, num_classes=num_classes) + model2.to(DEVICE) + criterion_balanced2 = nn.CrossEntropyLoss(weight = class_weights_tensor) + optimizer_Adam2 = optim.Adam(model2.parameters(), 1e-3) + scaler2 = torch.amp.GradScaler('cuda') + checkpoints_folder2 = CHECKPOINTS_FOLDER + '_ensemble_2_checkpoints' + if not os.path.exists(checkpoints_folder2): + os.mkdir(checkpoints_folder2) + train_losses2, val_losses2, train_accuracies2, val_accuracies2, BEST_EPOCH2 = train_with_scaler(model2, train_coronial_loader, val_coronial_loader, + optimizer_Adam2, criterion_balanced2, EPOCHS, scaler2, + DEVICE, checkpoints_foler=checkpoints_folder2) + + model3 = timm.create_model(MODEL_NAME, pretrained=True, num_classes=num_classes) + model3.to(DEVICE) + criterion_balanced3 = nn.CrossEntropyLoss(weight = class_weights_tensor) + optimizer_Adam3 = optim.Adam(model3.parameters(), 1e-3) + scaler3 
= torch.amp.GradScaler('cuda')
+    # keep the checkpoint folder naming consistent with the other two ensemble members
+    checkpoints_folder3 = CHECKPOINTS_FOLDER + '_ensemble_3_checkpoints'
+    if not os.path.exists(checkpoints_folder3):
+        os.mkdir(checkpoints_folder3)
+    train_losses3, val_losses3, train_accuracies3, val_accuracies3, BEST_EPOCH3 = train_with_scaler(model3, train_sagittal_loader, val_sagittal_loader,
+                                                                                                    optimizer_Adam3, criterion_balanced3, EPOCHS, scaler3,
+                                                                                                    DEVICE, checkpoints_foler=checkpoints_folder3)
+    get_learning_curves(train_losses1, val_losses1, train_accuracies1, val_accuracies1, "_1_model")
+    get_learning_curves(train_losses2, val_losses2, train_accuracies2, val_accuracies2, "_2_model")
+    get_learning_curves(train_losses3, val_losses3, train_accuracies3, val_accuracies3, "_3_model")
+    print(f"Best epoch_model1: {BEST_EPOCH1}, Best epoch_model2: {BEST_EPOCH2}, Best epoch_model3: {BEST_EPOCH3}")
+    print(f"MODEL1: Best val_losses: {val_losses1[BEST_EPOCH1-1]}, Best val_accuracies: {val_accuracies1[BEST_EPOCH1-1]}")
+    print(f"MODEL2: Best val_losses: {val_losses2[BEST_EPOCH2-1]}, Best val_accuracies: {val_accuracies2[BEST_EPOCH2-1]}")
+    print(f"MODEL3: Best val_losses: {val_losses3[BEST_EPOCH3-1]}, Best val_accuracies: {val_accuracies3[BEST_EPOCH3-1]}")
+
+print(f"Training time: {time.time() - start_time} s")
+
+# TESTING
+if PIPELINE == "basic" or PIPELINE == "different_anatomical_plane_in_rgb" or PIPELINE == "same_anatomical_plane_in_rgb":
+    model = torch.load(CHECKPOINTS_FOLDER+f'/avp_{BEST_EPOCH:03d}.pkl')
+    model.to(DEVICE)
+
+    print("prediction started")
+    y_pred, gt_labels_conf = predict(model, test_loader)
+    np.save(SAVE_PREDICTION_PATH + str(BEST_EPOCH), y_pred)
+    print("prediction saved")
+
+elif PIPELINE == "different_anatomical_plane_ensemble" or PIPELINE == "ensemble_same_anatomical_planes_in_RGB":
+    model1 = torch.load(checkpoints_folder1+f'/avp_{BEST_EPOCH1:03d}.pkl')
+    model2 = torch.load(checkpoints_folder2+f'/avp_{BEST_EPOCH2:03d}.pkl')
+    model3 = torch.load(checkpoints_folder3+f'/avp_{BEST_EPOCH3:03d}.pkl')
+    y_pred_majority, y_true, confidence_scores_majority, gt_labels_conf = predict_ENSEMBLE_with_confidence(model1, model2, model3, test_axial_loader, test_coronial_loader, test_sagittal_loader, DEVICE)
+
+#EVALUATION
+if PIPELINE == "different_anatomical_plane_in_rgb":
+    # the three anatomical planes are stacked into the RGB channels of a single input
+    y_pred_majority = y_pred.argmax(axis=1) # class with the highest predicted probability
+    confidence_scores_majority = y_pred.max(axis=1) # confidence score of that class
+    y_true = [label for _, label in test_set.samples] # ground truth labels
+    get_confusionM(y_true, y_pred_majority, test_labels, len(test_set)/3)
+    different_anatomical_plane_misclassified_data(y_pred_majority, y_true, confidence_scores_majority, gt_labels_conf, test_image_paths, test_labels)
+
+elif PIPELINE == "different_anatomical_plane_ensemble" or PIPELINE == "ensemble_same_anatomical_planes_in_RGB":
+    # one model per anatomical plane, combined at test time
+    get_confusionM(y_true, y_pred_majority, test_labels, len(test_datasetAxial))
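+    # Note: despite the "majority" naming, predict_ENSEMBLE_with_confidence combines the three
+    # plane-specific models by soft voting (averaging their softmax outputs), not by counting votes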
+    different_anatomical_plane_misclassified_data(y_pred_majority, y_true, confidence_scores_majority, gt_labels_conf, test_image_paths, test_labels)
+
+elif PIPELINE == "basic" or PIPELINE == "same_anatomical_plane_in_rgb":
+    y_pred_majority = y_pred.argmax(axis=1) # class with the highest predicted probability
+    confidence_scores_majority = y_pred.max(axis=1) # confidence score of that class
+    y_true = [label for _, label in test_set.samples] # ground truth labels
+    get_confusionM(y_true, y_pred_majority, test_labels, len(test_set))
+    different_anatomical_plane_misclassified_data(y_pred_majority, y_true, confidence_scores_majority, gt_labels_conf, test_image_paths, test_labels)
+
+    # averaging evaluation -> for the basic pipeline and the same anatomical planes in RGB
+    grouped_predictions, ground_truth_dict, final_predictions = averaging_prediction(y_pred, test_set, test_image_paths)
+    y_true, y_pred_major = averaged_misclassified_data(final_predictions, ground_truth_dict, grouped_predictions)
+    get_confusionM(y_true, y_pred_major, test_labels, len(test_set)/3, averaging=True)
+
+
+sys.stdout = sys.__stdout__ # Reset stdout back to normal
+log_file.close()
+
+print("DONE")
\ No newline at end of file
diff --git a/build_models/own_dataloader.py b/build_models/own_dataloader.py
new file mode 100644
index 0000000000000000000000000000000000000000..a4e1473b40641907219826df9b0d6b5cf8f284ff
--- /dev/null
+++ b/build_models/own_dataloader.py
@@ -0,0 +1,596 @@
+from torch.utils.data import Dataset
+import numpy as np
+import os
+from PIL import Image
+import matplotlib.pyplot as plt
+import random
+
+from parameters import SAVE_INPUT_VISUALIZATION_PATH, TRAIN_FOLDER, VAL_FOLDER, TEST_FOLDER
+
+# custom transform that degrades the image resolution without changing its size
+class RandomResample:
+    def __init__(self, scale_factor):
+        # upper bound of the downsampling factor; the factor itself is drawn per image in
+        # __call__, so the augmentation varies between samples instead of being fixed per run
+        self.scale_factor = scale_factor
+
+    def __call__(self, img):
+        if isinstance(img, np.ndarray):
+            img = Image.fromarray(img)
+
+        # draw a factor >= 1 so we only ever downsample, and keep at least a 1x1 intermediate image
+        factor = random.uniform(1.0, max(1.0, self.scale_factor))
+        width, height = img.size
+        downscaled_size = (max(1, int(width / factor)), max(1, int(height / factor)))
+
+        # Downsample the image
+        img_downsampled = img.resize(downscaled_size)
+
+        # Upsample back to the original size
+        img_upsampled = img_downsampled.resize((width, height))
+
+        return img_upsampled
+
+def get_data_distribution():
+    train_categories = os.listdir(TRAIN_FOLDER)
+    val_categories = os.listdir(VAL_FOLDER)
+    test_categories = os.listdir(TEST_FOLDER)
+
+    print("Train image distribution: ")
+    class_num_in_train = []
+    for i in range(0, len(train_categories)):
+        CLASS_FOLDER = TRAIN_FOLDER + '/' + train_categories[i]
+        class_elements = os.listdir(CLASS_FOLDER)
+        class_num_in_train.append(len(class_elements))
+        print(f'   {train_categories[i]}: {class_num_in_train[i]}')
+
+    print("Validation image distribution: ")
+    class_num_in_val = []
+    for i in range(0, len(val_categories)):
+        CLASS_FOLDER = VAL_FOLDER + '/' + val_categories[i]
+        class_elements = os.listdir(CLASS_FOLDER)
+        class_num_in_val.append(len(class_elements))
+        print(f'   {val_categories[i]}: {class_num_in_val[i]}')
+
+    print("Test image distribution: ")
+    class_num_in_test = []
+    for i in range(0, len(test_categories)):
+        CLASS_FOLDER = TEST_FOLDER + '/' + test_categories[i]
+        class_elements = os.listdir(CLASS_FOLDER)
+        class_num_in_test.append(len(class_elements))
+        print(f'   {test_categories[i]}: {class_num_in_test[i]}')
+
+    return len(class_num_in_train)
+
+def visualize_RGB_image_from_dataloader(dataloader, spec_name = None):
+    # Get a batch 
from the dataloader + images, labels = next(iter(dataloader)) + # Take the first image from the batch (assuming the batch size > 0) + img = images[0].numpy() # Convert the tensor to a numpy array + + # Rearrange the image shape from (C, H, W) to (H, W, C) for plotting + img_rgb = np.transpose(img, (1, 2, 0)) + + # Plot the RGB image and individual channels + fig, axes = plt.subplots(1, 4, figsize=(20, 5)) + + # Plot the RGB image + axes[0].imshow(img_rgb) + axes[0].set_title(f"RGB Image: {labels[0]}") + axes[0].axis('off') # Hide axes for clarity + + # Plot the Red channel + axes[1].imshow(img[0], cmap='Reds') # First channel (Red) + axes[1].set_title('Axial - Red Channel') + axes[1].axis('off') + + # Plot the Green channel + axes[2].imshow(img[1], cmap='Greens') # Second channel (Green) + axes[2].set_title('Coronal - Green Channel') + axes[2].axis('off') + + # Plot the Blue channel + axes[3].imshow(img[2], cmap='Blues') # Third channel (Blue) + axes[3].set_title('Sagittal - Blue Channel') + axes[3].axis('off') + + plt.tight_layout() + if spec_name is not None: + plt.savefig(SAVE_INPUT_VISUALIZATION_PATH + spec_name + ".png", dpi=300, bbox_inches='tight') + else: + plt.savefig(SAVE_INPUT_VISUALIZATION_PATH + ".png", dpi=300, bbox_inches='tight') + plt.show() + +def visualize_ENSEMBLE_image_from_dataloader(dataloader1, dataloader2, dataloader3): + # Get a batch from the dataloader + images, labels = next(iter(dataloader1)) + # Take the first image from the batch (assuming the batch size > 0) + img = images[0].numpy() # Convert the tensor to a numpy array + img_transposed = img.transpose(1, 2, 0) + img_pil = Image.fromarray((img_transposed * 255).astype(np.uint8)) + gray_img_pil = img_pil.convert("L") + img = np.array(gray_img_pil) + # Rearrange the image shape from (C, H, W) to (H, W, C) for plotting + #img_rgb = np.transpose(img, (1, 2, 0)) + + images2, _ = next(iter(dataloader2)) + img2 = images2[0].numpy() # Convert the tensor to a numpy array + img2_transposed = img2.transpose(1, 2, 0) + img2_pil = Image.fromarray((img2_transposed * 255).astype(np.uint8)) + gray_img2_pil = img2_pil.convert("L") + img2 = np.array(gray_img2_pil) + + images3, _ = next(iter(dataloader3)) + # Take the first image from the batch (assuming the batch size > 0) + img3 = images3[0].numpy() # Convert the tensor to a numpy array + img3_transposed = img3.transpose(1, 2, 0) + img3_pil = Image.fromarray((img3_transposed * 255).astype(np.uint8)) + gray_img3_pil = img3_pil.convert("L") + img3 = np.array(gray_img3_pil) + + rgb_image = np.stack((img, img2, img3), axis=-1) + + # Plot the RGB image and individual channels + fig, axes = plt.subplots(1, 4, figsize=(20, 5)) + + # Plot the RGB image + axes[0].imshow(rgb_image.squeeze()) + axes[0].set_title(f"RGB Image: {labels[0]}") + axes[0].axis('off') # Hide axes for clarity + + # Plot the Red channel + axes[1].imshow(img.squeeze(), cmap='Reds') # First channel (Red) + axes[1].set_title('Axial - Red Channel') + axes[1].axis('off') + + # Plot the Green channel + axes[2].imshow(img2.squeeze(), cmap='Greens') # Second channel (Green) + axes[2].set_title('Coronal - Green Channel') + axes[2].axis('off') + + # Plot the Blue channel + axes[3].imshow(img3.squeeze(), cmap='Blues') # Third channel (Blue) + axes[3].set_title('Sagittal - Blue Channel') + axes[3].axis('off') + + plt.tight_layout() + plt.savefig(SAVE_INPUT_VISUALIZATION_PATH + ".png", dpi=300, bbox_inches='tight') + plt.show() + +class different_anatomical_plane_in_rgb(Dataset): + def __init__(self, root_dir, 
transform=None): + """ + Args: + root_dir (str): Directory with all the images and subfolders (A, B, C, D, E). + transform (callable, optional): Optional transform to be applied on a sample. + """ + self.root_dir = root_dir + self.transform = transform + self.class_to_idx = self._create_class_to_idx() + self.samples = self._load_samples() + + def _create_class_to_idx(self): + """Creates a mapping from folder names to integer labels.""" + class_to_idx = { + 'FLAIR': 0, + 'FLAIRCE': 1, + 'OTHER': 2, + 'T1w': 3, + 'T1wCE': 4, + 'T2star': 5, + 'T2w': 6 + } + return class_to_idx + + def _load_samples(self): + """Collects all the image paths from the subfolders and associates labels based on folder name.""" + samples = [] + + # List all the subfolders A, B, C, D, E, etc. + for folder in os.listdir(self.root_dir): + folder_path = os.path.join(self.root_dir, folder) + + if os.path.isdir(folder_path): # Check if it is a folder + # List all files in the folder and sort them + folder_items = os.listdir(folder_path) + main_file_names = set([path.rsplit("_", 2)[0] for path in folder_items]) + for name in main_file_names: + if name != "index.html": + files_to_connect = sorted([path for path in folder_items if name in path]) + img_paths = [os.path.join(folder_path, files_to_connect[0]), + os.path.join(folder_path, files_to_connect[1]), + os.path.join(folder_path, files_to_connect[2])] + + label = self.class_to_idx[folder] + samples.append((img_paths, label)) + + return samples + + def __len__(self): + """Returns the number of samples.""" + return len(self.samples) + + def __getitem__(self, idx): + """Fetches an item and applies any transformations.""" + # Get the paths for the three images and label + img_paths, label = self.samples[idx] + + # Load the three grayscale images + img0 = Image.open(img_paths[0]).convert('L') # Convert to grayscale + img1 = Image.open(img_paths[1]).convert('L') + img2 = Image.open(img_paths[2]).convert('L') + + # Convert to numpy arrays + img0_array = np.array(img0) + img1_array = np.array(img1) + img2_array = np.array(img2) + + # Stack into an RGB image + rgb_image = np.stack((img0_array, img1_array, img2_array), axis=-1) + + # Convert numpy array to PyTorch tensor (C, H, W) format + #rgb_image = torch.tensor(rgb_image).permute(2, 0, 1).float() # Convert to C, H, W + if self.transform: + rgb_image = self.transform(rgb_image) + + return rgb_image, label + +class different_anatomical_plane_ensemble(Dataset): + def __init__(self, root_dir, transform=None): + """ + Args: + root_dir (str): Directory with all the images and subfolders (A, B, C, D, E). + transform (callable, optional): Optional transform to be applied on a sample. + """ + self.root_dir = root_dir + self.transform = transform + self.class_to_idx = self._create_class_to_idx() + self.samples = self._load_samples() + + def _create_class_to_idx(self): + """Creates a mapping from folder names to integer labels.""" + class_to_idx = { + 'FLAIR': 0, + 'FLAIRCE': 1, + 'OTHER': 2, + 'T1w': 3, + 'T1wCE': 4, + 'T2star': 5, + 'T2w': 6 + } + return class_to_idx + + def _load_samples(self): + """Collects all the image paths from the subfolders and associates labels based on folder name.""" + samples = [] + + # List all the subfolders A, B, C, D, E, etc. 
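+        # Assumed filename convention (illustrative): <volume>_<plane>_<slice>.<ext>, so that
+        # path.rsplit("_", 2)[0] below recovers the volume name shared by all views of one scan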
+        for folder in os.listdir(self.root_dir):
+            folder_path = os.path.join(self.root_dir, folder)
+
+            if os.path.isdir(folder_path): # Check if it is a folder
+                # List all files in the folder and sort them
+                folder_items = os.listdir(folder_path)
+                main_file_names = set([path.rsplit("_", 2)[0] for path in folder_items])
+                for name in main_file_names:
+                    if name != "index.html":
+                        files_to_connect = sorted([path for path in folder_items if name in path])
+                        for i in range(0, len(files_to_connect)):
+                            # one sample per view; the group index encodes the anatomical plane
+                            img_paths = [os.path.join(folder_path, files_to_connect[i])]
+                            group = i
+                            label = self.class_to_idx[folder]
+                            samples.append((img_paths, label, group))
+
+        return samples
+
+    def __len__(self):
+        """Returns the number of samples."""
+        return len(self.samples)
+
+    def __getitem__(self, idx):
+        """Fetches an item and applies any transformations."""
+        # Each sample holds a single image path (one anatomical plane), a label and the group index
+        img_paths, label, _ = self.samples[idx]
+
+        # Load the grayscale image and replicate it to three channels so that
+        # ImageNet-pretrained (RGB) backbones accept it
+        img0 = Image.open(img_paths[0]).convert('L')
+        rgb_image = np.array(img0.convert('RGB'))
+
+        if self.transform:
+            rgb_image = self.transform(rgb_image)
+
+        return rgb_image, label
+
+    def split_dataset_based_on_number(samples):
+        """Splits the dataset into 3 datasets based on the group index (0, 1, 2)."""
+        datasetAxial = []
+        datasetCoronial = []
+        datasetSagittal = []
+
+        # Divide samples into 3 datasets based on the group index (0: axial, 1: coronal, 2: sagittal)
+        for img_paths, label, group in samples:
+            if group == 0:
+                datasetAxial.append((img_paths, label))
+            elif group == 1:
+                datasetCoronial.append((img_paths, label))
+            elif group == 2:
+                datasetSagittal.append((img_paths, label))
+
+        return datasetAxial, datasetCoronial, datasetSagittal
+
+class ImageTripletDataset_to_one(Dataset):
+    def __init__(self, samples, transform=None):
+        """
+        Args:
+            samples (list): List of tuples where each tuple is (img_paths, label).
+            transform (callable, optional): Optional transform to be applied on a sample.
+        """
+        self.class_to_idx = self._create_class_to_idx()
+        self.samples = samples
+        self.transform = transform
+
+    def _create_class_to_idx(self):
+        """Creates a mapping from folder names to integer labels."""
+        class_to_idx = {
+            'FLAIR': 0,
+            'FLAIRCE': 1,
+            'OTHER': 2,
+            'T1w': 3,
+            'T1wCE': 4,
+            'T2star': 5,
+            'T2w': 6
+        }
+        return class_to_idx
+
+    def __len__(self):
+        return len(self.samples)
+
+    def __getitem__(self, idx):
+        # Get the path of the single image and its label
+        img_paths, label = self.samples[idx]
+
+        # Load the grayscale image and replicate it to three channels
+        img0 = Image.open(img_paths[0]).convert('L') # Convert to grayscale
+        rgb_image = img0.convert('RGB')
+        # Convert to numpy array
+        img0_array = np.array(rgb_image)
+
+        # Apply any transformations if specified
+        if self.transform:
+            img0_array = self.transform(img0_array)
+
+        return img0_array, label
+
+class same_anatomical_planes_in_RGB_ensemble(Dataset):
+    def __init__(self, root_dir, transform=None):
+        """
+        Args:
+            root_dir (str): Directory with all the images and subfolders (A, B, C, D, E).
+ transform (callable, optional): Optional transform to be applied on a sample. + """ + self.root_dir = root_dir + self.transform = transform + self.class_to_idx = self._create_class_to_idx() + self.samples = self._load_samples() + + def _create_class_to_idx(self): + """Creates a mapping from folder names to integer labels.""" + class_to_idx = { + 'FLAIR': 0, + 'FLAIRCE': 1, + 'OTHER': 2, + 'T1w': 3, + 'T1wCE': 4, + 'T2star': 5, + 'T2w': 6 + } + return class_to_idx + + def _load_samples(self): + """Collects all the image paths from the subfolders and associates labels based on folder name.""" + samples = [] + + # List all the subfolders A, B, C, D, E, etc. + for folder in os.listdir(self.root_dir): + folder_path = os.path.join(self.root_dir, folder) + + if os.path.isdir(folder_path): # Check if it is a folder + # List all files in the folder and sort them + folder_items = os.listdir(folder_path) + main_file_names = set([path.rsplit("_", 2)[0] for path in folder_items]) + for name in main_file_names: + if name != "index.html": + files_to_connect = sorted([path for path in folder_items if name in path]) + for i in range(0, len(files_to_connect), 3): + img_paths = [os.path.join(folder_path, files_to_connect[i]), + os.path.join(folder_path, files_to_connect[i+1]), + os.path.join(folder_path, files_to_connect[i+2])] + group = int(i/3) + label = self.class_to_idx[folder] + samples.append((img_paths, label, group)) + + return samples + + def __len__(self): + """Returns the number of samples.""" + return len(self.samples) + + def __getitem__(self, idx): + """Fetches an item and applies any transformations.""" + # Get the paths for the three images and label + img_paths, label, _ = self.samples[idx] + + # Load the three grayscale images + img0 = Image.open(img_paths[0]).convert('L') # Convert to grayscale + img1 = Image.open(img_paths[1]).convert('L') + img2 = Image.open(img_paths[2]).convert('L') + + # Convert to numpy arrays + img0_array = np.array(img0) + img1_array = np.array(img1) + img2_array = np.array(img2) + + # Stack into an RGB image + rgb_image = np.stack((img0_array, img1_array, img2_array), axis=-1) + + # Convert numpy array to PyTorch tensor (C, H, W) format + #rgb_image = torch.tensor(rgb_image).permute(2, 0, 1).float() # Convert to C, H, W + if self.transform: + rgb_image = self.transform(rgb_image) + + return rgb_image, label + + def split_dataset_based_on_number(samples): + """Splits the dataset into 3 datasets based on the number in the filename (0, 1, 2).""" + datasetAxial = [] + datasetCoronial = [] + datasetSagittal = [] + + # Divide samples into 3 datasets based on the `numberY` (0, 1, 2) + for img_paths, label, group in samples: + if group == 0: + datasetAxial.append((img_paths, label)) + elif group == 1: + datasetCoronial.append((img_paths, label)) + elif group == 2: + datasetSagittal.append((img_paths, label)) # 2nd group for datasetC + + return datasetAxial, datasetCoronial, datasetSagittal + +class ImageTripletDataset_to_three(Dataset): + def __init__(self, samples, transform=None): + """ + Args: + samples (list): List of tuples where each tuple is (img_paths, label). + transform (callable, optional): Optional transform to be applied on a sample. 
+ """ + self.class_to_idx = self._create_class_to_idx() + self.samples = samples + self.transform = transform + + def _create_class_to_idx(self): + """Creates a mapping from folder names to integer labels.""" + class_to_idx = { + 'FLAIR': 0, + 'FLAIRCE': 1, + 'OTHER': 2, + 'T1w': 3, + 'T1wCE': 4, + 'T2star': 5, + 'T2w': 6 + } + return class_to_idx + + def __len__(self): + return len(self.samples) + + def __getitem__(self, idx): + # Get the paths for the three images and label + img_paths, label = self.samples[idx] + + # Load the three grayscale images + img0 = Image.open(img_paths[0]).convert('L') # Convert to grayscale + img1 = Image.open(img_paths[1]).convert('L') + img2 = Image.open(img_paths[2]).convert('L') + + # Convert to numpy arrays + img0_array = np.array(img0) + img1_array = np.array(img1) + img2_array = np.array(img2) + + # Stack into a single multi-channel image (H, W, C) + rgb_image = np.stack((img0_array, img1_array, img2_array), axis=-1) + + # Apply any transformations if specified + if self.transform: + rgb_image = self.transform(rgb_image) + + return rgb_image, label + +class same_anatomical_plane_in_rgb(Dataset): + def __init__(self, root_dir, transform=None): + """ + Args: + root_dir (str): Directory with all the images and subfolders (A, B, C, D, E). + transform (callable, optional): Optional transform to be applied on a sample. + """ + self.root_dir = root_dir + self.transform = transform + self.class_to_idx = self._create_class_to_idx() + self.samples = self._load_samples() + self.targets = [label for _, label in self.samples] + + def _create_class_to_idx(self): + """Creates a mapping from folder names to integer labels.""" + class_to_idx = { + 'FLAIR': 0, + 'FLAIRCE': 1, + 'OTHER': 2, + 'T1w': 3, + 'T1wCE': 4, + 'T2star': 5, + 'T2w': 6 + } + return class_to_idx + + def _load_samples(self): + """Collects all the image paths from the subfolders and associates labels based on folder name.""" + samples = [] + + # List all the subfolders A, B, C, D, E, etc. 
+        for folder in os.listdir(self.root_dir):
+            folder_path = os.path.join(self.root_dir, folder)
+
+            if os.path.isdir(folder_path): # Check if it is a folder
+                # List all files in the folder and sort them
+                folder_items = os.listdir(folder_path)
+                main_file_names = set([path.rsplit("_", 2)[0] for path in folder_items])
+                for name in main_file_names:
+                    if name != "index.html":
+                        files_to_connect = sorted([path for path in folder_items if name in path])
+                        for i in range(0, len(files_to_connect), 3):
+                            img_paths = [os.path.join(folder_path, files_to_connect[i]),
+                                         os.path.join(folder_path, files_to_connect[i+1]),
+                                         os.path.join(folder_path, files_to_connect[i+2])]
+
+                            label = self.class_to_idx[folder]
+                            samples.append((img_paths, label))
+
+        return samples
+
+    def __len__(self):
+        """Returns the number of samples."""
+        return len(self.samples)
+
+    def __getitem__(self, idx):
+        """Fetches an item and applies any transformations."""
+        # Get the paths for the three images and label
+        img_paths, label = self.samples[idx]
+
+        # Load the three grayscale images
+        img0 = Image.open(img_paths[0]).convert('L') # Convert to grayscale
+        img1 = Image.open(img_paths[1]).convert('L')
+        img2 = Image.open(img_paths[2]).convert('L')
+
+        # Convert to numpy arrays
+        img0_array = np.array(img0)
+        img1_array = np.array(img1)
+        img2_array = np.array(img2)
+
+        # Stack into an RGB image
+        rgb_image = np.stack((img0_array, img1_array, img2_array), axis=-1)
+
+        # Convert numpy array to PyTorch tensor (C, H, W) format
+        #rgb_image = torch.tensor(rgb_image).permute(2, 0, 1).float() # Convert to C, H, W
+        if self.transform:
+            rgb_image = self.transform(rgb_image)
+
+        return rgb_image, label
diff --git a/build_models/parameters.py b/build_models/parameters.py
new file mode 100644
index 0000000000000000000000000000000000000000..40db229ffd9c82879aa25b0f3a0bbfd4e0765eca
--- /dev/null
+++ b/build_models/parameters.py
@@ -0,0 +1,68 @@
+# basic
+"""PIPELINE = "basic"
+DATA_PATH = '/net/travail/bformanek/MRI_dataset'
+SPECIALIZATION = "_basic_N1"
+
+PIPELINE = "basic"
+DATA_PATH = '/net/travail/mvajay/TRDP/N2'
+SPECIALIZATION = "_basic_N2"
+
+PIPELINE = "basic"
+DATA_PATH = '/net/travail/mvajay/TRDP/N3'
+SPECIALIZATION = "_basic_N3"
+
+# different anatomical planes in RGB
+PIPELINE = "different_anatomical_plane_in_rgb"
+DATA_PATH = '/net/travail/bformanek/MRI_dataset'
+SPECIALIZATION = "_different_anatomical_plane_in_rgb_N1"
+"""
+# same anatomical planes in RGB
+PIPELINE = "same_anatomical_plane_in_rgb"
+DATA_PATH = '/net/travail/mvajay/TRDP/N3_c1'
+SPECIALIZATION = "_same_anatomical_plane_in_rgb_N3_c1"
+"""
+PIPELINE = "same_anatomical_plane_in_rgb"
+DATA_PATH = '/net/travail/mvajay/TRDP/N3_c2'
+SPECIALIZATION = "_same_anatomical_plane_in_rgb_N3_c2"
+
+# different anatomical plane ensemble
+PIPELINE = "different_anatomical_plane_ensemble"
+DATA_PATH = '/net/travail/bformanek/MRI_dataset'
+SPECIALIZATION = "_different_anatomical_plane_ensemble_N1"
+
+# ensemble same anatomical planes in RGB
+PIPELINE = "ensemble_same_anatomical_planes_in_RGB"
+DATA_PATH = '/net/travail/mvajay/TRDP/N3_c1'
+SPECIALIZATION = "_ensemble_same_anatomical_planes_in_RGB_N3_c1"
+
+PIPELINE = "ensemble_same_anatomical_planes_in_RGB"
+DATA_PATH = '/net/travail/mvajay/TRDP/N3_c2'
+SPECIALIZATION = "_ensemble_same_anatomical_planes_in_RGB_N3_c2"
+"""
+## COMMON PARAMETERS
+DEVICE = 'cuda' # 'cuda' or 'cpu'
+
+TRAIN_FOLDER = DATA_PATH + '/train'
+VAL_FOLDER = DATA_PATH + '/val'
+TEST_FOLDER = DATA_PATH + '/test'
+
+MODEL_NAME = 'resnet18.a2_in1k'
+
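+# Core training hyperparameters; train_with_scaler checkpoints the epoch with the best
+# validation accuracy, so EPOCHS is an upper bound on training length rather than the epoch used
+BATCH_SIZE = 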
64 +WORKERS = 8 +EPOCHS = 60 + +SAVE_FOLDER_MAIN = '/net/travail/mvajay/TRDP/' + +#BEST_EPOCH = 23 + +CHECKPOINTS_FOLDER = SAVE_FOLDER_MAIN+'checkpoints/transfer_checkpoints_'+ MODEL_NAME + SPECIALIZATION +SAVE_PREDICTION_PATH = '/net/cremi/mvajay/TRDP/predictions/' + MODEL_NAME + SPECIALIZATION + "_b_epoch_" +SAVE_MISCLASSIFIED_PATH = '/net/cremi/mvajay/TRDP/misclassified_data/'+ MODEL_NAME +'_mislabelled_majority_voting'+'.json' +SAVE_CONFUSIONM_PATH = '/net/cremi/mvajay/TRDP/confusionM/' + MODEL_NAME + '_majority_voting'+'.png' +SAVE_INPUT_VISUALIZATION_PATH = '/net/cremi/mvajay/TRDP/input_visualization/' + MODEL_NAME + SPECIALIZATION +SAVE_PATH_LEARNING_CURVES = SAVE_FOLDER_MAIN + "learning_curves/" + MODEL_NAME + SPECIALIZATION + +LOG_PATH = '/net/cremi/mvajay/TRDP/logs/'+ MODEL_NAME + SPECIALIZATION + ".log" + diff --git a/build_models/run_combinations.py b/build_models/run_combinations.py new file mode 100644 index 0000000000000000000000000000000000000000..0c9d7441e27bbf17a4f5126579fcfb01c0a02cfa --- /dev/null +++ b/build_models/run_combinations.py @@ -0,0 +1,69 @@ +import os +import subprocess + +# Define the parameter combinations +configurations = [ + {"PIPELINE": "basic", "DATA_PATH": "/net/travail/bformanek/MRI_dataset", "SPECIALIZATION": "_basic_N1_aug_flip_rot_scale"}, + {"PIPELINE": "basic", "DATA_PATH": "/net/travail/mvajay/TRDP/N2", "SPECIALIZATION": "_basic_N2_aug_flip_rot_scale"}, + {"PIPELINE": "basic", "DATA_PATH": "/net/travail/mvajay/TRDP/N3", "SPECIALIZATION": "_basic_N3_aug_flip_rot_scale"}, + {"PIPELINE": "different_anatomical_plane_in_rgb", "DATA_PATH": "/net/travail/bformanek/MRI_dataset", "SPECIALIZATION": "_different_anatomical_plane_in_rgb_N1_aug_flip_rot_scale"}, + {"PIPELINE": "same_anatomical_plane_in_rgb", "DATA_PATH": "/net/travail/mvajay/TRDP/N3_c1", "SPECIALIZATION": "_same_anatomical_plane_in_rgb_N3_c1_aug_flip_rot_scale"}, + {"PIPELINE": "same_anatomical_plane_in_rgb", "DATA_PATH": "/net/travail/mvajay/TRDP/N3_c2", "SPECIALIZATION": "_same_anatomical_plane_in_rgb_N3_c2_aug_flip_rot_scale"}, + {"PIPELINE": "different_anatomical_plane_ensemble", "DATA_PATH": "/net/travail/bformanek/MRI_dataset", "SPECIALIZATION": "_different_anatomical_plane_ensemble_N1_aug_flip_rot_scale"}, + {"PIPELINE": "ensemble_same_anatomical_planes_in_RGB", "DATA_PATH": "/net/travail/mvajay/TRDP/N3_c1", "SPECIALIZATION": "_ensemble_same_anatomical_planes_in_RGB_N3_c1_aug_flip_rot_scale"}, + {"PIPELINE": "ensemble_same_anatomical_planes_in_RGB", "DATA_PATH": "/net/travail/mvajay/TRDP/N3_c2", "SPECIALIZATION": "_ensemble_same_anatomical_planes_in_RGB_N3_c2_aug_flip_rot_scale"} +] + +# Path to parameters.py and main.py +parameters_file = "parameters.py" +main_script = "main.py" + +# Backup the original parameters file +if not os.path.exists(f"{parameters_file}.backup"): + os.rename(parameters_file, f"{parameters_file}.backup") + +# Function to update parameters.py +def update_parameters(config): + with open(parameters_file, "w") as file: + file.write(f"""PIPELINE = "{config['PIPELINE']}"\n""") + file.write(f"""DATA_PATH = "{config['DATA_PATH']}"\n""") + file.write(f"""SPECIALIZATION = "{config['SPECIALIZATION']}"\n\n""") + file.write(""" +# COMMON PARAMETERS +DEVICE = 'cuda' # 'cuda' or 'cpu' + +TRAIN_FOLDER = DATA_PATH + '/train' +VAL_FOLDER = DATA_PATH + '/val' +TEST_FOLDER = DATA_PATH + '/test' + +MODEL_NAME = 'resnet18' + +BATCH_SIZE = 64 +WORKERS = 8 +EPOCHS = 60 + +SAVE_FOLDER_MAIN = '/net/travail/mvajay/TRDP/' + +CHECKPOINTS_FOLDER = 
SAVE_FOLDER_MAIN+'checkpoints/transfer_checkpoints_'+ MODEL_NAME + SPECIALIZATION +SAVE_PREDICTION_PATH = SAVE_FOLDER_MAIN+'predictions/' + MODEL_NAME + SPECIALIZATION + "_b_epoch_" +SAVE_MISCLASSIFIED_PATH = SAVE_FOLDER_MAIN+'misclassified_data/'+ MODEL_NAME + SPECIALIZATION +'_mislabelled_majority_voting'+'.json' +SAVE_CONFUSIONM_PATH = SAVE_FOLDER_MAIN+'confusionM/' + MODEL_NAME + SPECIALIZATION + '_majority_voting'+'.png' +SAVE_INPUT_VISUALIZATION_PATH = SAVE_FOLDER_MAIN+'input_visualization/' + MODEL_NAME + SPECIALIZATION +SAVE_PATH_LEARNING_CURVES = SAVE_FOLDER_MAIN + "learning_curves/" + MODEL_NAME + SPECIALIZATION + +LOG_PATH = SAVE_FOLDER_MAIN+'logs/'+ MODEL_NAME + SPECIALIZATION + ".log" +""") + +# Iterate over each configuration +for config in configurations: + print(f"Running configuration: {config}") + # Update the parameters.py file + update_parameters(config) + # Execute the main.py script + try: + subprocess.run(["python", main_script], check=True) + except subprocess.CalledProcessError as e: + print(f"Error while running configuration {config}: {e}") + +# Restore the original parameters.py file +os.rename(f"{parameters_file}.backup", parameters_file) diff --git a/build_models/test_functions.py b/build_models/test_functions.py new file mode 100644 index 0000000000000000000000000000000000000000..915007c26491b45b2d5c4965df3e264607c0726e --- /dev/null +++ b/build_models/test_functions.py @@ -0,0 +1,133 @@ +import torch +import numpy as np +import torch.nn.functional as F +from collections import defaultdict + +from parameters import DEVICE + +def predict(model, data_loader): + model.eval() + + # save the predictions in this list + y_pred = [] + + # no gradient needed + with torch.no_grad(): + gt_labels_conf = [] + # go over each batch in the loader. 
The targets are only needed to read off the ground-truth confidences
+        for batch, gt_label in data_loader:
+
+            # Move batch to the GPU
+            batch = batch.to(DEVICE)
+
+            # predict probabilities of each class
+            predictions = model(batch)
+
+            # apply a softmax to the predictions
+            predictions = F.softmax(predictions, dim=1)
+
+            # move to the cpu and convert to numpy
+            predictions = predictions.cpu().numpy()
+
+            # save
+            y_pred.append(predictions)
+            # Get batch indices
+            batch_indices = np.arange(len(gt_label))
+            # Extract ground truth confidences
+            gt_confidences = predictions[batch_indices, gt_label.numpy()]
+            gt_labels_conf.extend(gt_confidences)
+    # stack predictions into a (num_samples, num_classes) array
+    y_pred = np.vstack(y_pred)
+    return y_pred, gt_labels_conf
+
+def averaging_prediction(y_pred, test_set, test_image_paths):
+    gt_y = np.array(test_set.targets) # get the true labels and convert to numpy
+    grouped_predictions = defaultdict(list)
+
+    for i, image_path in enumerate(test_image_paths):
+        # Strip the trailing 10 characters (the per-view suffix) so all views of one volume share a key
+        if isinstance(image_path, list):
+            base_name = image_path[0][:-10]
+        else:
+            base_name = image_path[:-10]
+        # Append the prediction probabilities for the current image to the corresponding group
+        grouped_predictions[base_name].append(y_pred[i])
+
+    ground_truth_dict = {}
+
+    for i, image_path in enumerate(test_image_paths):
+        if isinstance(image_path, list):
+            base_name = image_path[0][:-10]
+        else:
+            base_name = image_path[:-10]
+
+        if base_name not in ground_truth_dict:
+            ground_truth_dict[base_name] = gt_y[i]
+        elif ground_truth_dict[base_name] != gt_y[i]:
+            print(f"WARNING: the ground truth labels of {base_name} differ between its views")
+
+    final_predictions = {}
+
+    for base_name, preds in grouped_predictions.items():
+        preds_array = np.array(preds)
+
+        # average the class probabilities over the grouped views (three planes here)
+        combined_probs = np.mean(preds_array, axis=0)
+
+        final_class = np.argmax(combined_probs)
+        final_predictions[base_name] = final_class
+
+    return grouped_predictions, ground_truth_dict, final_predictions
+
+def predict_ENSEMBLE_with_confidence(model1, model2, model3, data_loader1, data_loader2, data_loader3, device):
+    # Set models to evaluation mode
+    model1.eval()
+    model2.eval()
+    model3.eval()
+
+    # To store final predictions, true labels, and confidence scores
+    all_majority_preds = []
+    true_labels = []
+    confidence_scores_majority = [] # Confidence of the ensemble prediction
+    confidence_scores_true = [] # Confidence of the ground truth label
+
+    # No gradient calculation needed
+    with torch.no_grad():
+        # Iterate over each batch from the three data loaders in sync
+        for (batch1, targets1), (batch2, _), (batch3, _) in zip(data_loader1, data_loader2, data_loader3):
+            # Extend true labels from the first loader
+            true_labels.extend(targets1.cpu().numpy())
+
+            # Move each batch to the device
+            batch1, batch2, batch3 = batch1.to(device), batch2.to(device), batch3.to(device)
+
+            # Get predictions from each model and apply softmax to get probabilities
+            probs1 = F.softmax(model1(batch1), dim=1).cpu().numpy()
+            probs2 = F.softmax(model2(batch2), dim=1).cpu().numpy()
+            probs3 = F.softmax(model3(batch3), dim=1).cpu().numpy()
+
+            # Stack probabilities to form an array of shape (batch_size, 3, num_classes)
+            stacked_probs = np.stack([probs1, probs2, probs3], axis=1) # (batch_size, 3, num_classes)
+
+            # Compute the average probabilities across models for each class (soft voting)
+            avg_probs = np.mean(stacked_probs, axis=1) # (batch_size, num_classes)
+
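+            # The max averaged probability doubles as a rough confidence estimate; softmax
+            # outputs are not calibrated, so treat it as indicative rather than a true probability.
+            # Get the 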
predicted class (majority) and confidence for each sample + majority_preds = avg_probs.argmax(axis=1) + confidence_scores_majority.extend(avg_probs.max(axis=1)) + + # Get confidence scores for the true labels + true_confidences = avg_probs[np.arange(len(targets1)), targets1.cpu().numpy()] + confidence_scores_true.extend(true_confidences) + + # Collect final majority-voted predictions + all_majority_preds.extend(majority_preds) + + # Convert lists to numpy arrays for easy analysis if needed + all_majority_preds = np.array(all_majority_preds) + true_labels = np.array(true_labels) + confidence_scores_majority = np.array(confidence_scores_majority) + confidence_scores_true = np.array(confidence_scores_true) + + return all_majority_preds, true_labels, confidence_scores_majority, confidence_scores_true + diff --git a/build_models/train_functions.py b/build_models/train_functions.py new file mode 100644 index 0000000000000000000000000000000000000000..06faf4b074ef320ffe1f22718918d34602a60632 --- /dev/null +++ b/build_models/train_functions.py @@ -0,0 +1,122 @@ +import torch +import numpy as np + +def train_for_epoch_with_scaler(model, train_loader, optimizer, criterion, scaler, device): + # set model to train + model.train() + + train_losses = [] + train_accuracies = [] + counter = 0 + + for batch, target in train_loader: + + # data to GPU + batch = batch.to(device) + target = target.to(device) + + # reset optimizer + optimizer.zero_grad() + + # forward pass + predictions = model(batch) + + # calculate accuracy + accuracy = (torch.argmax(predictions, dim=1) == target).sum().item() / target.size(0) + + # calculate loss + loss = criterion(predictions, target) + + # backward pass + scaler.scale(loss).backward() + + # parameter update + scaler.step(optimizer) + scaler.update() + + # track loss + train_losses.append(float(loss.item())) + train_accuracies.append(accuracy) + + counter += 1 + if counter % 20 == 0: + print('[{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( + int(counter * len(batch)), len(train_loader.dataset), + 100. 
* counter / len(train_loader), loss.item())) + + train_loss = np.mean(np.array(train_losses)) + train_accuracy = np.mean(np.array(train_accuracies)) + + print('\nTrain: Average loss: {:.4f}, Accuracy: {:.4f}\n'.format( + train_loss, train_accuracy)) + + return train_loss, train_accuracy + +def validate(model, val_loader, criterion, device): + model.eval() + + val_losses = [] + y_true, y_pred = [], [] + + with torch.no_grad(): + for batch, target in val_loader: + + # move data to the device + batch = batch.to(device) + target = target.to(device) + + with torch.autocast(device_type=device, dtype=torch.float16): + # make predictions + predictions = model(batch) + + # calculate loss + loss = criterion(predictions, target) + + # track losses and predictions + val_losses.append(float(loss.item())) + y_true.extend(target.cpu().numpy()) + y_pred.extend(predictions.argmax(dim=1).cpu().numpy()) + + y_true = np.array(y_true) + y_pred = np.array(y_pred) + val_losses = np.array(val_losses) + + # calculate validation accuracy from y_true and y_pred + val_accuracy = np.mean(y_true == y_pred) + + # calculate the mean validation loss + val_loss = np.mean(val_losses) + + print('Validation: Average loss: {:.4f}, Accuracy: {:.4f}\n'.format( + val_loss, val_accuracy)) + + return val_loss, val_accuracy + +def train_with_scaler(model, train_loader, val_loader, optimizer, criterion, epochs, scaler, device, checkpoints_foler = None, first_epoch=1): + train_losses, val_losses = [], [] + train_accuracies, val_accuracies = [], [] + max_val_acc = 0 + best_epoch = 0 + + for epoch in range(first_epoch, epochs+first_epoch): + + print('Train Epoch: {}'.format(epoch)) + + # train + train_loss, train_acc = train_for_epoch_with_scaler(model, train_loader, optimizer, criterion, scaler, device) + + # validation + valid_loss, valid_acc = validate(model, val_loader, criterion, device) + + train_losses.append(train_loss) + val_losses.append(valid_loss) + train_accuracies.append(train_acc) + val_accuracies.append(valid_acc) + + # save checkpoint + if checkpoints_foler != None and max_val_acc < valid_acc: + max_val_acc = valid_acc + best_epoch = epoch + torch.save(model, checkpoints_foler+f'/avp_{epoch:03d}.pkl') + + return train_losses, val_losses, train_accuracies, val_accuracies, best_epoch \ No newline at end of file