Thesis work.

b73bc61c · Harascsák Ádám · b73bc61c · b73bc61c · b73bc61c · b73bc61c
Commit b73bc61c authored 3 years ago by Harascsák Ádám
--- a/artefact_1_AMI.py
+++ b/artefact_1_AMI.py
+import numpy as np
+import math
+
+def calc_MI(X,Y,bins):
+   c_XY = np.histogram2d(X,Y,bins)[0]
+   c_X = np.histogram(X,bins)[0]
+   c_Y = np.histogram(Y,bins)[0]
+
+   H_X = shan_entropy(c_X)
+   H_Y = shan_entropy(c_Y)
+   H_XY = shan_entropy(c_XY)
+
+   MI = H_X + H_Y - H_XY
+   return MI
+
+def shan_entropy(c):
+    c_normalized = c / float(np.sum(c))
+    c_normalized = c_normalized[np.nonzero(c_normalized)]
+    H = -sum(c_normalized* np.log2(c_normalized))
+    return H
+
+""" Step 4).1: Auto-Mutal Informatin """
+def Auto_Mutual_Information(ICs_projections, lag_offset, max_value, min_value):
+    marked_ICs = np.zeros(64)
+    pre_AMIs = []
+    for i in range(np.size(ICs_projections, 2)):
+        actual_projection = np.transpose(ICs_projections[:, :, i])
+        lag_offsets = np.array([lag_offset])
+        cAMIs = []
+        for lagNo in range(np.size(lag_offsets)):
+            lags = np.arange(0, math.floor(np.size(ICs_projections, 1) / 2), lag_offsets[lagNo])
+            end_p = np.size(ICs_projections, 1)
+            for chNo in range(np.size(actual_projection, 0)):
+                actual_AMIs = []
+                for k in range(np.size(lags)):
+                    actual_AMIs.append(
+                        calc_MI(actual_projection[chNo, 0:end_p - lags[k]], actual_projection[chNo, lags[k]:end_p], 5))
+                cAMIs.append(np.mean(actual_AMIs))
+        pre_AMIs.append(np.max(cAMIs))
+    AMIs = np.array(pre_AMIs)
+
+    thresholdMax = max_value
+    thresholddMin = min_value
+
+    for i in range(np.size(AMIs)):
+        if (AMIs[i] > thresholdMax or AMIs[i] < thresholddMin):
+            marked_ICs[i] = marked_ICs[i] + 1
+
+    return marked_ICs
\ No newline at end of file
--- a/artefact_2_Spiking.py
+++ b/artefact_2_Spiking.py
+import numpy as np
+from scipy.signal import find_peaks
+
+""" Step 4).2: Spiking activity """
+def Spiking_activity(S,  coefficientThreshold):
+    pre_spike_threshold = []
+    marked_ICs = np.zeros(64)
+    IC = S.T
+    for i in range(np.size(IC, 0)):
+        peaks_actual, _ = find_peaks(np.abs(IC[i, :]))
+        actual_peak_value = np.abs(IC[i, peaks_actual])
+        mean_actual = np.mean(actual_peak_value)
+        std_actual = np.std(actual_peak_value)
+        threshold_actual = mean_actual + (3 * std_actual)
+        pre_spike_threshold.append(threshold_actual)
+
+        if np.max(actual_peak_value) > threshold_actual:
+            marked_ICs[i] = marked_ICs[i] + 1
+
+        """ -- Spike zone coefficients -- """
+        actual_signal = np.abs(IC[i])
+        pre_actual_spike_position = []
+        pre_actual_Co = []
+        for j in range(1, np.size(actual_signal) - 1):
+            if (actual_signal[j] > actual_signal[j - 1] and actual_signal[j] > actual_signal[j + 1]):
+                pre_actual_spike_position.append(j)
+                # Pythonban [x:y] = [x,y) => mivel az utolsó index nincs benne, j-1:j+2!
+                actual_mean = np.mean(actual_signal[j - 1:j + 2])
+                actual_std = np.std(actual_signal[j - 1:j + 2])
+                pre_actual_Co.append(actual_mean / actual_std)
+        actual_Co = np.array(pre_actual_Co)
+        actual_spike_position = np.array(pre_actual_spike_position)
+        threshold = 0.1 * (np.mean(actual_Co) + np.std(actual_Co))
+
+        if (np.size(actual_Co) > 0):
+            high_Co_counter = 0
+            for k in range(np.size(actual_spike_position)):
+                if (actual_Co[k] > threshold):
+                    high_Co_counter += 1
+            no_spikes = np.size(high_Co_counter) / np.size(actual_spike_position)
+        else:
+            no_spikes = 0
+
+        if (no_spikes > coefficientThreshold):
+            marked_ICs[i] = marked_ICs[i] + 1
+
+    return marked_ICs
\ No newline at end of file
--- a/artefact_3_Kurtosis.py
+++ b/artefact_3_Kurtosis.py
+import numpy as np
+from scipy.stats import kurtosis
+
+""" 4).3 Kurtosis """
+def Kurtosis(ICs_projections):
+    pre_IC_kurtosis = []
+    marked_ICs = np.zeros(64)
+    for i in range(np.size(ICs_projections, 2)):
+        actual_projection = np.transpose(ICs_projections[:, :, i])
+        pre_kurtosis = []
+        for k in range(np.size(actual_projection, 0)):
+            actual_kurtosis = kurtosis(actual_projection[k, :])
+            pre_kurtosis.append(actual_kurtosis)
+        pre_IC_kurtosis.append(np.mean(np.array(pre_kurtosis)))
+    IC_kurtosis = np.array(pre_IC_kurtosis)
+
+    mu = np.mean(IC_kurtosis)
+    sigma = np.std(IC_kurtosis)
+    for i in range(np.size(IC_kurtosis)):
+        k = IC_kurtosis[i]
+        if k > (mu + (0.5 * sigma)):
+            marked_ICs[i] = marked_ICs[i] + 1
+
+    return marked_ICs
\ No newline at end of file
--- a/artefact_4_5_PSD.py
+++ b/artefact_4_5_PSD.py
+import numpy as np
+import math
+from scipy.fft import fft, ifft
+
+def indices(a, func):
+    return [i for (i, val) in enumerate(a) if func(val)]
+
+def nextpow2(x):
+    """ Function for finding the next power of 2 """
+    return 1 if x == 0 else math.ceil(math.log2(x))
+
+def powerspectrum(data, Fs):
+    T = 1 / Fs
+    L = np.size(data, 0)
+    NFFT = 2 ** nextpow2(L)
+    Y = fft(data, NFFT) / L
+    f = Fs / 2 * np.linspace(0, 1, int(NFFT / 2))
+    power = 2 * np.abs(Y[0:int(NFFT / 2)])
+    return f, power
+
+""" 4).4-5 Check PSDs + PSD of gamma frequency """
+def PSD(ICs_projections, FS, distance, thGamma):
+    gammaPSDT = []
+    diff = []
+    marked_ICs = np.zeros(64)
+    for i in range(np.size(ICs_projections, 2)):
+        if (marked_ICs[i] <= 3):
+            actual_projection = np.transpose(ICs_projections[:, :, i])
+            pre_diff = []
+            pre_gamma = []
+            for k in range(np.size(actual_projection, 0)):
+                f, power = powerspectrum(actual_projection[k, :], FS)
+                actual_idealDistro1 = f[1:]
+                actual_idealDistro2 = np.divide(1, f[1:])
+
+                normedActual_idealDistro2 = np.divide(actual_idealDistro2, np.max(actual_idealDistro2))
+                normedPsCheck = np.divide(power[1:], np.max(power[1:]))
+
+                actual_diff = normedPsCheck - normedActual_idealDistro2
+
+                actual_specDistT = np.mean(math.sqrt(np.dot(actual_diff, actual_diff.T)))
+                pre_diff.append(actual_specDistT)
+                actual_gammaPSDT = np.mean(power[f > 30])
+                pre_gamma.append(actual_gammaPSDT)
+
+            diff.append(np.mean(np.array(pre_diff)))
+            gammaPSDT.append(np.mean(np.array(pre_gamma)))
+        else:
+            diff.append(-1)
+            gammaPSDT.append(-1)
+    gammaIC = np.array(gammaPSDT)
+    diffIC = np.array(diff)
+
+    for i in range(np.size(ICs_projections, 2)):
+        if (diffIC[i] > distance):
+            marked_ICs[i] = marked_ICs[i] + 1
+        if (gammaIC[i] > thGamma):
+            marked_ICs[i] = marked_ICs[i] + 1
+
+    return marked_ICs
\ No newline at end of file
--- a/artefact_6_Projection_STD.py
+++ b/artefact_6_Projection_STD.py
+import numpy as np
+
+""" 4).6 Check stds of projections of ICs. """
+def Projection_STD(ICs_projections):
+    pre_thetaProjection = []
+    marked_ICs = np.zeros(64)
+    for i in range(np.size(ICs_projections, 2)):
+        actual_projection = np.transpose(ICs_projections[:,:, i])
+        pre_stds = []
+        for k in range(np.size(actual_projection, 0)):
+            actual_stds = np.std(actual_projection[k])
+            pre_stds.append(actual_stds)
+        stds = np.array(pre_stds)
+        pre_thetaProjection.append(np.mean(stds))
+    thetaProjection = np.array(pre_thetaProjection)
+
+    mean_theta = np.mean(thetaProjection)
+    sigma_theta = np.std(thetaProjection)
+
+    for i in range(np.size(ICs_projections, 2)):
+        if (thetaProjection[i] > (mean_theta + (2 * sigma_theta))):
+            marked_ICs[i] = marked_ICs[i] + 1
+
+    return marked_ICs
\ No newline at end of file
--- a/artefact_7_Topographic_distribution.py
+++ b/artefact_7_Topographic_distribution.py
+import numpy as np
+
+""" 4).7 Topographic distribution of standard deviations """
+def Topographic_distribution(ICs_projections, info):
+    pre_frontal_chanels = []
+    pre_other_chanels = []
+    marked_ICs = np.zeros(64)
+    for i in range(np.size(info['ch_names'])):
+        if ((info['ch_names'][i].find('F') != -1) or ((info['ch_names'][i].find('f') != -1))):
+            pre_frontal_chanels.append(i)
+        else:
+            pre_other_chanels.append(i)
+    frontal_chanels = np.array(pre_frontal_chanels)
+    other_chanels = np.array(pre_other_chanels)
+
+    pre_R = []
+    for i in range(np.size(ICs_projections, 2)):
+        actual_projection = np.transpose(ICs_projections[:, :, i])
+        actual_std_front = np.std(actual_projection[frontal_chanels, :])
+        actual_std_other = np.std(actual_projection[other_chanels, :])
+        actual_R = actual_std_front / actual_std_other
+        pre_R.append(actual_R)
+
+    R = np.array(pre_R)
+    mean_R = np.mean(R)
+    std_R = np.std(R)
+    for i in range(np.size(R, 0)):
+        if (R[i] > (mean_R + std_R)):
+            marked_ICs[i] = marked_ICs[i] + 1
+
+    return marked_ICs
\ No newline at end of file
--- a/artefact_8_Amplitude_thresholding.py
+++ b/artefact_8_Amplitude_thresholding.py
+import numpy as np
+
+def Amplitude_thresholding(ICs_projections, thAmplitude, peakDifference):
+    marked_ICs = np.zeros(64)
+
+    for i in range(np.size(ICs_projections, 2)):
+        if (marked_ICs[i] <= 3):
+            actual_projection = np.transpose(ICs_projections[:, :, i])
+            actual_mean = np.mean(actual_projection, 0)
+            max_value = np.max(actual_mean)
+            min_value = np.min(actual_mean)
+            diff = max_value - min_value
+            if (max_value > thAmplitude or min_value < -(thAmplitude)):
+                marked_ICs[i] = marked_ICs[i] + 1
+            if (diff > peakDifference):
+                marked_ICs[i] = marked_ICs[i] + 1
+
+    return marked_ICs
\ No newline at end of file
--- a/artefact_force.py
+++ b/artefact_force.py
+import pywt
+import concurrent.futures
+import matplotlib.pyplot as plt
+from mne.io import read_raw_edf
+from preprocess.artefact_1_AMI import *
+from preprocess.artefact_2_Spiking import *
+from preprocess.artefact_3_Kurtosis import *
+from preprocess.artefact_4_5_PSD import *
+from preprocess.artefact_6_Projection_STD import *
+from preprocess.artefact_7_Topographic_distribution import *
+from preprocess.artefact_8_Amplitude_thresholding import *
+from preprocess.artefact_Spike_zone_thresholding import *
+from preprocess.artefact_PSD import *
+from sklearn.decomposition import FastICA
+
+""" This implementation is based on FORCe: Fully Online and Automated Artifact
+Removal for Brain-Computer Interfacing by Ian Daly, Reinhold Scherer, Martin Billinger, and Gernot Müller-Putz."""
+
+""" Constants: """
+# 4.1 AMI
+LAG_OFFSET = 60
+THRESHOLD_MAX = 2.0
+THRESHOLD_MIN = 1.0
+# 4.2 Spiking
+THRESHOLD_COEFF = 0.25
+# 4.4-5 PSD + Gamma
+DISTANCE = 3.5
+THRESHOLD_GAMMA = 1.7
+# 4.8 Large amplitude removing
+THRESHOLD_AMPLITUDE = 100
+PEAK_DIFFERENCE = 60
+# 5 Spike-zone thresholding
+DC_checkVal = 0.2
+DC_adjustVal = 0.07
+AC_checkVal = 0.7
+AC_adjustVal = 0.8
+
+REMOVING_THRESHOLD = 3
+
+
+def FORCe(info, original_data):
+    nCh = len(info['chs'])
+    data = np.multiply(original_data, 1e6)
+
+    """ Step 1) - 4) """
+    waveletname = 'sym4'
+    coeffitiantsLevel1 = []
+    coeffitiantsLevel2 = []
+    cA2 = []
+    cD1 = []
+    cD2 = []
+    marked_ICs = np.zeros(nCh)
+
+    """
+    # ''cAx'' is he array of approximation coefficient (low pass filters) and
+    # ''cDx'' is the list of detail coefficients (high pass filters).
+    # 2 level decomposition!
+    """
+
+    for i in range(np.size(data, 0)):
+        actual_coeff = pywt.wavedec(data[i][:], waveletname, level=1)
+        cA1_actual, cD1_actual = actual_coeff
+        coeffitiantsLevel1.append(actual_coeff)
+        cD1.append(cD1_actual)
+    D1 = np.array(cD1)
+
+    for i in range(np.size(data, 0)):
+        actual_coeff = pywt.wavedec(data[i][:], waveletname, level=2)
+        cA2_actual, cD2_actual, cD1_actual = actual_coeff
+        coeffitiantsLevel2.append(actual_coeff)
+        cA2.append(cA2_actual)
+        cD2.append(cD2_actual)
+    A2 = np.array(cA2)
+    D2 = np.array(cD2)
+
+    """
+    # S: 2D array containing estimated source signals
+    # A: 2D array containing mixing matrix, i.e. A.dot(S) = X
+    """
+    ica = FastICA(n_components=len(info['chs']), algorithm='parallel')
+    S = ica.fit(A2.T).transform(A2.T)
+    A = ica.mixing_
+    assert np.allclose(A2.T, np.dot(S, A.T) + ica.mean_)
+    assert A.shape[0] == A.shape[1]
+
+    ICs_projections = np.zeros((np.size(S, 0), np.size(S, 1), np.size(S, 1)))
+    for i in range(np.size(S, 1)):
+        actual_S = np.copy(S)
+        for j in range(np.size(S, 1)):
+            if (j != i):
+                actual_S[:, j] = 0
+        actual_projection = np.dot(actual_S, A.T)
+        ICs_projections[:, :, i] = actual_projection
+
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        toRemoveICs1 = executor.submit(Auto_Mutual_Information, ICs_projections, LAG_OFFSET, THRESHOLD_MAX, THRESHOLD_MIN)
+        toRemoveICs2 = executor.submit(Spiking_activity, S, THRESHOLD_COEFF)
+        toRemoveICs3 = executor.submit(Kurtosis, ICs_projections)
+        toRemoveICs4 = executor.submit(PSD, ICs_projections, info['sfreq'], DISTANCE, THRESHOLD_GAMMA)
+        toRemoveICs5 = executor.submit(Projection_STD, ICs_projections)
+        toRemoveICs6 = executor.submit(Topographic_distribution, ICs_projections, info)
+        toRemoveICs7 = executor.submit(Amplitude_thresholding, ICs_projections, THRESHOLD_AMPLITUDE, PEAK_DIFFERENCE)
+
+    for i in range(np.size(marked_ICs)):
+         marked_ICs[i] = toRemoveICs1.result()[i] + toRemoveICs2.result()[i] + toRemoveICs3.result()[i] + toRemoveICs4.result()[i] + toRemoveICs5.result()[i] + toRemoveICs6.result()[i] + toRemoveICs7.result()[i]
+
+    # Without paralellisation:
+    """ Step 4).1: Auto-Mutal Informatin """
+    # toRemoveICs1 = Auto_Mutual_Information(ICs_projections, LAG_OFFSET, THRESHOLD_MAX, THRESHOLD_MIN, marked_ICs)
+    # print('Marked to remove by AMI: ', toRemoveICs1)
+
+    """ Step 4).2: Spiking activity """
+    # toRemoveICs2a, toRemoveICs2b = Spiking_activity(S, THRESHOLD_COEFF, marked_ICs)
+    # print('Marked to remove by Spiking-activity: ', toRemoveICs2a)
+    # print('Marked to remove by Spike zone coefficients: ', toRemoveICs2b)
+
+    """ 4).3 Kurtosis """
+    # toRemoveICs3 = Kurtosis(ICs_projections, marked_ICs)
+    # print('Marked to remove by Kurtosis : ', toRemoveICs3)
+
+    """ 4).4-5 Check PSDs + PSD of gamma frequency """
+    # toRemoveICs4, toRemoveICs5 = PSD(ICs_projections, info['sfreq'], DISTANCE, THRESHOLD_GAMMA, marked_ICs)
+    # print('Marked to remove by PSD & 1/F distribution: ', toRemoveICs4)
+    # print('Marked to remove by Gamma frequency: ', toRemoveICs5)
+
+    """ 4).6 Check stds of projections of ICs. """
+    # toRemoveICs6 = Projection_STD(ICs_projections, marked_ICs)
+    # print('Marked to remove by Std: ', toRemoveICs6)
+
+    """ 4).7 Topographic distribution of standard deviations """
+    # toRemoveICs7 = Topographic_distribution(ICs_projections, info, marked_ICs)
+    # print('Marked to remove by topographic distribution: ', toRemoveICs7)
+
+    """ 4).8 Remove ICs with large amplitudes """
+    # toRemoveICs8a, toRemoveICs8b = Amplitude_thresholding(ICs_projections, THRESHOLD_AMPLITUDE, PEAK_DIFFERENCE,
+    #                                                       marked_ICs)
+    # print('Marked to remove by Large amplitudes A: ', toRemoveICs8a)
+    # print('Marked to remove by Large amplitudes B: ', toRemoveICs8b)
+
+    for i in range(np.size(marked_ICs)):
+        if (marked_ICs[i] > REMOVING_THRESHOLD):
+            S[:, i] = 0
+
+    clean_A2 = (ica.inverse_transform(S)).T
+
+    """ 5) Spike Zone Thresholding """
+    newD1 = Thresholding(D1, DC_checkVal, DC_adjustVal)
+    newA2 = Thresholding(clean_A2, AC_checkVal, AC_adjustVal)
+    newD2 = Thresholding(D2, DC_checkVal, DC_adjustVal)
+
+    print("Number of channel markings: ", marked_ICs)
+
+    pre_clea_data = []
+    for i in range(np.size(D2, 0)):
+        coeff = [newA2[i], newD2[i], newD1[i]]
+        actual_reconstuction = pywt.waverec(coeff, waveletname)
+        pre_clea_data.append(actual_reconstuction)
+
+    clean_data = np.array(pre_clea_data)
+    return clean_data
+
+class ArtefactFilter:
+
+    def offline_filter(self, epochs):
+        """Offline Faster algorithm
+
+        Filters the input epochs, and saves the parameters (such as ICA weights),
+        for the possibility of online filtering
+
+        Parameters
+        ----------
+        epochs : mne.Epochs
+            The epochs to analyze
+
+        Returns
+        -------
+        mne.Epochs
+            The filtered epoch
+        """
+        for i in range(len(epochs)):
+            epochs._data[i, ...] = FORCe(epochs.info, epochs[i].get_data()[0])[:,:-1]
+        return epochs
+
+
+if __name__ == '__main__':
+
+    file_name = 'F:/ÖNLAB/Databases/physionet.org/physiobank/database/eegmmidb/S001/S001R03.edf'
+    raw = read_raw_edf(file_name)
+    raw_croped = raw.crop(tmax=60).load_data()
+    raw_croped.resample(500)
+    data = raw_croped.get_data()
+    sampling_freq = raw.info['sfreq']
+    chanel_number = np.size(raw.info['ch_names'])
+
+    """ The following parameters must always be set manually! """
+    windowLengthCoeff = 1
+    windowLength = int(windowLengthCoeff * sampling_freq)
+    # Whole data length:
+    N = windowLength * (int(np.size(data, 1) / windowLength))
+    # Just for a partition of data:
+    # N = windowLength * 10
+    rest = np.size(data, 1) - N
+    preEEG_clean = []
+
+    for windowPosition in range(0, N, windowLength):
+        window = np.arange(windowPosition, (windowPosition + windowLength), 1, dtype=int)
+        preEEG_clean.append(FORCe(raw_croped.info, data[:, window]))
+        print('--- ' + str(window[0]) + '-' + str(window[-1]) + ' ---')
+
+    EEG_clean = np.concatenate(preEEG_clean, axis=1)
+
+    """ -- Plot all of chanels -- """
+    # for i in range(np.size(EEG_clean, 0)):
+    #     plt.subplot(np.size(EEG_clean, 0), 1, i+1)
+    #     plt.plot(EEG_clean[i, :])
+    # plt.show()
+
+    """ -- Plot chanels grouped 8 chanels -- """
+    # for i in range(int(np.size(EEG_clean, 0)/8)):
+    #     for j in range(8):
+    #         plt.subplot(np.size(EEG_clean, 0)/8, 1, j+1)
+    #         plt.plot(np.multiply(data[(i*8)+j, :], 1e6), 'r')
+    #         plt.plot(EEG_clean[(i * 8)+j, :])
+    #     plt.show()
+
+    """ -- Plot chanels grouped 16 chanels -- """
+    for i in range(int(np.size(data, 0) / 16)):
+        for j in range(16):
+            plt.subplot(np.size(EEG_clean, 0) / 4, 1, j + 1)
+            plt.plot(np.multiply(data[(i * 16) + j, :], 1e6), 'r')
+            plt.plot(EEG_clean[(i * 16) + j, :])
+            plt.ylabel(raw.info['ch_names'][(i * 16)+j])
+        plt.show()
\ No newline at end of file