Skip to content
Snippets Groups Projects
Commit b73bc61c authored by Harascsák Ádám's avatar Harascsák Ádám
Browse files

Thesis work.

parents
No related branches found
No related tags found
No related merge requests found
import numpy as np
import math
def calc_MI(X,Y,bins):
c_XY = np.histogram2d(X,Y,bins)[0]
c_X = np.histogram(X,bins)[0]
c_Y = np.histogram(Y,bins)[0]
H_X = shan_entropy(c_X)
H_Y = shan_entropy(c_Y)
H_XY = shan_entropy(c_XY)
MI = H_X + H_Y - H_XY
return MI
def shan_entropy(c):
c_normalized = c / float(np.sum(c))
c_normalized = c_normalized[np.nonzero(c_normalized)]
H = -sum(c_normalized* np.log2(c_normalized))
return H
""" Step 4).1: Auto-Mutal Informatin """
def Auto_Mutual_Information(ICs_projections, lag_offset, max_value, min_value):
marked_ICs = np.zeros(64)
pre_AMIs = []
for i in range(np.size(ICs_projections, 2)):
actual_projection = np.transpose(ICs_projections[:, :, i])
lag_offsets = np.array([lag_offset])
cAMIs = []
for lagNo in range(np.size(lag_offsets)):
lags = np.arange(0, math.floor(np.size(ICs_projections, 1) / 2), lag_offsets[lagNo])
end_p = np.size(ICs_projections, 1)
for chNo in range(np.size(actual_projection, 0)):
actual_AMIs = []
for k in range(np.size(lags)):
actual_AMIs.append(
calc_MI(actual_projection[chNo, 0:end_p - lags[k]], actual_projection[chNo, lags[k]:end_p], 5))
cAMIs.append(np.mean(actual_AMIs))
pre_AMIs.append(np.max(cAMIs))
AMIs = np.array(pre_AMIs)
thresholdMax = max_value
thresholddMin = min_value
for i in range(np.size(AMIs)):
if (AMIs[i] > thresholdMax or AMIs[i] < thresholddMin):
marked_ICs[i] = marked_ICs[i] + 1
return marked_ICs
\ No newline at end of file
import numpy as np
from scipy.signal import find_peaks
""" Step 4).2: Spiking activity """
def Spiking_activity(S, coefficientThreshold):
pre_spike_threshold = []
marked_ICs = np.zeros(64)
IC = S.T
for i in range(np.size(IC, 0)):
peaks_actual, _ = find_peaks(np.abs(IC[i, :]))
actual_peak_value = np.abs(IC[i, peaks_actual])
mean_actual = np.mean(actual_peak_value)
std_actual = np.std(actual_peak_value)
threshold_actual = mean_actual + (3 * std_actual)
pre_spike_threshold.append(threshold_actual)
if np.max(actual_peak_value) > threshold_actual:
marked_ICs[i] = marked_ICs[i] + 1
""" -- Spike zone coefficients -- """
actual_signal = np.abs(IC[i])
pre_actual_spike_position = []
pre_actual_Co = []
for j in range(1, np.size(actual_signal) - 1):
if (actual_signal[j] > actual_signal[j - 1] and actual_signal[j] > actual_signal[j + 1]):
pre_actual_spike_position.append(j)
# Pythonban [x:y] = [x,y) => mivel az utolsó index nincs benne, j-1:j+2!
actual_mean = np.mean(actual_signal[j - 1:j + 2])
actual_std = np.std(actual_signal[j - 1:j + 2])
pre_actual_Co.append(actual_mean / actual_std)
actual_Co = np.array(pre_actual_Co)
actual_spike_position = np.array(pre_actual_spike_position)
threshold = 0.1 * (np.mean(actual_Co) + np.std(actual_Co))
if (np.size(actual_Co) > 0):
high_Co_counter = 0
for k in range(np.size(actual_spike_position)):
if (actual_Co[k] > threshold):
high_Co_counter += 1
no_spikes = np.size(high_Co_counter) / np.size(actual_spike_position)
else:
no_spikes = 0
if (no_spikes > coefficientThreshold):
marked_ICs[i] = marked_ICs[i] + 1
return marked_ICs
\ No newline at end of file
import numpy as np
from scipy.stats import kurtosis
""" 4).3 Kurtosis """
def Kurtosis(ICs_projections):
pre_IC_kurtosis = []
marked_ICs = np.zeros(64)
for i in range(np.size(ICs_projections, 2)):
actual_projection = np.transpose(ICs_projections[:, :, i])
pre_kurtosis = []
for k in range(np.size(actual_projection, 0)):
actual_kurtosis = kurtosis(actual_projection[k, :])
pre_kurtosis.append(actual_kurtosis)
pre_IC_kurtosis.append(np.mean(np.array(pre_kurtosis)))
IC_kurtosis = np.array(pre_IC_kurtosis)
mu = np.mean(IC_kurtosis)
sigma = np.std(IC_kurtosis)
for i in range(np.size(IC_kurtosis)):
k = IC_kurtosis[i]
if k > (mu + (0.5 * sigma)):
marked_ICs[i] = marked_ICs[i] + 1
return marked_ICs
\ No newline at end of file
import numpy as np
import math
from scipy.fft import fft, ifft
def indices(a, func):
return [i for (i, val) in enumerate(a) if func(val)]
def nextpow2(x):
""" Function for finding the next power of 2 """
return 1 if x == 0 else math.ceil(math.log2(x))
def powerspectrum(data, Fs):
T = 1 / Fs
L = np.size(data, 0)
NFFT = 2 ** nextpow2(L)
Y = fft(data, NFFT) / L
f = Fs / 2 * np.linspace(0, 1, int(NFFT / 2))
power = 2 * np.abs(Y[0:int(NFFT / 2)])
return f, power
""" 4).4-5 Check PSDs + PSD of gamma frequency """
def PSD(ICs_projections, FS, distance, thGamma):
gammaPSDT = []
diff = []
marked_ICs = np.zeros(64)
for i in range(np.size(ICs_projections, 2)):
if (marked_ICs[i] <= 3):
actual_projection = np.transpose(ICs_projections[:, :, i])
pre_diff = []
pre_gamma = []
for k in range(np.size(actual_projection, 0)):
f, power = powerspectrum(actual_projection[k, :], FS)
actual_idealDistro1 = f[1:]
actual_idealDistro2 = np.divide(1, f[1:])
normedActual_idealDistro2 = np.divide(actual_idealDistro2, np.max(actual_idealDistro2))
normedPsCheck = np.divide(power[1:], np.max(power[1:]))
actual_diff = normedPsCheck - normedActual_idealDistro2
actual_specDistT = np.mean(math.sqrt(np.dot(actual_diff, actual_diff.T)))
pre_diff.append(actual_specDistT)
actual_gammaPSDT = np.mean(power[f > 30])
pre_gamma.append(actual_gammaPSDT)
diff.append(np.mean(np.array(pre_diff)))
gammaPSDT.append(np.mean(np.array(pre_gamma)))
else:
diff.append(-1)
gammaPSDT.append(-1)
gammaIC = np.array(gammaPSDT)
diffIC = np.array(diff)
for i in range(np.size(ICs_projections, 2)):
if (diffIC[i] > distance):
marked_ICs[i] = marked_ICs[i] + 1
if (gammaIC[i] > thGamma):
marked_ICs[i] = marked_ICs[i] + 1
return marked_ICs
\ No newline at end of file
import numpy as np
""" 4).6 Check stds of projections of ICs. """
def Projection_STD(ICs_projections):
pre_thetaProjection = []
marked_ICs = np.zeros(64)
for i in range(np.size(ICs_projections, 2)):
actual_projection = np.transpose(ICs_projections[:,:, i])
pre_stds = []
for k in range(np.size(actual_projection, 0)):
actual_stds = np.std(actual_projection[k])
pre_stds.append(actual_stds)
stds = np.array(pre_stds)
pre_thetaProjection.append(np.mean(stds))
thetaProjection = np.array(pre_thetaProjection)
mean_theta = np.mean(thetaProjection)
sigma_theta = np.std(thetaProjection)
for i in range(np.size(ICs_projections, 2)):
if (thetaProjection[i] > (mean_theta + (2 * sigma_theta))):
marked_ICs[i] = marked_ICs[i] + 1
return marked_ICs
\ No newline at end of file
import numpy as np
""" 4).7 Topographic distribution of standard deviations """
def Topographic_distribution(ICs_projections, info):
pre_frontal_chanels = []
pre_other_chanels = []
marked_ICs = np.zeros(64)
for i in range(np.size(info['ch_names'])):
if ((info['ch_names'][i].find('F') != -1) or ((info['ch_names'][i].find('f') != -1))):
pre_frontal_chanels.append(i)
else:
pre_other_chanels.append(i)
frontal_chanels = np.array(pre_frontal_chanels)
other_chanels = np.array(pre_other_chanels)
pre_R = []
for i in range(np.size(ICs_projections, 2)):
actual_projection = np.transpose(ICs_projections[:, :, i])
actual_std_front = np.std(actual_projection[frontal_chanels, :])
actual_std_other = np.std(actual_projection[other_chanels, :])
actual_R = actual_std_front / actual_std_other
pre_R.append(actual_R)
R = np.array(pre_R)
mean_R = np.mean(R)
std_R = np.std(R)
for i in range(np.size(R, 0)):
if (R[i] > (mean_R + std_R)):
marked_ICs[i] = marked_ICs[i] + 1
return marked_ICs
\ No newline at end of file
import numpy as np
def Amplitude_thresholding(ICs_projections, thAmplitude, peakDifference):
marked_ICs = np.zeros(64)
for i in range(np.size(ICs_projections, 2)):
if (marked_ICs[i] <= 3):
actual_projection = np.transpose(ICs_projections[:, :, i])
actual_mean = np.mean(actual_projection, 0)
max_value = np.max(actual_mean)
min_value = np.min(actual_mean)
diff = max_value - min_value
if (max_value > thAmplitude or min_value < -(thAmplitude)):
marked_ICs[i] = marked_ICs[i] + 1
if (diff > peakDifference):
marked_ICs[i] = marked_ICs[i] + 1
return marked_ICs
\ No newline at end of file
import pywt
import concurrent.futures
import matplotlib.pyplot as plt
from mne.io import read_raw_edf
from preprocess.artefact_1_AMI import *
from preprocess.artefact_2_Spiking import *
from preprocess.artefact_3_Kurtosis import *
from preprocess.artefact_4_5_PSD import *
from preprocess.artefact_6_Projection_STD import *
from preprocess.artefact_7_Topographic_distribution import *
from preprocess.artefact_8_Amplitude_thresholding import *
from preprocess.artefact_Spike_zone_thresholding import *
from preprocess.artefact_PSD import *
from sklearn.decomposition import FastICA
""" This implementation is based on FORCe: Fully Online and Automated Artifact
Removal for Brain-Computer Interfacing by Ian Daly, Reinhold Scherer, Martin Billinger, and Gernot Müller-Putz."""
""" Constants: """
# 4.1 AMI
LAG_OFFSET = 60
THRESHOLD_MAX = 2.0
THRESHOLD_MIN = 1.0
# 4.2 Spiking
THRESHOLD_COEFF = 0.25
# 4.4-5 PSD + Gamma
DISTANCE = 3.5
THRESHOLD_GAMMA = 1.7
# 4.8 Large amplitude removing
THRESHOLD_AMPLITUDE = 100
PEAK_DIFFERENCE = 60
# 5 Spike-zone thresholding
DC_checkVal = 0.2
DC_adjustVal = 0.07
AC_checkVal = 0.7
AC_adjustVal = 0.8
REMOVING_THRESHOLD = 3
def FORCe(info, original_data):
nCh = len(info['chs'])
data = np.multiply(original_data, 1e6)
""" Step 1) - 4) """
waveletname = 'sym4'
coeffitiantsLevel1 = []
coeffitiantsLevel2 = []
cA2 = []
cD1 = []
cD2 = []
marked_ICs = np.zeros(nCh)
"""
# ''cAx'' is he array of approximation coefficient (low pass filters) and
# ''cDx'' is the list of detail coefficients (high pass filters).
# 2 level decomposition!
"""
for i in range(np.size(data, 0)):
actual_coeff = pywt.wavedec(data[i][:], waveletname, level=1)
cA1_actual, cD1_actual = actual_coeff
coeffitiantsLevel1.append(actual_coeff)
cD1.append(cD1_actual)
D1 = np.array(cD1)
for i in range(np.size(data, 0)):
actual_coeff = pywt.wavedec(data[i][:], waveletname, level=2)
cA2_actual, cD2_actual, cD1_actual = actual_coeff
coeffitiantsLevel2.append(actual_coeff)
cA2.append(cA2_actual)
cD2.append(cD2_actual)
A2 = np.array(cA2)
D2 = np.array(cD2)
"""
# S: 2D array containing estimated source signals
# A: 2D array containing mixing matrix, i.e. A.dot(S) = X
"""
ica = FastICA(n_components=len(info['chs']), algorithm='parallel')
S = ica.fit(A2.T).transform(A2.T)
A = ica.mixing_
assert np.allclose(A2.T, np.dot(S, A.T) + ica.mean_)
assert A.shape[0] == A.shape[1]
ICs_projections = np.zeros((np.size(S, 0), np.size(S, 1), np.size(S, 1)))
for i in range(np.size(S, 1)):
actual_S = np.copy(S)
for j in range(np.size(S, 1)):
if (j != i):
actual_S[:, j] = 0
actual_projection = np.dot(actual_S, A.T)
ICs_projections[:, :, i] = actual_projection
with concurrent.futures.ThreadPoolExecutor() as executor:
toRemoveICs1 = executor.submit(Auto_Mutual_Information, ICs_projections, LAG_OFFSET, THRESHOLD_MAX, THRESHOLD_MIN)
toRemoveICs2 = executor.submit(Spiking_activity, S, THRESHOLD_COEFF)
toRemoveICs3 = executor.submit(Kurtosis, ICs_projections)
toRemoveICs4 = executor.submit(PSD, ICs_projections, info['sfreq'], DISTANCE, THRESHOLD_GAMMA)
toRemoveICs5 = executor.submit(Projection_STD, ICs_projections)
toRemoveICs6 = executor.submit(Topographic_distribution, ICs_projections, info)
toRemoveICs7 = executor.submit(Amplitude_thresholding, ICs_projections, THRESHOLD_AMPLITUDE, PEAK_DIFFERENCE)
for i in range(np.size(marked_ICs)):
marked_ICs[i] = toRemoveICs1.result()[i] + toRemoveICs2.result()[i] + toRemoveICs3.result()[i] + toRemoveICs4.result()[i] + toRemoveICs5.result()[i] + toRemoveICs6.result()[i] + toRemoveICs7.result()[i]
# Without paralellisation:
""" Step 4).1: Auto-Mutal Informatin """
# toRemoveICs1 = Auto_Mutual_Information(ICs_projections, LAG_OFFSET, THRESHOLD_MAX, THRESHOLD_MIN, marked_ICs)
# print('Marked to remove by AMI: ', toRemoveICs1)
""" Step 4).2: Spiking activity """
# toRemoveICs2a, toRemoveICs2b = Spiking_activity(S, THRESHOLD_COEFF, marked_ICs)
# print('Marked to remove by Spiking-activity: ', toRemoveICs2a)
# print('Marked to remove by Spike zone coefficients: ', toRemoveICs2b)
""" 4).3 Kurtosis """
# toRemoveICs3 = Kurtosis(ICs_projections, marked_ICs)
# print('Marked to remove by Kurtosis : ', toRemoveICs3)
""" 4).4-5 Check PSDs + PSD of gamma frequency """
# toRemoveICs4, toRemoveICs5 = PSD(ICs_projections, info['sfreq'], DISTANCE, THRESHOLD_GAMMA, marked_ICs)
# print('Marked to remove by PSD & 1/F distribution: ', toRemoveICs4)
# print('Marked to remove by Gamma frequency: ', toRemoveICs5)
""" 4).6 Check stds of projections of ICs. """
# toRemoveICs6 = Projection_STD(ICs_projections, marked_ICs)
# print('Marked to remove by Std: ', toRemoveICs6)
""" 4).7 Topographic distribution of standard deviations """
# toRemoveICs7 = Topographic_distribution(ICs_projections, info, marked_ICs)
# print('Marked to remove by topographic distribution: ', toRemoveICs7)
""" 4).8 Remove ICs with large amplitudes """
# toRemoveICs8a, toRemoveICs8b = Amplitude_thresholding(ICs_projections, THRESHOLD_AMPLITUDE, PEAK_DIFFERENCE,
# marked_ICs)
# print('Marked to remove by Large amplitudes A: ', toRemoveICs8a)
# print('Marked to remove by Large amplitudes B: ', toRemoveICs8b)
for i in range(np.size(marked_ICs)):
if (marked_ICs[i] > REMOVING_THRESHOLD):
S[:, i] = 0
clean_A2 = (ica.inverse_transform(S)).T
""" 5) Spike Zone Thresholding """
newD1 = Thresholding(D1, DC_checkVal, DC_adjustVal)
newA2 = Thresholding(clean_A2, AC_checkVal, AC_adjustVal)
newD2 = Thresholding(D2, DC_checkVal, DC_adjustVal)
print("Number of channel markings: ", marked_ICs)
pre_clea_data = []
for i in range(np.size(D2, 0)):
coeff = [newA2[i], newD2[i], newD1[i]]
actual_reconstuction = pywt.waverec(coeff, waveletname)
pre_clea_data.append(actual_reconstuction)
clean_data = np.array(pre_clea_data)
return clean_data
class ArtefactFilter:
def offline_filter(self, epochs):
"""Offline Faster algorithm
Filters the input epochs, and saves the parameters (such as ICA weights),
for the possibility of online filtering
Parameters
----------
epochs : mne.Epochs
The epochs to analyze
Returns
-------
mne.Epochs
The filtered epoch
"""
for i in range(len(epochs)):
epochs._data[i, ...] = FORCe(epochs.info, epochs[i].get_data()[0])[:,:-1]
return epochs
if __name__ == '__main__':
file_name = 'F:/ÖNLAB/Databases/physionet.org/physiobank/database/eegmmidb/S001/S001R03.edf'
raw = read_raw_edf(file_name)
raw_croped = raw.crop(tmax=60).load_data()
raw_croped.resample(500)
data = raw_croped.get_data()
sampling_freq = raw.info['sfreq']
chanel_number = np.size(raw.info['ch_names'])
""" The following parameters must always be set manually! """
windowLengthCoeff = 1
windowLength = int(windowLengthCoeff * sampling_freq)
# Whole data length:
N = windowLength * (int(np.size(data, 1) / windowLength))
# Just for a partition of data:
# N = windowLength * 10
rest = np.size(data, 1) - N
preEEG_clean = []
for windowPosition in range(0, N, windowLength):
window = np.arange(windowPosition, (windowPosition + windowLength), 1, dtype=int)
preEEG_clean.append(FORCe(raw_croped.info, data[:, window]))
print('--- ' + str(window[0]) + '-' + str(window[-1]) + ' ---')
EEG_clean = np.concatenate(preEEG_clean, axis=1)
""" -- Plot all of chanels -- """
# for i in range(np.size(EEG_clean, 0)):
# plt.subplot(np.size(EEG_clean, 0), 1, i+1)
# plt.plot(EEG_clean[i, :])
# plt.show()
""" -- Plot chanels grouped 8 chanels -- """
# for i in range(int(np.size(EEG_clean, 0)/8)):
# for j in range(8):
# plt.subplot(np.size(EEG_clean, 0)/8, 1, j+1)
# plt.plot(np.multiply(data[(i*8)+j, :], 1e6), 'r')
# plt.plot(EEG_clean[(i * 8)+j, :])
# plt.show()
""" -- Plot chanels grouped 16 chanels -- """
for i in range(int(np.size(data, 0) / 16)):
for j in range(16):
plt.subplot(np.size(EEG_clean, 0) / 4, 1, j + 1)
plt.plot(np.multiply(data[(i * 16) + j, :], 1e6), 'r')
plt.plot(EEG_clean[(i * 16) + j, :])
plt.ylabel(raw.info['ch_names'][(i * 16)+j])
plt.show()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment