diff --git a/09_eeg_use_case.ipynb b/09_eeg_use_case.ipynb index eaf08b5dd20b6a1f5a9297efbd122a22c94f4f90..dac9a3a1c93db71a659e1af7d616002f5dc7be1f 100644 --- a/09_eeg_use_case.ipynb +++ b/09_eeg_use_case.ipynb @@ -2,32 +2,32 @@ "cells": [ { "cell_type": "code", - "execution_count": 22, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", - "import os\n", - "# import glob\n", - "import pandas as pd\n", + "# import os\n", + "# # import glob\n", + "# import pandas as pd\n", "# from scipy.signal import resample, butter, lfilter\n", "import matplotlib.pyplot as plt\n", - "\n", "from tqdm import tqdm\n", - "# from itertools import islice\n", - "\n", - "from sklearn.decomposition import PCA\n", - "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA\n", - "from sklearn.linear_model import LogisticRegression\n", - "from sklearn.ensemble import RandomForestClassifier, VotingClassifier, AdaBoostClassifier\n", - "from sklearn.metrics import precision_recall_fscore_support, roc_auc_score, confusion_matrix\n", - "from sklearn.model_selection import train_test_split\n", + "# # from itertools import islice\n", + "\n", + "# from sklearn.decomposition import PCA\n", + "# from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA\n", + "# from sklearn.linear_model import LogisticRegression\n", + "# from sklearn.ensemble import RandomForestClassifier, VotingClassifier, AdaBoostClassifier\n", + "# from sklearn.metrics import precision_recall_fscore_support, roc_auc_score, confusion_matrix\n", + "# from sklearn.model_selection import cross_val_score, train_test_split\n", "# from sklearn.preprocessing import StandardScaler\n", + "# from sklearn.pipeline import make_pipeline\n", "\n", - "# from multiprocessing import Pool\n", - "# from multiprocessing.pool import ThreadPool\n", + "# # from multiprocessing import Pool\n", + "# # from multiprocessing.pool import ThreadPool\n", "\n", - "# import time" + "# # 
import time" ] }, { @@ -97,7 +97,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Task 1: Load the trainig and test data sets and ... the order of the sessions." + "The data can be found in: '/data/eeg_use_case' and contains:\n", + "\n", + "- 8 series of recorded EEG data\n", + "\n", + "- 8 series of events of arm movements\n", + "\n", + "Load the EEG data and the events:\n", + "- combine all EEG series in one array (size: (total number of time series, number of channels))\n", + "- combine all events in one array (size: (total number of time series, number of different arm movement))\n", + "- pay attention to the order of the series" ] }, { @@ -106,31 +115,13 @@ "metadata": {}, "outputs": [], "source": [ - "def filter_data(data, events, subj = None):\n", - " # filter data for specific subjects\n", - " if subj:\n", - " data_filt = list(filter(lambda x: subj + '_' in x, data))\n", - " else:\n", - " data_filt = data\n", - "\n", - " events_filt = []\n", - " for d in data_filt:\n", - " subj, series, end = d.split('_')\n", - " ix = np.where([subj + '_' in a and series in a for a in events])[0][0]\n", - " events_filt.append(events[ix])\n", - "\n", - " return data_filt, events_filt\n", - "\n", "def load_data(file_names, path):\n", " # read the csv file and drop the id column\n", " dfs = []\n", " for f in file_names:\n", - " df = pd.read_csv(path + f)\n", - " df = df.drop('id', axis = 1)\n", + " df = pd.read_csv(path + f).drop('id', axis = 1)\n", " dfs.append(df)\n", - " #all_dfs = pd.concat(dfs)\n", - " all_dfs = dfs\n", - " return all_dfs" + " return dfs" ] }, { @@ -139,11 +130,17 @@ "metadata": {}, "outputs": [], "source": [ - "# define path and list all data and event files\n", - "path = '../ml-use-case-eeg/train/' \n", + "# define path and list of all data and event files\n", + "import os\n", + "import pandas as pd\n", + "\n", + "path = 'data/eeg_use_case/' \n", "\n", "all_data_files = list(filter(lambda x: '_data' in x, os.listdir(path)))\n", - 
"all_event_files = list(filter(lambda x: '_events' in x, os.listdir(path)))" + "all_event_files = list(filter(lambda x: '_events' in x, os.listdir(path)))\n", + "\n", + "all_data_sort = np.sort(all_data_files)\n", + "all_event_sort = np.sort(all_event_files)" ] }, { @@ -152,8 +149,9 @@ "metadata": {}, "outputs": [], "source": [ - "# sort data and event file names\n", - "data_filt, events_filt = filter_data(all_data_files, all_event_files, subj='subj1')" + "# load all data and event files\n", + "all_data = np.concatenate(load_data(all_data_sort, path))\n", + "all_events = np.concatenate(load_data(all_event_sort, path))" ] }, { @@ -162,9 +160,19 @@ "metadata": {}, "outputs": [], "source": [ - "# load all data and event files\n", - "all_data = np.concatenate(load_data(data_filt, path))\n", - "all_events = np.concatenate(load_data(events_filt, path))" + "# # sort data and event file names\n", + "# data_filt, events_filt = filter_data(all_data_files, all_event_files, subj='subj1')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# # load all data and event files\n", + "# all_data = np.concatenate(load_data(data_filt, path))\n", + "# all_events = np.concatenate(load_data(events_filt, path))" ] }, { @@ -174,6 +182,54 @@ "### Visualization" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Visualize the EEG-data and events and pay attention to:\n", + "- the EEG traces\n", + "- the number of detected arm movements" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# visualization of data" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAnMAAAGwCAYAAADCJOOJAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3Xm0ZWV55/HvU8wyiVYxhQXF6ECAgqogIJQIXWqLSXdMIiixrWC0lUjjMissF+LqAiNqgkS6E0TQpFAwGolLkEEIhnkQqgChpEAQq5C5kKmQoRie/mPvC5vT51ad4d57znvv97PWXpy993ve/d733OfyO/vsfSoyE0mSJJVp2qAHIEmSpN4Z5iRJkgpmmJMkSSqYYU6SJKlghjlJkqSCGeYkSZIKZpiTJEkqmGFOEyYiFkbE+R202zUizomIeyIiI2LBBAxPGnpd1NDHIuKqiHg8Ip6IiMsiYv+JGKM0zLqooT+LiEV1/fwuIm6JiI9MxBh7YZjTMHodsAw4Dvj1YIciFelA4PvAQcDbgDuBiyNi50EOSirIb4G/BfYBdgf+BfhWRLx3oKMaTWa6jPECXA58Hfgq8BiwAjgaWA/4J+AJ4F7gwy3P2w24FHi2ft5CYNN637uAVcAbW55zInBrY30/4ArgGeD+ehybjMHYfg/4HvB4vVwA7NzYvwBYAhwG/ApYCfwImN7Yny3LgR3M5RJgwaBfU5eJXayhsauh+rkBPAQcNejX1mViFmtobGuofv5NwJcG/dq2WzwzN34Op/pFehvwZeBrVL9UvwTmAGcC34yIrQAiYkPgYuBpYG/gj6kK4p/r/n4KPAr82cgBIiKADwFn1eu7AZcA5wF7AO8HZjX66HVsrwMuA54D3gHsCzwIXFrvGzETOLQe+7uAPYEv1vtOAv6N6o/EVvVybQfzqKnLGhq7GloXWJ/qf4CaOqyhMaihqBwMvAm4ck3tB2LQaXIyLlTvOq5rrAfVO4/zGtvWoXqH86f1+seAJ4GNG20OpHrnsFO9fjJwVWP//sBLwDb1+reBb7WMZVbdx+Z9jO0I4C4gGm3WojoN/YF6fQFVkW3aaPM54O7G+kLg/C7n0jNzU3Cxhl5p03cN1c/7e+A+GmdHXCb3Yg290qbnGgI2pQq2L9T9fnTQr+toi2fmxs+tIw+y+q14BLitse0FqnfJm9eb3kJ1mnplo49rgZeBt9brZwFvj4jt6vXDgSsy8756fTbw5xHx9MgCXFPv27GPsc0GtgdWNvp9Etispd/lmflkY/2BRh9St6yhMaihiDga+J/A+zPzqX76UnGsof5qaCVVEP0DqlB4cn2GbuisPegBTGIvtKznKNs6CdQJkJk3RcQdwIci4iSqU93HNNpNA74J/EObPu7vY2zTgFuorkNo9dga+vUNg3plDfVZQxHxaeALwH/NzBt67UfFsob6qKHMfBm4u169JSLeAhxL9XHzUDHMDY+lwBERsXHjXdF+VL+ESxvtzqJ6J7QE2BA4p7HvJmDXzLybsXUT8EHg0cx8oo9+VlGdFpfGgzXUEBGfAY4HDsnMq/s4pqYOa2j1plHdpDF0PGsyPM6muvPn2xGxW0TMBb4B/LClKM6mOt39BeDHLR+bfAXYOyJOi4g9I2KniHhfRHxjDMb2MHBuRLwjIraPiLkR8dUuv+pgGfD7EfGmiJgeEeu0axQR60bErIiYRXXR9pb1+k59/hya3KyhWkT8DdVF5R8FfhkRW9bLpn3+HJrcrKFaRHwuIv5LROwQEW+JiL8GPkx9o8ewMcwNicx8Bng3sAlwA3AucB3VRZ/NdsuBq6nuEjqrZd+twFyqu3muAH4OfImqAPod21zgHuAHwB1UdxptRnd3x51B9e5uEdXFrm8fpd3WwM31siPV9T43U526l9qyhl7jr6guIP8+1R1/I8spPQxfU4Q19BobUX19yi+orvn7E+B/ZOZpvYx/vEV9x4YkSZIK5Jk
5SZKkghnmJEmSCmaYkyRJKphhTpIkqWBT5nvmpk+fnjNnzhz0MFSIxYsXP5qZMwY9jmFjHalT1lB71pA61U0NTZkwN3PmTBYtWjToYagQEbF80GMYRtaROmUNtWcNqVPd1JAfs0qSJBXMMCdJklQww5wkSVLBDHOSJEkFM8xJkiQVzDAnSZJUsCnz1SSrM/OzFwx6CB1Z9uVDBj0EqS1rSOpPKTUE1tEw8sycJElSwQxzkiRJBTPMSZIkFcwwJ0mSVDDDnCRJUsEMc5IkSQUzzEmSJBXMMCdJklQww5wkSVLBDHOSJEkFM8xJkiQVzDAnSZJUMMOcJElSwQxzkiRJBTPMSZIkFcwwJ0mSVDDDnCRJUsEMc5IkSQUzzEmSJBXMMCdJklQww5wkSVLBDHOSJEkFM8xJkiQVzDAnSZJUMMOcJElSwQxzkiRJBTPMSZIkFcwwJ0mSVDDDnCRJUsEMc5IkSQUzzEmSJBXMMCdJklQww5wkSVLBigxzEbF9RFwWEbdHxG0RseGgxySVxBqS+mcdaVisPegB9GghcFxmXhURbwCeH/B4pNIsxBqS+rUQ60hDoLgwFxG7Ai9k5lUAmfnYgIckFcUakvpnHWmYdPQxa0TMjYjzIuL+iMiImN/LwTrtJyKOjIhfR8RzEbE4Ig5o7N4ZeDoifhwRN0XEsb2MRZpI1pDUP+tIaq/Ta+Y2ApYARwPP9nG8NfYTEYcCpwAnAnsC1wIXRcS2dZO1gQOAI4F9gXkRMa+PMUkTwRqS+mcdSW10FOYy88LMPDYzzwFe7vVgHfbzGWBhZp6RmUsz8yjgQeCT9f77gUWZ+ZvMfB64EJjV65ikiWANSf2zjqT2hupu1ohYF5gNXNKy6xJgv/rxjcDmEbFZREwD5gJLR+nv4xGxKCIWrVixYryGLQ2Nsa6huk/rSFOK/y9SacY8zEXEpyLihh6fPh1YC3i4ZfvDwJYAmfkicCxwJXArcFdmnt+us8w8PTPnZOacGTNm9DgkaWINUw3V7a0jFWeY6sga0ngbj7tZpwO7jEO/r8jMi4CLxvMY0gBZQ1L/rCNNGWN+Zi4zF2Tm63t8+qPAS8AWLdu3AB7qa2BSIawhqX/WkaaSobpmLjNXAYuB1juC5lHdSSRpNawhqX/WkUrT0cesEbERsFO9Og3YNiJmAY9l5r2dHqzDfk4GvlNf63AN8Alga+C0To8jDRtrSOqfdSS11+mZuTnAzfWyAXB8/fiELo+3xn4y8/vAp4HjgFuA/YH3ZubyLo8lDRNrSOqfdSS10dGZucy8HIh+D9ZpP5l5KnBqv8eThoU1JPXPOpLaG6pr5iRJktQdw5wkSVLBDHOSJEkFM8xJkiQVzDAnSZJUMMOcJElSwQxzkiRJBTPMSZIkFcwwJ0mSVDDDnCRJUsEMc5IkSQUzzEmSJBXMMCdJklQww5wkSVLBDHOSJEkFM8xJkiQVzDAnSZJUMMOcJElSwQxzkiRJBTPMSZIkFcwwJ0mSVDDDnCRJUsEMc5IkSQUzzEmSJBXMMCdJklQww5wkSVLBDHOSJEkFM8xJkiQVzDAnSZJUMMOcJElSwQxzkiRJBTPMSZIkFcwwJ0mSVDDDnCRJUsEMc5IkSQUzzEmSJBXMMCdJklQww5wkSVLBDHOSJEkFW3vQA+hFRCwDngJeBh7PzHcOdkRSeawjqT/WkIZFkWGutl9mPj3oQUiFs46k/lhDGjg/ZpUkSSpYR2EuIhZERLYsD3V7sIiYGxHnRcT9dR/zR2l3ZET8OiKei4jFEXFAS5MEroiIGyPi8G7HIQ2CdST1xxqS2uvmzNydwFaNZbcejrcRsAQ4Gni2XYOIOBQ4BTgR2BO4FrgoIrZtNNs/M2cDfwQcGxG79zAWaRCsI6k/1pDUopsw92JmPtRYVnR7sMy8MDOPzcxzqC4YbeczwMLMPCMzl2bmUcCDwCcb/dxf//dB4EJgr3YdRcTHI2JRRCxasaLr4UrjwTqS+mMNSS26CXM
7RMQD9Snn70XEDmM9mIhYF5gNXNKy6xJgv7rNhhGxcf14I+Ag4Bft+svM0zNzTmbOmTFjxlgPV+qFdST1xxqSWnQa5n4GzAfeA3wM2BK4NiLe2NowIj4VETf0OJ7pwFrAwy3bH66PCbAFcHVE/By4Hvh2Zt7Y4/GkiWQdSf2xhqQ2Ovpqksy8qLkeEdcD9wAfAU5uaT4d2GVMRtd+LPcAe4xX/9J4sY6k/lhDUns9fTVJ/Z06vwB2brNvQWa+vsfxPAq8RPWOp2kLoOs7lqRhZh1J/bGGpEpPYS4i1gfeTHUx6JjJzFXAYmBey655VHcSSZOGdST1xxqSKh19zBoRJwE/Bu4FNgc+D2wInNnNweqLRHeqV6cB20bELOCxzLy33n4y8J36WodrgE8AWwOndXMsadhYR1J/rCGpvU7/Oa9tgH+lugZhBdXFnvtk5vIujzcHuKyxfny9nEl1USuZ+f36YtbjqL5DaAnw3h6OJQ0b60jqjzUktdHpDRCHjcXBMvNyIDpodypw6lgcUxoW1pHUH2tIas9/m1WSJKlghjlJkqSCGeYkSZIKZpiTJEkqmGFOkiSpYIY5SZKkghnmJEmSCmaYkyRJKphhTpIkqWCGOUmSpIIZ5iRJkgpmmJMkSSqYYU6SJKlghjlJkqSCGeYkSZIKZpiTJEkqmGFOkiSpYIY5SZKkghnmJEmSCmaYkyRJKphhTpIkqWCGOUmSpIIZ5iRJkgpmmJMkSSqYYU6SJKlghjlJkqSCGeYkSZIKZpiTJEkqmGFOkiSpYIY5SZKkghnmJEmSCmaYkyRJKphhTpIkqWCGOUmSpIIZ5iRJkgpmmJMkSSqYYU6SJKlghjlJkqSCGeYkSZIKtvagB6DJZ+ZnLxj0EDq27MuHDHoIUlul1JE1pGE1lWrIM3OSJEkFM8xJkiQVzDAnSZJUsMjMQY9hQkTECmD5BB5yOvDoBB5vspvo+dwuM2dM4PGKMMF1ZA2NvYmcU2uoDWuoeENZQ1MmzE20iFiUmXMGPY7JwvmcenzNx55zOrX4eo+9YZ1TP2aVJEkqmGFOkiSpYIa58XP6oAcwyTifU4+v+dhzTqcWX++xN5Rz6jVzkiRJBfPMnCRJUsEMc5IkSQUzzEmSJBXMMFe4iFgQEUsGPY5hFhHzI+LpQY9Dw8ka6ox1pNWxjtZsPGvIMCdJklSyzJzUC3A5cCpwItU/wfEIcBIwrdFmM+BM4HHgWeBSYNc19Lsu8BXgPuAZ4Ebg3fW+acBvgKNanrMLkMBe9fqmVLc5PwKsBK4A5jTazweeBg4GlgC/Ay4Dtm/sz5Zl/qDnvOVnDuAY4Ff13N4G/Hm971rgqy3tN6nbvX9N81zvP7D+uQ8Gfla3WdSY45H9zWXBoOelpMUaGvxiHZW/WEcDn/9JXUMDn+AJeAEvB54ETqh/gT8AvAh8sNHmXOAOYC6wG3BeXQAbrKbfs4Hr6+fsAHwKWAXsUe//O+D6luccD9ze+MW6GrgA2BvYCfgC8BSwVd1mPvBCXdB7A7sDNwMX1/s3oPpjcAewZb2MOuYBzf8XgTuB9wDbAx+q/xAcAhwJ3M9r/5j9BdUfsvU6nOeRArkBeCfwZuBiYGk9x+sCR9fHHJmjjQY9LyUt1tDgF+uo/MU6Gvj8T+oaGvgET1ABXdey7T+Ab9aPd65fgLmN/ZvWRfeXo/S5I/AysG3L9h8Bp9aPd6/73bGx/y7g2PrxQVTvdDZo6eMW4Jj68fy6jzc19h8OPM+r3xG4AFgy6HkeZZ42pHpnc0DL9q8BFwJvrIvh4Ma+S4HTu5jnkQJqvkN6e71tm8Y8Pj3o+Sh1sYYGPv/W0SRYrKOBzv2kr6G1mRpubVl/ANi8fvwWqhfpupGdmflkRNwGvHWU/vaiStq3R0Rz+3rAf9Z93Fr3cThwQkS8jeoX4uy67WzgdcCKlj7Wr9uNeD4z72wZ+7pUp+MfG2V8w+KtVD/PTyIiG9vXAZZl5m8j4idUc/TTiNi
a6h3NCXW7Nc5zQ/M1fqD+7+ZUp8TVP2tocKyjycM6GoxJX0NTJcy90LKedHbzR46yfVq97w/a9P1s4/FZwEepfiEOB67OzOWNPh4GDmjT/1ONxy+OMqYSbl4ZGeMfAve27BuZt7OAMyLiSOAwqo8Urmo8v5N5pmV/SXNUCmtocKyjycM6GoxJX0NTJcytzlKqid4XuBIgIjahul7hX0Z5zs1UKX3LzLxsNX1/F/hSROwDHAp8vrHvJmAL4OXMvKeP8a8C1urj+ePpdqrT8NtlZuu7lxHnAWcA76P6I/PdrM9H0/k8r8kwz9FkYA2NL+toarCOxs+kr6EpH+Yy866IOBf4RkR8HHiC6kLJp6gKoN1zfhkRZwMLI+KvqYrhDVSfmd+TmT+s290XEVcAp1Fd+/CDRjeXAtcA50bEMbx64eh7gEsz8yo6swzYLiL2onrHsTIzn+/05x9PmbkyIk4CTorq3PSVwEbAPlR/OE7PzOci4t+B44A9gA83nt/RPHdgGbB+RMyjKspnMvOZMfkhZQ2NM+toarCOxs+UqKHxuuBwWBaqi07/sWXbQuD8xnovt4OvQ3XB5z1UafshqmQ/u6XdEVSnWn/Ypo+NgVOoPktfRXVa93vUF6rS5mJJXr3Icnq9vh5wTj32ZDhvBz+KV98ZraC66Hdeo81B9dhv6naeW+ej3jaz3ta8tf7rVF8HkPiVCt2+htbQ4F8D66jwxToa+PxP6hoauQtFkiRJBSrhwkVJkiSNwjAnSZJUMMOcJElSwQxzkiRJBTPMSZIkFcwwJ0mSVDDDnCRJUsEMc5IkSQUzzEmSJBXMMCdJklQww5wkSVLBDHOSJEkFM8xJkiQVzDAnSZJUMMOcJElSwQxzkiRJBTPMSZIkFcwwJ0mSVDDDnCRJUsEMc5IkSQUzzEmSJBXMMCdJklQww5wkSVLBDHOSJEkFM8xJkiQVzDAnSZJUMMOcJElSwQxzkiRJBTPMSZIkFcwwJ0mSVDDDnCRJUsEMc5IkSQUzzEmSJBXMMCdJklQww5wkSVLBDHOSJEkFM8xJkiQVzDAnSZJUMMOcJElSwQxzkiRJBTPMSZIkFcwwJ0mSVDDDnCZMRCyMiPO7fM4HIyK7fZ40GXVaQxExv66b1mX9iRinNKy6+f9QRGwSEf8nIh6IiOcj4u6I+MB4j7EXaw96ANJoImIH4O+BqwY9FqlAzwA7Njdk5nMDGotUlIhYB/gP4DHgA8B9wDbA84Mc12g8MzcOIuLyiPh6RHw1Ih6LiBURcXRErBcR/xQRT0TEvRHx4Zbn7RYRl0bEs/XzFkbEpvW+d0XEqoh4Y8tzToyIWxvr+0XEFRHxTETcX49jkzEY2+9FxPci4vF6uSAidm7sXxARSyLisIj4VUSsjIgfRcT0kf3AR4BDGmcJDlzNHK4D/CvwOeCerl8EFc0a6r+GgMzMh5pL1y+EimUN9V1DfwHMAP5bZl6dmcvq/97Yw8sx7gxz4+dwYCXwNuDLwNeAHwG/BOYAZwLfjIitACJiQ+Bi4Glgb+CPgf2Af677+ynwKPBnIweIiAA+BJxVr+8GXAKcB+wBvB+Y1eij17G9DrgMeA54B7Av8CBwab1vxEzg0Hrs7wL2BL5Y7zsJ+DfgUmCrerl2NfP3RWBZZp65mjaa3Kyh/mpog4hYHhH3RcT5EbHnatpqcrKGeq+h/w5cA/zfiHgoIm6vw+I6o7QfrMx0GeMFuBy4rrEewArgvMa2dYBVwJ/W6x8DngQ2brQ5EEhgp3r9ZOCqxv79gZeAber1bwPfahnLrLqPzfsY2xHAXUA02qwF/Bb4QL2+gKrINm20+Rxwd2N9IXB+B/P3LmAZ8PpunucyeRZr6JU2vdbQvlRnIGYBBwDnUH3suvOgX1uXiVmsoVfa9FpDd9R9/TMwG/gT4CHgpEG/tu0Wr5kbP6+ccs7MjIhHgNsa216IiMeBzetNbwFuzcyVjT6uBV4G3grcTfXO59MRsV1mLqd6Z3N
FZt5Xt58N7BQRhzb6iPq/OwKP9Di22cD2wMrqTdgrXsdrr8lZnplPNtYfaPTRkYiYQVVsH8zMJ7p5riYda6iHGqqPfx1w3Ss/QMS1wC3AUcD/6rY/Fcsa6rGGqD65fAT4WGa+BCyuP17+h4j4m6wT37AwzI2fF1rWc5RtnXzUnQCZeVNE3AF8KCJOojrVfUyj3TTgm8A/tOnj/j7GNo3qfwSHten3sTX02+1H+btSnfr+aaNgpwFExIvArpl5Z5d9qkzWUG819P/JzJciYhGw8xobazKxhnqvoQeBF+ogN2IpVXicTnUmcWgY5obHUuCIiNi48a5oP6pfwqWNdmdRvRNaAmxI9fHJiJuows7dYzy2m4APAo/2ebZsFdVp8dW5EditZdvfApsBfwX8uo/ja3KzhkZRX9e0O/DzPo6tyc8aetU1VIF1Wma+XG/bhepyhUf7OP648AaI4XE21S/Jt+u7ieYC3wB+2FIUZ1Od7v4C8OPMfKqx7yvA3hFxWkTsGRE7RcT7IuIbYzC2h4FzI+IdEbF9RMyt70Tq5p3+MuD3I+JNETG93YWkmfm7zFzSXIAngJX1+qo+fxZNXtZQLSL+d0S8OyJ2iIhZwLeowtxpff4cmtysoVd9HXgDcErd9t3A8cCpw/YRKxjmhkZmPgO8G9gEuAE4l+qalyNa2i0Hrqa6S+isln23AnOp7ua5gupd+JeoCqDfsc2l+oqQH1BdGHom1dmyx7vo6gyqd3eLqE5Rv72fcUlN1tBrvB44vW57CfB7wNzMvKGX8WtqsIZec7zfUN2MN5vq493TqG6G+FxvP8H4iiEMmJIkSeqQZ+YkSZIKZpiTJEkqmGFOkiSpYIY5SZKkgk2Z75mbPn16zpw5c9DDUCEWL178aGbOGPQ4ho11pE5ZQ+1ZQ+pUNzU0ZcLczJkzWbRo0aCHoUJExPJBj2EYWUfqlDXUnjWkTnVTQ37MKkmSVDDDnCRJUsEMc5IkSQUzzEmSJBXMMCdJklSwKXM36+rM/OwFgx5CR5Z9+ZBBD0FqyxqS+lNKDYF1NIw8MydJklQww5wkSVLBDHOSJEkFM8xJkiQVzDAnSZJUMMOcJElSwQxzkiRJBTPMSZIkFcwwJ0mSVDDDnCRJUsEMc5IkSQUzzEmSJBXMMCdJklQww5wkSVLBDHOSJEkFM8xJkiQVzDAnSZJUMMOcJElSwQxzkiRJBTPMSZIkFcwwJ0mSVDDDnCRJUsEMc5IkSQUzzEmSJBXMMCdJklQww5wkSVLBDHOSJEkFM8xJkiQVzDAnSZJUMMOcJElSwQxzkiRJBTPMSZIkFcwwJ0mSVDDDnCRJUsGKDHMRsX1EXBYRt0fEbRGx4aDHJJXEGpL6Zx1pWKw96AH0aCFwXGZeFRFvAJ4f8Hik0izEGpL6tRDrSEOguDAXEbsCL2TmVQCZ+diAhyQVxRqS+mcdaZh09DFrRMyNiPMi4v6IyIiY38vBOu0nIo6MiF9HxHMRsTgiDmjs3hl4OiJ+HBE3RcSxvYxFmkjWkNQ/60hqr9Nr5jYClgBHA8/2cbw19hMRhwKnACcCewLXAhdFxLZ1k7WBA4AjgX2BeRExr48xSRPBGpL6Zx1JbXQU5jLzwsw8NjPPAV7u9WAd9vMZYGFmnpGZSzPzKOBB4JP1/vuBRZn5m8x8HrgQmNWuo4j4eEQsiohFK1as6HXYUt9KrSGwjjQ8Sq0ja0jjbajuZo2IdYHZwCUtuy4B9qsf3whsHhGbRcQ0YC6wtF1/mXl6Zs7JzDkzZswYr2FLQ2OsawisI009/r9IpRnzMBcRn4qIG3p8+nRgLeDhlu0PA1sCZOaLwLHAlcCtwF2ZeX6Px5OGjjUk9c860lQyHnezTgd2GYd+X5GZFwEXjecxpAGyhqT+WUeaMsb8zFxmLsjM1/f49EeBl4AtWrZvATzU18CkQlhDUv+sI00lQ3XNXGauAhYDrXcEzaO6k0jSalhDUv+sI5Wmo49ZI2IjYKd6dRq
wbUTMAh7LzHs7PViH/ZwMfKe+1uEa4BPA1sBpnR5HGjbWkNQ/60hqr9Mzc3OAm+tlA+D4+vEJXR5vjf1k5veBTwPHAbcA+wPvzczlXR5LGibWkNQ/60hqo6Mzc5l5ORD9HqzTfjLzVODUfo8nDQtrSOqfdSS1N1TXzEmSJKk7hjlJkqSCGeYkSZIKZpiTJEkqmGFOkiSpYIY5SZKkghnmJEmSCmaYkyRJKphhTpIkqWCGOUmSpIIZ5iRJkgpmmJMkSSqYYU6SJKlghjlJkqSCGeYkSZIKZpiTJEkqmGFOkiSpYIY5SZKkghnmJEmSCmaYkyRJKphhTpIkqWCGOUmSpIIZ5iRJkgpmmJMkSSqYYU6SJKlghjlJkqSCGeYkSZIKZpiTJEkqmGFOkiSpYIY5SZKkghnmJEmSCmaYkyRJKphhTpIkqWCGOUmSpIIZ5iRJkgpmmJMkSSqYYU6SJKlghjlJkqSCrT3oAfQiIpYBTwEvA49n5jsHOyKpPNaR1B9rSMOiyDBX2y8znx70IKTCWUdSf6whDZwfs0qSJBWsozAXEQsiIluWh7o9WETMjYjzIuL+uo/5o7Q7MiJ+HRHPRcTiiDigpUkCV0TEjRFxeLfjkAbBOpL6Yw1J7XVzZu5OYKvGslsPx9sIWAIcDTzbrkFEHAqcApwI7AlcC1wUEds2mu2fmbOBPwKOjYjdexiLNAjWkdQfa0hq0U2YezEzH2osK7o9WGZemJnHZuY5VBeMtvMZYGFmnpGZSzPzKOBB4JONfu6v//sgcCGwV7uOIuLjEbEoIhatWNH1cKXxYB1J/bGGpBbdhLkdIuKB+pTz9yJih7EeTESsC8wGLmnZdQmwX91mw4jYuH68EXAQ8It2/WXm6Zk5JzPnzJgxY6yHK/XCOpL6Yw1JLToNcz8D5gPvAT4GbAlcGxFvbG0YEZ+KiBt6HM90YC3g4ZbtD9fHBNgCuDoifg5cD3w7M2/s8XjSRLKOpP5YQ1IbHX01SWZe1FyPiOuBe4CPACe3NJ8O7DImo2s/lnuAPcarf2m8WEdSf6whqb2evpqk/k6dXwA7t9m3IDNf3+N4HgVeonrH07QF0PUdS9Iws46k/lhDUqWnMBcR6wO9MdxSAAAHb0lEQVRvproYdMxk5ipgMTCvZdc8qjuJpEnDOpL6Yw1JlY4+Zo2Ik4AfA/cCmwOfBzYEzuzmYPVFojvVq9OAbSNiFvBYZt5bbz8Z+E59rcM1wCeArYHTujmWNGysI6k/1pDUXqf/nNc2wL9SXYOwgupiz30yc3mXx5sDXNZYP75ezqS6qJXM/H59MetxVN8htAR4bw/HkoaNdST1xxqS2uj0BojDxuJgmXk5EB20OxU4dSyOKQ0L60jqjzUktee/zSpJklQww5wkSVLBDHOSJEkFM8xJkiQVzDAnSZJUMMOcJElSwQxzkiRJBTPMSZIkFcwwJ0mSVDDDnCRJUsEMc5IkSQUzzEmSJBXMMCdJklQww5wkSVLBDHOSJEkFM8xJkiQVzDAnSZJUMMOcJElSwQxzkiRJBTPMSZIkFcwwJ0mSVDDDnCRJUsEMc5IkSQUzzEmSJBXMMCdJklQww5wkSVLBDHOSJEkFM8xJkiQVzDAnSZJUMMOcJElSwQxzkiRJBTPMSZIkFcwwJ0mSVDDDnCRJUsEMc5IkSQUzzEmSJBXMMCdJklQww5wkSVLBDHOSJEkFM8xJkiQVbO1BD0CTz8zPXjDoIXRs2ZcPGfQQpLZKqSNrSMNqKtWQZ+YkSZIKZpiTJEkqmGFOkiSpYJGZgx7DhIiIFcDyCTzkdODRCTzeZDfR87ldZs6YwOMVYYLryBoaexM5p9ZQG9ZQ8YayhqZMmJtoEbEoM+cMehyThfM59fiajz3ndGrx9R57wzqnfswqSZJUMMOcJElSwQxz4+f0QQ9gknE+px5f87HnnE4tvt5jbyjn1GvmJEmSCuaZOUmSpIIZ5iRJkgpmmJMkSSqYYa5wEbEgIpYMehz
DLCLmR8TTgx6HhpM11BnrSKtjHa3ZeNaQYU6SJKlkmTmpF+By4FTgRKp/guMR4CRgWqPNZsCZwOPAs8ClwK5r6Hdd4CvAfcAzwI3Au+t904DfAEe1PGcXIIG96vVNqW5zfgRYCVwBzGm0nw88DRwMLAF+B1wGbN/Yny3L/EHPecvPHMAxwK/qub0N+PN637XAV1vab1K3e/+a5rnef2D9cx8M/Kxus6gxxyP7m8uCQc9LSYs1NPjFOip/sY4GPv+TuoYGPsET8AJeDjwJnFD/An8AeBH4YKPNucAdwFxgN+C8ugA2WE2/ZwPX18/ZAfgUsArYo97/d8D1Lc85Hri98Yt1NXABsDewE/AF4Clgq7rNfOCFuqD3BnYHbgYurvdvQPXH4A5gy3oZdcwDmv8vAncC7wG2Bz5U/yE4BDgSuJ/X/jH7C6o/ZOt1OM8jBXID8E7gzcDFwNJ6jtcFjq6POTJHGw16XkparKHBL9ZR+Yt1NPD5n9Q1NPAJnqACuq5l238A36wf71y/AHMb+zeti+4vR+lzR+BlYNuW7T8CTq0f7173u2Nj/13AsfXjg6je6WzQ0sctwDH14/l1H29q7D8ceJ5XvyNwAbBk0PM8yjxtSPXO5oCW7V8DLgTeWBfDwY19lwKndzHPIwXUfIf09nrbNo15fHrQ81HqYg0NfP6to0mwWEcDnftJX0NrMzXc2rL+ALB5/fgtVC/SdSM7M/PJiLgNeOso/e1FlbRvj4jm9vWA/6z7uLXu43DghIh4G9UvxNl129nA64AVLX2sX7cb8Xxm3tky9nWpTsc/Nsr4hsVbqX6en0RENravAyzLzN9GxE+o5uinEbE11TuaE+p2a5znhuZr/ED9382pTomrf9bQ4FhHk4d1NBiTvoamSph7oWU96ezmjxxl+7R63x+06fvZxuOzgI9S/UIcDlydmcsbfTwMHNCm/6caj18cZUwl3LwyMsY/BO5t2Tcyb2cBZ0TEkcBhVB8pXNV4fifzTMv+kuaoFNbQ4FhHk4d1NBiTvoamSphbnaVUE70vcCVARGxCdb3Cv4zynJupUvqWmXnZavr+LvCliNgHOBT4fGPfTcAWwMuZeU8f418FrNXH88fT7VSn4bfLzNZ3LyPOA84A3kf1R+a7WZ+PpvN5XpNhnqPJwBoaX9bR1GAdjZ9JX0NTPsxl5l0RcS7wjYj4OPAE1YWST1EVQLvn/DIizgYWRsRfUxXDG6g+M78nM39Yt7svIq4ATqO69uEHjW4uBa4Bzo2IY3j1wtH3AJdm5lV0ZhmwXUTsRfWOY2VmPt/pzz+eMnNlRJwEnBTVuekrgY2Afaj+cJyemc9FxL8DxwF7AB9uPL+jee7AMmD9iJhHVZTPZOYzY/JDyhoaZ9bR1GAdjZ8pUUPjdcHhsCxUF53+Y8u2hcD5jfVebgdfh+qCz3uo0vZDVMl+dku7I6hOtf6wTR8bA6dQfZa+iuq07veoL1SlzcWSvHqR5fR6fT3gnHrsyXDeDn4Ur74zWkF10e+8RpuD6rHf1O08t85HvW1mva15a/3Xqb4OIPErFbp9Da2hwb8G1lHhi3U08Pmf1DU0cheKJEmSClTChYuSJEkahWFOkiSpYIY5SZKkghnmJEmSCmaYkyRJKphhTpIkqWCGOUmSpIIZ5iRJkgr2/wBMi8y6ABP0GgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "<Figure size 720x504 with 6 Axes>" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize = (10,7))\n", + "plt.subplots_adjust(wspace = 0.5)\n", + "plt.subplots_adjust(hspace = 0.5)\n", + "for 
i, e in enumerate(all_events.T):\n", +    "        plt.subplot(2,3,i+1)\n", +    "        plt.hist(e, [0, 0.5, 1, 1.5])\n", +    "        plt.xticks([0.25, 1.25], ['no event', 'event'], fontsize = 14)\n", +    "        plt.yticks([500000, 1000000], [r'$5 \\cdot 10^{5}$', r'$1 \\cdot 10^{6}$'], fontsize = 14)    \n", +    "    plt.title('movement ' + str(i+1), fontsize = 14)" ] }, { "cell_type": "markdown", "metadata": {}, @@ -185,13 +241,17 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Task .. : Extract time-dependend features.\n", + "The purpose of the feature extraction is to extract time-dependent features from the EEG data. To do so, a sliding window containing 500 datapoints each is used. Three consecutive time windows each predict the event in the following time step.\n", "\n", - "Single steps:\n", - "- define sliding window of length 500 (datapoints)\n", - "- compute the average power per window (power: square of the signal)\n", - "- three consecutive windows predict the event in the following time step\n", - "- the window slides with a step size of 2 throught the dataset" + "Extract time-dependent features from the EEG-data:\n", + "\n", + "- define the start and end points of a sliding window with a length of 500 datapoints and a step size of 2\n", + "- loop through those start and end points\n", + "- per iteration:\n", + "    - take three consecutive time windows (window_1 = data[start:end,:], window_2 = data[start+500:end+500,:],\n", + "      window_3 = data[start+1000:end+1000,:])\n", + "    - compute the average power per window (power: square of the signal)\n", + "    - combine the three arrays containing the average power to one array" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 857 ms, sys: 40.8 ms, total: 898 ms\n", - "Wall time: 899 ms\n" + "CPU times: user 2.33 ms, sys: 45 µs, total: 2.38 ms\n", + "Wall time: 1.73 ms\n" ] } ], @@ 
-234,9 +294,7 @@ "num_feat = 3\n", "num_win = int((all_data.shape[0] - (win_size * num_feat))/step_size)\n", "ix_start = np.arange(0, num_win*step_size - win_size*num_feat, step_size)\n", - "ix_end = ix_start + 500\n", - "\n", - "all_events_resh = np.array([all_events[end + 1501, :] for end in ix_end])" + "ix_end = ix_start + 500" ] }, { @@ -248,28 +306,7 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "# def butter_bandpass(fs, lowcut, highcut, order = 5):\n", - "# nyq = 0.5 * fs\n", - "# low = lowcut / nyq\n", - "# high = highcut / nyq\n", - "# b, a = butter(order, [low, high], btype='band')\n", - "# return b, a\n", - "\n", - "# def butter_bandpass_filter(data):\n", - "# b, a = butter_bandpass(fs = 500, lowcut = 0, highcut = 50)\n", - "# y = lfilter(b, a, data, axis = 0)\n", - "\n", - "# filt_mean_pow = mean_pow(y)\n", - "# return filt_mean_pow" - ] - }, - { - "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -279,22 +316,22 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "100%|█████████▉| 709629/709696 [02:44<00:00, 4331.54it/s]" + "100%|█████████▉| 709344/709696 [02:24<00:00, 4846.28it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 2min 44s, sys: 1.51 s, total: 2min 45s\n", - "Wall time: 2min 45s\n" + "CPU times: user 2min 25s, sys: 1.03 s, total: 2min 26s\n", + "Wall time: 2min 26s\n" ] } ], @@ -302,1108 +339,184 @@ "%%time\n", "\n", "pbar = tqdm(total = len(ix_start))\n", - "filt_data = []\n", + "data_filt = []\n", "for start, end in zip(ix_start, ix_end):\n", + " \n", " pow_1 = mean_pow(all_data[start:end, :])\n", " pow_2 = mean_pow(all_data[start+500:end+500, :])\n", " pow_3 = mean_pow(all_data[start+1000:end+1000, :])\n", - " filt_data.append(np.hstack([pow_1, pow_2, pow_3]))\n", + " 
data_filt.append(np.hstack([pow_1, pow_2, pow_3]))\n", " \n", " pbar.update(1)\n", " \n", - "filt_data = np.array(filt_data)" + "data_filt = np.array(data_filt)\n", + "events_filt = np.array([all_events[end + 1501, :] for end in ix_end])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "#### Dimensionality reduction" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "pca = PCA(n_components=10)\n", - "filt_data_red = pca.fit_transform(filt_data)" + "### Modeling" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "<matplotlib.collections.PathCollection at 0x2ba7365b2320>" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAW4AAAEQCAYAAACQip4+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAE8lJREFUeJzt3XuQXGWdxvHnmYQBQoJkySwiIQxGhYmRADsViFwUEAvQJX8sYsArG82iLOUdsbJb62pZq1SBtwLckTXorlwCq1YWCIsKMRADOJEEcgFNICwBNMM9KDom+e0f3TNMJt3TpzN9+vSbfD9VXTNz+p3Tz0xmnrxz+u1zHBECAKSjregAAID6UNwAkBiKGwASQ3EDQGIobgBIDMUNAInJrbhtf8/2ZturM4z9uu2V5dtvbL+QVy4ASJ3zWsdt+2RJL0v6QURMr+PzLpZ0TET8fS7BACBxuc24I2KppOeGbrM91fbttlfYvtv2kRU+9TxJ1+eVCwBSN7bJj9cj6cKI+K3t4yRdJenUgTttHybpcEl3NjkXACSjacVte7ykt0q6yfbA5r2HDZsj6eaI2NasXACQmmbOuNskvRARR48wZo6ki5qUBwCS1LTlgBHxkqTHbL9HklwyY+D+8vHuiZKWNysTAKQoz+WA16tUwkfY3mR7rqT3SZpre5WkNZJmD/mUOZJuCE5XCAAjym05IAAgH7xyEgASk8uTk5MmTYrOzs48dg0Au6UVK1Y8ExEdWcbmUtydnZ3q7e3NY9cAsFuy/XjWsRwqAYDEUNwAkBiKGwASQ3EDQGIobgBIDMUNAImhuAEgMRQ3ACSm2RdSGJWFX56vJ1avGvz40OkzdO4/f6XARADQfDVn3LaPGHIh35W2X7L9yWaEG2p4aUvSE6tXaeGX5zc7CgAUquaMOyIekXS0JNkeI+lJST/OOddOhpd2re0AsLuq9xj3aZI2RETm19QDABqr3uKeoypXYLc9z3av7d6+vr7RJwMAVJS5uG23Szpb0k2V7o+Inojojojujo5MZyYEAOyCembcZ0r6dUT8Pq8wAIDa6inu81TlMAkAoHkyFbft/SSdLulH+cYBANSS6QU4EfEHSQfmnAUAk
AEveQeAxFDcAJAYihsAEpNEcf/iuoeLjgAALSOJ4l699KmiIwBAy0iiuAEAr0qouPerczsA7J6SKe59Jv6Ddi7p/fSG4z9fRBwAKExSV8AplfeOZn/q2AKSAEBxkplxAwBKKG4ASEwSxT1u/73q2g4Au7MkivuCy07aqaTH7b+XLrjspIISAUBxknlykpIGgJIkZtwAgFdR3ACQGIobABKT9dJlB9i+2fbDttfZnpV3MABAZVmfnPympNsj4hzb7ZLG5ZgJADCCmsVt+zWSTpb0YUmKiH5J/fnGAgBUk+VQyeGS+iQtsP2A7WvKV30HABQgS3GPlXSspKsj4hhJf5B06fBBtufZ7rXd29fX1+CYAIABWYp7k6RNEXFf+eObVSryHURET0R0R0R3R0dHIzMCAIaoWdwR8TtJT9g+orzpNElrc00FAKgq66qSiyX9sLyi5FFJF+QXCQAwkkzFHRErJXXnnAUAkAGvnASAxFDcAJAYihsAEkNxA0BiKG4ASAzFDQCJobgBIDEUNwAkhuIGgMRQ3ACQGIobABJDcQNAYihuAEgMxQ0AiaG4ASAxFDcAJIbiBoDEUNwAkJhMly6zvVHSFknbJG2NCC5jBgAFyXqxYEk6JSKeyS0JACATDpUAQGKyFndIusP2CtvzKg2wPc92r+3evr6+xiUEAOwga3GfGBHHSjpT0kW2Tx4+ICJ6IqI7Iro7OjoaGhIA8KpMxR0RT5bfbpb0Y0kz8wwFAKiuZnHb3s/2hIH3Jb1T0uq8gwEAKsuyquQgST+2PTD+uoi4PddUAICqahZ3RDwqaUYTsgAAMmA5IAAkhuIGgMRQ3ACQGIobABJDcQNAYihuAEgMxQ0AiaG4ASAxFDcAJIbiBoDEUNwAkBiKGwASQ3EDQGIobgBIDMUNAImhuAEgMRQ3ACSG4gaAxGQubttjbD9g+5Y8AwEARlbPjPsTktblFQQAkE2m4rY9WdK7JF2TbxwAQC1ZZ9zfkHSJpO3VBtieZ7vXdm9fX19DwgEAdlazuG2/W9LmiFgx0riI6ImI7ojo7ujoaFhAAMCOssy4T5B0tu2Nkm6QdKrt/8o1FQCgqprFHRFfiIjJEdEpaY6kOyPi/bknAwBUxDpuAEjM2HoGR8QSSUtySQIAyIQZNwAkhuIGgMRQ3ACQGIobABJDcQNAYihuAEgMxQ0AiaG4ASAxFDcAJIbiBoDEUNwAkBiKGwASQ3EDQGIobgBIDMUNAImhuAEgMRQ3ACQmy1Xe97F9v+1VttfY/tdmBAMAVJbl0mV/lnRqRLxsey9J99heHBH35pwNAFBBzeKOiJD0cvnDvcq3yDMUAKC6TMe4bY+xvVLSZkk/jYj7KoyZZ7vXdm9fX1+jcwIAyjIVd0Rsi4ijJU2WNNP29ApjeiKiOyK6Ozo6Gp0TAFBW16qSiHhB0l2SzsgnDgCgliyrSjpsH1B+f19Jp0t6OO9gAIDKsqwqOVjS922PUanoF0bELfnGAgBUk2VVyYOSjmlCFgBABrxyEgASQ3EDQGIobgBIDMUNAImhuAEgMRQ3ACSG4gaAxFDcAJAYihsAEkNxA0BiKG4ASAzFDQCJobgBIDEUNwAkhuIGgMRQ3ACQGIobABJDcQNAYrJcLPhQ23fZXmt7je1PNCMYAKCyLBcL3irpMxHxa9sTJK2w/dOIWJtzNgBABTVn3BHxdET8uvz+FknrJB2SdzAAQGV1HeO23anSFd/vq3DfPNu9tnv7+voakw4AsJPMxW17vKT/lvTJiHhp+P0R0RMR3RHR3dHR0ciMAIAhMhW37b1UKu0fRsSP8o0EABhJllUllvQfktZFxBX5RwIAjCTLjPsESR+QdKrtleXbWTnnAgBUUXM5YETcI8lNyAIAyIBXTgJAYihuAEgMxQ0AiaG4ASAxFDcAJIbiBoDEUNwAkBiKGwASQ3EDQGIobgBIDMUNAImhuAEgMVmuOVmIzktv3Wnbxq++q4AkANBaWnLGXam0R9oOA
HuSlixuAEB1FDcAJIbiBoDEZLnm5Pdsb7a9uhmBAAAjyzLjvlbSGTnnAABkVLO4I2KppOeakAUAkEHLruNe336+xgy5RPG2kN7Qf11xgQCgRTSsuG3PkzRPkqZMmTKqfa1vP1/nTD5IG9rbB7dN7e/X+k3nS3pxVPsGgNQ1rLgjokdSjyR1d3fHaPZ1zuSD9MWvj1F7bBvc1u8xOudTB+kno4sJAMlryeWApdKWrFdv7VHaDgB7uizLAa+XtFzSEbY32Z6bd6iB0t4hR3k7AOzpah4qiYjzmhEEAJBNSx4qAQBUR3EDQGIobgBIDMUNAImhuAEgMRQ3ACSmJYt73KzjNXzJdpS3A8CeriWLu3PBgp1Ketys49W5YEFBiQCgdbTs2QEpaQCorCVn3ACA6ihuAEgMxQ0AiaG4ASAxFDcAJIbiBoDEtOxyQLSudXffpduv/oa2b9tWe/Awhx31dp0z/7M5pAL2HC1b3GuO7NrhKjgh6c0Pr6trH+/77nIt2/Dc4McnTP0r/fCjsxoTcA+17u67dNuVV0ixa5cjevzBJbr8vUuqD2g7VPu85j019zPxtfvq/C/yb4k9k2MXfwFH0t3dHb29vbv8+Ze/9907bTtz1QZJ2Qt8aGnfeNtnNaG/8jhL6qrzP4Q9Wc9FF2jLM335PgjljT2Q7RUR0Z1lbKZj3LbPsP2I7fW2Lx1dvJFVKm1JWjxjqhbPmCqrNBuvZXhpD73w8NBbSFqXYX8o2fLsM/k/yPYnMg17/nev5BwEaE01D5XYHiPpSkmnS9ok6Ve2F0XE2rzDVbJ4xtRSrnLZhqQ5n2/Tisc26ZuPnDT4P9HF5bf3dE3VxLHnataKf6u4v4HyXntkl7a0S8c9WHv2feWFd+pPz1+x0/bP3HhLfV9MgiYcOCn/GTeQo40XXKBXlt87+HG1Yw4vt0szM/RBEbLMuGdKWh8Rj0ZEv6QbJM3ON1ZtA7NvS7rha9v1zUdOqPrFPL91oZb/zReq7mtgPxP6pfuO6lLnpbcO3oarVtpS9b8WdicnzfmgZNceCLSg4aUtVf9rfHx/aUI3/LbmyC5d+Llpesu10/XRq6c2/WuQsj05eYikoX+7bpJ03PBBtudJmidJU6ZMaUi4LDz4duQyeWX8IZn2NaFfOrvtHi3afqIkqfPSW7Xxq+8aZcrdR9dJp0jSLq8qyaTt0EzDJr5233weH7ut4aU9kmqNYkkX/09I3q5l0/bV3Kun6uePf2uncXn2RsNWlUREj6QeqfTkZKP2W4RLxi7Uov4Ti47RsrpOOmWwwGv5/iVf0TOPL8++8ypPTG798zpt/eMdkl79z+Lp56XL31t5N5MOm6UPXTY/++MCdWiTdP6S0LI3W/fvW3kCkeekL0txPylp6BRocnlbcgb+N6n1h/7r/GzeUfYYoynPX1z3sFYvfapc2ovr+txnHl/etENXM04/S+/4yMeb8lhoHQe+VNxjZznG/StJb7R9uO12SXMkLco3VnYxeBt5kj/x4HGa9vC6wScjq+1rS7v0VBzY2JDYJWvueUqStPVP9xScZGSrfnqbfnbNVUXHQAb7NvAqWs/u37Bd1a1mcUfEVkn/KOl/Ja2TtDAi1uQVKOvKjDNXbRgs7Tmfb9Mnjlim7VXGHtw1f3C9b9eQ8h5+29IufeRTbbps67lVH/ei75ya9UvBKMXAP+j2LYXmyOLBn99edARk0LlgwU7lPbwHstgu6bq3W4rQzFeavyw10zHuiLhN0m05ZxlU77K6h8pvP5dx/MALbt6y4M07rpCI0Cm/PXvwiUmp8hMMn7nxlop/hu8JywGbyW3l8m6b0PLlHdurTRvQaka6utb9R3VpfJUX6w0ISd/+W2vZtDbNfOWVik9M5q0lXzkJSKM7xt1sbmvTp69vmSOIaJJKS4YH1PvEZMNfOQkU4W3nH6npJ79OY/fu0thxZ0oaU3Skqo467YyiI6AA1co57yXEzLixW7jz2p/ogcULNHS5YLOwq
gSNUM+Mm+IGgBbAoRIA2I1R3ACQGIobABJDcQNAYihuAEgMxQ0AiaG4ASAxFDcAJCaXF+DY7pP0eIN2N0lSE65Q23Dkbi5yNxe5G++wiOjIMjCX4m4k271ZX03USsjdXORuLnIXi0MlAJAYihsAEpNCcfcUHWAXkbu5yN1c5C5Qyx/jBgDsKIUZNwBgCIobABLTMsVt+wzbj9heb/vSCvfvbfvG8v332e5sfsqdZcj9adtrbT9o++e2Dysi53C1cg8Z93e2w3bhS6iyZLZ9bvn7vcb2dc3OWEmGn5Eptu+y/UD55+SsInIOZ/t7tjfbXl3lftv+VvnretD2sc3OWEmG3O8r533I9i9tz2h2xlGLiMJvKl1McIOk10tql7RK0rRhYz4u6Tvl9+dIujGR3KdIGld+/2Op5C6PmyBpqaR7JXW3emZJb5T0gKSJ5Y//OoXvtUpPmH2s/P40SRuLzl3OcrKkYyWtrnL/WZIWS7Kk4yXdV3TmjLnfOuRn5MxWyV3PrVVm3DMlrY+IRyOiX9INkmYPGzNb0vfL798s6TTbbmLGSmrmjoi7IuKP5Q/vlTS5yRkryfL9lqQvS/qapD81M1wVWTJ/VNKVEfG8JEXE5iZnrCRL7pC0f/n910h6qon5qoqIpZKeG2HIbEk/iJJ7JR1g++DmpKuuVu6I+OXAz4ha53eyLq1S3IdIemLIx5vK2yqOiYitkl6UdGBT0lWXJfdQc1WaoRStZu7yn72HRsStzQw2gizf6zdJepPtZbbvtd0Kl17PkvuLkt5ve5Ok2yRd3Jxoo1bvz38rapXfybqMLTrAnsL2+yV1S3pb0Vlqsd0m6QpJHy44Sr3GqnS45O0qzaKW2n5LRLxQaKrazpN0bURcbnuWpP+0PT0ithcdbHdm+xSVivvEorPUq1Vm3E9KOnTIx5PL2yqOsT1WpT8pn21Kuuqy5Jbtd0iaL+nsiPhzk7KNpFbuCZKmS1pie6NKxy8XFfwEZZbv9SZJiyLiLxHxmKTfqFTkRcqSe66khZIUEcsl7aPSyZBaXaaf/1Zk+yhJ10iaHRFF90jdWqW4fyXpjbYPt92u0pOPi4aNWSTpQ+X3z5F0Z5SfXShQzdy2j5H07yqVdiscc5Vq5I6IFyNiUkR0RkSnSscBz46I3mLiSsr2M/ITlWbbsj1JpUMnjzYzZAVZcv+fpNMkyXaXSsXd19SUu2aRpA+WV5ccL+nFiHi66FC12J4i6UeSPhARvyk6zy4p+tnRIc/0nqXSDGmDpPnlbV9SqTCk0g/zTZLWS7pf0uuLzpwx988k/V7SyvJtUdGZs+QeNnaJCl5VkvF7bZUO8ayV9JCkOUVnzph7mqRlKq04WSnpnUVnLue6XtLTkv6i0l8zcyVdKOnCId/vK8tf10Ot8DOSMfc1kp4f8jvZW3Tmem+85B0AEtMqh0oAABlR3ACQGIobABJDcQNAYihuABilWie2GjZ21CcVo7gBYPSulZT1FAv/JGlhRByj0rr+q+p9MIobAEYpKpzYyvZU27fbXmH7bttHDgzXKE8qxrlKACAfPSq96Oe3to9TaWZ9qkonFbvD9sWS9pP0jnp3THEDQIPZHq/Seb9vGnL26b3Lb0d9UjGKGwAar03SCxFxdIX75qp8PDwiltseOKlY5nMZcYwbABosIl6S9Jjt90iDl3kbuETaqE8qxrlKAGCUbF+v0pkpJ6l0Url/kXSnpKslHSxpL0k3RMSXbE+T9F1J41V6ovKSiLijrsejuAEgLRwqAYDEUNwAkBiKGwASQ3EDQGIobgBIDMUNAImhuAEgMf8PzJBpgmBBlgYAAAAASUVORK5CYII=\n", - "text/plain": [ - "<Figure size 432x288 with 1 Axes>" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" + 
"name": "stderr", + "output_type": "stream", + "text": [ + "\r", + "100%|██████████| 709696/709696 [02:40<00:00, 4846.28it/s]" + ] } ], "source": [ - "plt.scatter(filt_data_red[all_events_resh[:,0] == 1, 0], filt_data_red[all_events_resh[:,0] == 1, 1])\n", - "plt.scatter(filt_data_red[all_events_resh[:,1] == 1, 0], filt_data_red[all_events_resh[:,1] == 1, 1])\n", - "plt.scatter(filt_data_red[all_events_resh[:,2] == 1, 0], filt_data_red[all_events_resh[:,2] == 1, 1])\n", - "plt.scatter(filt_data_red[all_events_resh[:,3] == 1, 0], filt_data_red[all_events_resh[:,3] == 1, 1])\n", - "plt.scatter(filt_data_red[all_events_resh[:,4] == 1, 0], filt_data_red[all_events_resh[:,4] == 1, 1])\n", - "plt.scatter(filt_data_red[all_events_resh[:,5] == 1, 0], filt_data_red[all_events_resh[:,5] == 1, 1])" + "# split of the data\n", + "from sklearn.model_selection import train_test_split\n", + "X_train, X_test, y_train, y_test = train_test_split(data_filt, events_filt,\\\n", + " test_size = 0.33, shuffle = True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Modeling" + "#### Pipeline with single classifier" ] }, { - "cell_type": "code", - "execution_count": 12, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "# split of the data\n", - "X_train, X_test, y_train, y_test = train_test_split(filt_data_red, all_events_resh,\\\n", - " test_size = 0.33, shuffle = True)" + "1. Define a pipeline which includes:\n", + " - PCA to reduce the data to 10 dimensions\n", + " - Scaling of the data\n", + " - a classifier of your choice (e.g. LogisticRegression, AdaBoost...)\n", + "\n", + "\n", + "2. Choose an appropriate parametrization of the classifier according to the imbalance of the data.\n", + "\n", + "\n", + "3. Transfer the multi-class classification problem into a one-vs-rest classification.\n", + "\n", + "\n", + "4. Use cross-validation to test the model performance (scoring: 'roc_auc', cv = 5)." 
] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 13, "metadata": {}, "outputs": [ { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "0.5015816548374611\n", - "[[227893 2]\n", - " [ 6285 20]]\n", - "0.5021839332037699\n", - "[[227838 9]\n", - " [ 6325 28]]\n", - "0.5020450496683306\n", - "[[227849 1]\n", - " [ 6324 26]]\n", - "0.5004700720777185\n", - "[[227818 0]\n", - " [ 6376 6]]\n", - "0.5048009615038914\n", - "[[227503 195]\n", - " [ 6434 68]]\n", - "0.5064005635338957\n", - "[[227395 311]\n", - " [ 6402 92]]\n", - "CPU times: user 5min 52s, sys: 402 ms, total: 5min 53s\n", - "Wall time: 5min 54s\n" + "/cluster/apps/python/3.6.1/x86_64/lib64/python3.6/site-packages/sklearn/ensemble/weight_boosting.py:29: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. It will be removed in a future NumPy release.\n", + " from numpy.core.umath_tests import inner1d\n" ] } ], "source": [ - "%%time\n", - "abc = AdaBoostClassifier()\n", - "\n", - "all_pred = []\n", - "all_labels = []\n", - "for i in range(6):\n", - "\n", - " abc.fit(X_train, y_train[:,i])\n", - " y_pred = abc.predict(X_test)\n", + "from sklearn.pipeline import make_pipeline\n", + "from sklearn.decomposition import PCA\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.ensemble import AdaBoostClassifier\n", "\n", - " all_pred.append(y_pred)\n", - " all_labels.append(y_test[:,i])\n", - " print(roc_auc_score(y_test[:,i], y_pred))\n", - " print(confusion_matrix(y_test[:,i], y_pred))" + "p = make_pipeline(PCA(10), AdaBoostClassifier())" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "0.5712988484634542\n", - "[[ 89209 138686]\n", - " [ 1569 4736]]\n", - "0.5357646342979414\n", - "[[ 70740 157107]\n", - " [ 1518 4835]]\n", - "0.5398906933868289\n", - "[[ 64717 163133]\n", - " 
[ 1297 5053]]\n", - "0.5683459761359975\n", - "[[ 62697 165121]\n", - " [ 884 5498]]\n", - "0.6282825974068698\n", - "[[ 63077 164621]\n", - " [ 133 6369]]\n", - "0.6396764403905532\n", - "[[ 68905 158801]\n", - " [ 151 6343]]\n", - "CPU times: user 16 s, sys: 48.9 ms, total: 16.1 s\n", - "Wall time: 16.1 s\n" + "0.7933328086071505\n", + "0.784015968886133\n", + "0.7844319663739039\n", + "0.7813440191247094\n", + "0.8983360445273207\n", + "0.9057395329865956\n", + "CPU times: user 23min 11s, sys: 28.6 s, total: 23min 40s\n", + "Wall time: 23min 40s\n" ] } ], "source": [ "%%time\n", - "lr = LogisticRegression(class_weight='balanced')\n", - "\n", - "all_pred = []\n", - "all_labels = []\n", + "from sklearn.model_selection import cross_val_score\n", "for i in range(6):\n", - "\n", - " lr.fit(X_train, y_train[:,i])\n", - " y_pred = lr.predict(X_test)\n", - "\n", - " all_pred.append(y_pred)\n", - " all_labels.append(y_test[:,i])\n", - " print(roc_auc_score(y_test[:,i], y_pred))\n", - " print(confusion_matrix(y_test[:,i], y_pred))" + " print(cross_val_score(p, X_train, y_train[:,i], scoring=\"roc_auc\", cv=5).mean())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Pipeline with Voting classifier" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Instead of using a single classifier, use a VotingClassifier including \n", + "- Linear Discriminant Analysis\n", + "- Random Forest Classifier\n", + "- Logistic Regression." ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 15, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/cluster/apps/python/3.6.1/x86_64/lib64/python3.6/site-packages/sklearn/preprocessing/label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
Use `array.size > 0` to check that an array is not empty.\n", - " if diff:\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "0.7815740181416588\n", - "[[227810 85]\n", - " [ 2752 3553]]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/cluster/apps/python/3.6.1/x86_64/lib64/python3.6/site-packages/sklearn/preprocessing/label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", - " if diff:\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.7835624299156497\n", - "[[227845 2]\n", - " [ 2750 3603]]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/cluster/apps/python/3.6.1/x86_64/lib64/python3.6/site-packages/sklearn/preprocessing/label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", - " if diff:\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.7865354330708662\n", - "[[227850 0]\n", - " [ 2711 3639]]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/cluster/apps/python/3.6.1/x86_64/lib64/python3.6/site-packages/sklearn/preprocessing/label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
Use `array.size > 0` to check that an array is not empty.\n", - " if diff:\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.8096969591358668\n", - "[[227817 1]\n", - " [ 2429 3953]]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/cluster/apps/python/3.6.1/x86_64/lib64/python3.6/site-packages/sklearn/preprocessing/label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. Use `array.size > 0` to check that an array is not empty.\n", - " if diff:\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.9251682147781864\n", - "[[227589 109]\n", - " [ 970 5532]]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/cluster/apps/python/3.6.1/x86_64/lib64/python3.6/site-packages/sklearn/preprocessing/label.py:151: DeprecationWarning: The truth value of an empty array is ambiguous. Returning False, but in future this will result in an error. 
Use `array.size > 0` to check that an array is not empty.\n", - " if diff:\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.9337942375789381\n", - "[[227605 101]\n", - " [ 857 5637]]\n", - "CPU times: user 2min 6s, sys: 346 ms, total: 2min 6s\n", - "Wall time: 2min 6s\n" + "0.9974193925440987\n", + "0.9981385930482368\n", + "0.9979205470458089\n", + "0.9975658676432658\n", + "0.9985596468772728\n", + "0.9984539704977335\n", + "CPU times: user 9min 57s, sys: 29.4 s, total: 10min 26s\n", + "Wall time: 10min 26s\n" ] } ], "source": [ "%%time\n", + "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA\n", + "from sklearn.ensemble import RandomForestClassifier, VotingClassifier\n", + "\n", "lda = LDA()\n", "rf = RandomForestClassifier(class_weight = 'balanced')\n", "lr = LogisticRegression(class_weight = 'balanced')\n", "\n", "eclf = VotingClassifier(estimators=[('lda', lda), ('rf', rf), ('lr', lr)], voting = 'soft', weights=[1,1,1])\n", "\n", - "all_pred = []\n", - "all_labels = []\n", - "for i in range(6):\n", - "\n", - " eclf.fit(X_train, y_train[:,i])\n", - " y_pred = eclf.predict(X_test)\n", + "p = make_pipeline(PCA(10), eclf)\n", "\n", - " all_pred.append(y_pred)\n", - " all_labels.append(y_test[:,i])\n", - " print(roc_auc_score(y_test[:,i], y_pred))\n", - " print(confusion_matrix(y_test[:,i], y_pred))" + "for i in range(6):\n", + " print(cross_val_score(p, X_train, y_train[:,i], scoring=\"roc_auc\", cv=5).mean())" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "<div>\n", - "<style>\n", - " .dataframe thead tr:only-child th {\n", - " text-align: right;\n", - " }\n", - "\n", - " .dataframe thead th {\n", - " text-align: left;\n", - " }\n", - "\n", - " .dataframe tbody tr th {\n", - " vertical-align: top;\n", - " }\n", - "</style>\n", - "<table border=\"1\" class=\"dataframe\">\n", - " <thead>\n", - " <tr 
style=\"text-align: right;\">\n", - " <th></th>\n", - " <th>id</th>\n", - " <th>HandStart</th>\n", - " <th>FirstDigitTouch</th>\n", - " <th>BothStartLoadPhase</th>\n", - " <th>LiftOff</th>\n", - " <th>Replace</th>\n", - " <th>BothReleased</th>\n", - " </tr>\n", - " </thead>\n", - " <tbody>\n", - " <tr>\n", - " <th>0</th>\n", - " <td>subj10_series1_0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>100</th>\n", - " <td>subj10_series1_100</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>200</th>\n", - " <td>subj10_series1_200</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>300</th>\n", - " <td>subj10_series1_300</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>400</th>\n", - " <td>subj10_series1_400</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>500</th>\n", - " <td>subj10_series1_500</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>600</th>\n", - " <td>subj10_series1_600</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>700</th>\n", - " <td>subj10_series1_700</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>800</th>\n", - " <td>subj10_series1_800</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " 
<td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>900</th>\n", - " <td>subj10_series1_900</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1000</th>\n", - " <td>subj10_series1_1000</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1100</th>\n", - " <td>subj10_series1_1100</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1200</th>\n", - " <td>subj10_series1_1200</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1300</th>\n", - " <td>subj10_series1_1300</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1400</th>\n", - " <td>subj10_series1_1400</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1500</th>\n", - " <td>subj10_series1_1500</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1600</th>\n", - " <td>subj10_series1_1600</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1700</th>\n", - " <td>subj10_series1_1700</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1800</th>\n", - " <td>subj10_series1_1800</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " 
<td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>1900</th>\n", - " <td>subj10_series1_1900</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2000</th>\n", - " <td>subj10_series1_2000</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2100</th>\n", - " <td>subj10_series1_2100</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2200</th>\n", - " <td>subj10_series1_2200</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2300</th>\n", - " <td>subj10_series1_2300</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2400</th>\n", - " <td>subj10_series1_2400</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2500</th>\n", - " <td>subj10_series1_2500</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2600</th>\n", - " <td>subj10_series1_2600</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2700</th>\n", - " <td>subj10_series1_2700</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2800</th>\n", - " <td>subj10_series1_2800</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " 
<td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>2900</th>\n", - " <td>subj10_series1_2900</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>...</th>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " <td>...</td>\n", - " </tr>\n", - " <tr>\n", - " <th>259400</th>\n", - " <td>subj10_series1_259400</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>259500</th>\n", - " <td>subj10_series1_259500</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>259600</th>\n", - " <td>subj10_series1_259600</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>259700</th>\n", - " <td>subj10_series1_259700</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>259800</th>\n", - " <td>subj10_series1_259800</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>259900</th>\n", - " <td>subj10_series1_259900</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>260000</th>\n", - " <td>subj10_series1_260000</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>260100</th>\n", - " <td>subj10_series1_260100</td>\n", - " 
<td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>260200</th>\n", - " <td>subj10_series1_260200</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>260300</th>\n", - " <td>subj10_series1_260300</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>260400</th>\n", - " <td>subj10_series1_260400</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>260500</th>\n", - " <td>subj10_series1_260500</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>260600</th>\n", - " <td>subj10_series1_260600</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>260700</th>\n", - " <td>subj10_series1_260700</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>260800</th>\n", - " <td>subj10_series1_260800</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>260900</th>\n", - " <td>subj10_series1_260900</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>261000</th>\n", - " <td>subj10_series1_261000</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>261100</th>\n", - 
" <td>subj10_series1_261100</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>1</td>\n", - " </tr>\n", - " <tr>\n", - " <th>261200</th>\n", - " <td>subj10_series1_261200</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>261300</th>\n", - " <td>subj10_series1_261300</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>261400</th>\n", - " <td>subj10_series1_261400</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>261500</th>\n", - " <td>subj10_series1_261500</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>261600</th>\n", - " <td>subj10_series1_261600</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>261700</th>\n", - " <td>subj10_series1_261700</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>261800</th>\n", - " <td>subj10_series1_261800</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>261900</th>\n", - " <td>subj10_series1_261900</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>262000</th>\n", - " <td>subj10_series1_262000</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " 
</tr>\n", - " <tr>\n", - " <th>262100</th>\n", - " <td>subj10_series1_262100</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>262200</th>\n", - " <td>subj10_series1_262200</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " <tr>\n", - " <th>262300</th>\n", - " <td>subj10_series1_262300</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " <td>0</td>\n", - " </tr>\n", - " </tbody>\n", - "</table>\n", - "<p>2624 rows × 7 columns</p>\n", - "</div>" - ], - "text/plain": [ - " id HandStart FirstDigitTouch BothStartLoadPhase \\\n", - "0 subj10_series1_0 0 0 0 \n", - "100 subj10_series1_100 0 0 0 \n", - "200 subj10_series1_200 0 0 0 \n", - "300 subj10_series1_300 0 0 0 \n", - "400 subj10_series1_400 0 0 0 \n", - "500 subj10_series1_500 0 0 0 \n", - "600 subj10_series1_600 0 0 0 \n", - "700 subj10_series1_700 0 0 0 \n", - "800 subj10_series1_800 0 0 0 \n", - "900 subj10_series1_900 0 0 0 \n", - "1000 subj10_series1_1000 0 0 0 \n", - "1100 subj10_series1_1100 0 0 0 \n", - "1200 subj10_series1_1200 0 0 0 \n", - "1300 subj10_series1_1300 0 0 0 \n", - "1400 subj10_series1_1400 0 0 0 \n", - "1500 subj10_series1_1500 0 0 0 \n", - "1600 subj10_series1_1600 0 0 0 \n", - "1700 subj10_series1_1700 0 0 0 \n", - "1800 subj10_series1_1800 0 0 0 \n", - "1900 subj10_series1_1900 0 0 0 \n", - "2000 subj10_series1_2000 0 0 0 \n", - "2100 subj10_series1_2100 0 0 0 \n", - "2200 subj10_series1_2200 0 0 0 \n", - "2300 subj10_series1_2300 0 0 0 \n", - "2400 subj10_series1_2400 0 0 0 \n", - "2500 subj10_series1_2500 0 0 0 \n", - "2600 subj10_series1_2600 1 0 0 \n", - "2700 subj10_series1_2700 0 0 0 \n", - "2800 subj10_series1_2800 0 1 1 \n", - "2900 subj10_series1_2900 0 0 0 \n", - "... ... ... ... ... 
\n", - "259400 subj10_series1_259400 0 0 0 \n", - "259500 subj10_series1_259500 0 0 0 \n", - "259600 subj10_series1_259600 0 0 0 \n", - "259700 subj10_series1_259700 0 0 0 \n", - "259800 subj10_series1_259800 0 0 0 \n", - "259900 subj10_series1_259900 0 0 0 \n", - "260000 subj10_series1_260000 0 0 0 \n", - "260100 subj10_series1_260100 0 0 0 \n", - "260200 subj10_series1_260200 0 0 0 \n", - "260300 subj10_series1_260300 0 0 0 \n", - "260400 subj10_series1_260400 0 0 0 \n", - "260500 subj10_series1_260500 0 0 0 \n", - "260600 subj10_series1_260600 0 0 0 \n", - "260700 subj10_series1_260700 0 0 0 \n", - "260800 subj10_series1_260800 0 0 0 \n", - "260900 subj10_series1_260900 0 0 0 \n", - "261000 subj10_series1_261000 0 0 0 \n", - "261100 subj10_series1_261100 0 0 0 \n", - "261200 subj10_series1_261200 0 0 0 \n", - "261300 subj10_series1_261300 0 0 0 \n", - "261400 subj10_series1_261400 0 0 0 \n", - "261500 subj10_series1_261500 0 0 0 \n", - "261600 subj10_series1_261600 0 0 0 \n", - "261700 subj10_series1_261700 0 0 0 \n", - "261800 subj10_series1_261800 0 0 0 \n", - "261900 subj10_series1_261900 0 0 0 \n", - "262000 subj10_series1_262000 0 0 0 \n", - "262100 subj10_series1_262100 0 0 0 \n", - "262200 subj10_series1_262200 0 0 0 \n", - "262300 subj10_series1_262300 0 0 0 \n", - "\n", - " LiftOff Replace BothReleased \n", - "0 0 0 0 \n", - "100 0 0 0 \n", - "200 0 0 0 \n", - "300 0 0 0 \n", - "400 0 0 0 \n", - "500 0 0 0 \n", - "600 0 0 0 \n", - "700 0 0 0 \n", - "800 0 0 0 \n", - "900 0 0 0 \n", - "1000 0 0 0 \n", - "1100 0 0 0 \n", - "1200 0 0 0 \n", - "1300 0 0 0 \n", - "1400 0 0 0 \n", - "1500 0 0 0 \n", - "1600 0 0 0 \n", - "1700 0 0 0 \n", - "1800 0 0 0 \n", - "1900 0 0 0 \n", - "2000 0 0 0 \n", - "2100 0 0 0 \n", - "2200 0 0 0 \n", - "2300 0 0 0 \n", - "2400 0 0 0 \n", - "2500 0 0 0 \n", - "2600 0 0 0 \n", - "2700 0 0 0 \n", - "2800 0 0 0 \n", - "2900 1 0 0 \n", - "... ... ... ... 
\n", - "259400 0 0 0 \n", - "259500 0 0 0 \n", - "259600 0 0 0 \n", - "259700 0 0 0 \n", - "259800 0 0 0 \n", - "259900 0 0 0 \n", - "260000 0 0 0 \n", - "260100 0 0 0 \n", - "260200 0 0 0 \n", - "260300 0 0 0 \n", - "260400 0 0 0 \n", - "260500 0 0 0 \n", - "260600 0 0 0 \n", - "260700 0 0 0 \n", - "260800 0 0 0 \n", - "260900 0 1 0 \n", - "261000 0 1 0 \n", - "261100 0 0 1 \n", - "261200 0 0 0 \n", - "261300 0 0 0 \n", - "261400 0 0 0 \n", - "261500 0 0 0 \n", - "261600 0 0 0 \n", - "261700 0 0 0 \n", - "261800 0 0 0 \n", - "261900 0 0 0 \n", - "262000 0 0 0 \n", - "262100 0 0 0 \n", - "262200 0 0 0 \n", - "262300 0 0 0 \n", - "\n", - "[2624 rows x 7 columns]" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.read_csv('../ml-use-case-eeg/train/subj10_series1_events.csv').loc[::100, :]" - ] + "outputs": [], + "source": [] }, { "cell_type": "code",