# Importing Python libraries needed for data analysis
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import os
import math
import plotly.io as pio
pio.renderers.default='notebook'
from sklearn.cluster import KMeans
import scipy.cluster.hierarchy as sch
from sklearn.cluster import AgglomerativeClustering
%matplotlib inline
# reading dataset
dataset = pd.read_excel('filename.xlsx').iloc[:,:-1]
! python --version
Python 3.7.12
dataset.shape
(189, 46)
# Fix the column headers: forward-fill 'Unnamed' (merged) columns and strip whitespace
cols = []
for col in dataset.columns:
    if col.startswith('Unnamed'):
        cols.append(cols[-1])
    else:
        cols.append(col.strip())
dataset.columns = cols
# Merge the sub-header row (row 0) into the column names, then strip the trailing '--'
dataset.columns = [i + '--' + str(z).strip() for i, z in zip(dataset.columns, dataset.iloc[0, :].fillna('').values)]
dataset.columns = [col.strip('--') for col in dataset.columns]
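As an illustration of what the two renaming steps above do, here is a minimal sketch on a made-up toy frame with the same two-level header layout (the column names and values are invented for the example):
import pandas as pd
# Hypothetical two-row header: Excel merged cells show up as 'Unnamed: N' columns
toy = pd.DataFrame(
    [['MA', 'OA', ''],      # sub-header row (row 0)
     [1, 0, 42]],
    columns=['Ictal signs', 'Unnamed: 1', 'Age'])
cols = []
for col in toy.columns:
    # repeat the previous name for merged ('Unnamed') cells
    cols.append(cols[-1] if col.startswith('Unnamed') else col.strip())
toy.columns = cols
# append the sub-header, then strip the trailing '--' where the sub-header was empty
toy.columns = [f'{c}--{str(s).strip()}' for c, s in zip(toy.columns, toy.iloc[0].fillna(''))]
toy.columns = [c.strip('--') for c in toy.columns]
print(toy.columns.tolist())  # ['Ictal signs--MA', 'Ictal signs--OA', 'Age']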
# Data cleaning: coerce 'Consciousness Time' to numeric, drop the sub-header row and rows with missing values
dataset['Consciousness Time']=pd.to_numeric(dataset['Consciousness Time'],errors='coerce')
dataset.drop(0,inplace=True)
dataset.dropna(inplace=True)
dataset.reset_index(inplace=True,drop=True)
dataset.columns
Index(['Nr', 'Location', 'Sex', 'Age', 'Age onset', 'Years with ES', 'Seizure Type', 'Laterality', 'Behavior before', 'Same day ES before', 'ES before', 'Ictal Seconds', 'Ictal signs and symtoms--MA', 'Ictal signs and symtoms--OA', 'Ictal signs and symtoms--SMA', 'Ictal signs and symtoms--Laughing', 'Ictal signs and symtoms--Coughing', 'Ictal signs and symtoms--NRR', 'Ictal signs and symtoms--NRL', 'Ictal signs and symtoms--Vo', 'Ictal signs and symtoms--Gaze', 'Ictal signs and symtoms--VA', 'Ictal signs and symtoms--Hiccup', 'Consciousness Time', 'Postictal signs and symptoms--MA', 'Postictal signs and symptoms--OA', 'Postictal signs and symptoms--NRR', 'Postictal signs and symptoms--NRL', 'Postictal signs and symptoms--Smacking', 'Postictal signs and symptoms--Smile', 'Postictal signs and symptoms--Laughing', 'Postictal signs and symptoms--Coughing', 'Postictal signs and symptoms--Vo', 'Postictal signs and symptoms--Gape', 'Postictal signs and symptoms--Hipcup', 'Postictal signs and symptoms--Motor restless', 'Postictal signs and symptoms--Speaks incomprehensible', 'Postictal signs and symptoms--Cloni Arm', 'Postictal signs and symptoms--Stand up', 'Level of Consciousness', 'Coughing Time seconds--Coughing #1', 'Coughing Time seconds--Coughing #2', 'Coughing Time seconds--Coughing #3', 'Coughing Time seconds--Coughing #4', 'Disnomia seconds', 'Aphasia TT'], dtype='object')
# Examining the dataframe
dataset.head(25)
  | Nr | Location | Sex | Age | Age onset | Years with ES | Seizure Type | Laterality | Behavior before | Same day ES before | ... | Postictal signs and symptoms--Speaks incomprehensible | Postictal signs and symptoms--Cloni Arm | Postictal signs and symptoms--Stand up | Level of Consciousness | Coughing Time seconds--Coughing #1 | Coughing Time seconds--Coughing #2 | Coughing Time seconds--Coughing #3 | Coughing Time seconds--Coughing #4 | Disnomia seconds | Aphasia TT |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.0 | TR | 2.0 | 20.0 | 9.0 | 11.0 | 4.0 | 1.0 | 1.0 | 2.0 | ... | 0 | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
1 | 2.0 | TR | 1.0 | 25.0 | 22.0 | 3.0 | 9.0 | 1.0 | 1.0 | 3.0 | ... | 0 | 0 | 0 | 5.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
2 | 3.0 | TR | 2.0 | 28.0 | 6.0 | 22.0 | 10.0 | 1.0 | 2.0 | 0.0 | ... | 0 | 0 | 0 | 2.0 | 39 | 0 | 0 | 0 | 0.0 | 0.0 |
3 | 4.0 | TR | 2.0 | 42.0 | 1.0 | 41.0 | 11.0 | 1.0 | 1.0 | 2.0 | ... | 0 | 0 | 0 | 5.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
4 | 5.0 | TR | 2.0 | 35.0 | 10.0 | 25.0 | 4.0 | 1.0 | 1.0 | 0.0 | ... | 0 | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
5 | 6.0 | TR | 2.0 | 29.0 | 20.0 | 9.0 | 4.0 | 2.0 | 2.0 | 0.0 | ... | 0 | 0 | 0 | 4.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
6 | 7.0 | TR | 1.0 | 57.0 | 3.0 | 54.0 | 4.0 | 1.0 | 1.0 | 0.0 | ... | 0 | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
7 | 8.0 | TR | 1.0 | 24.0 | 4.0 | 20.0 | 2.0 | 1.0 | 1.0 | 2.0 | ... | 0 | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
8 | 9.0 | TR | 1.0 | 34.0 | 7.0 | 27.0 | 2.0 | 1.0 | 1.0 | 4.0 | ... | 0 | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
9 | 10.0 | TR | 2.0 | 42.0 | 1.0 | 41.0 | 2.0 | 1.0 | 1.0 | 4.0 | ... | 0 | 1 | 0 | 5.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
10 | 11.0 | TR | 1.0 | 22.0 | 13.0 | 9.0 | 2.0 | 1.0 | 2.0 | 0.0 | ... | 0 | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
11 | 12.0 | TR | 2.0 | 20.0 | 10.0 | 10.0 | 2.0 | 1.0 | 2.0 | 1.0 | ... | 0 | 0 | 0 | 2.0 | 2 | 0 | 0 | 0 | 0.0 | 0.0 |
12 | 13.0 | TR | 1.0 | 19.0 | 14.0 | 5.0 | 2.0 | 1.0 | 2.0 | 1.0 | ... | 0 | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
13 | 14.0 | TR | 2.0 | 21.0 | 10.0 | 11.0 | 6.0 | 2.0 | 1.0 | 1.0 | ... | 0 | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
14 | 15.0 | TR | 1.0 | 43.0 | 16.0 | 27.0 | 7.0 | 2.0 | 1.0 | 0.0 | ... | 0 | 0 | 0 | 4.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
15 | 16.0 | TR | 2.0 | 20.0 | 10.0 | 10.0 | 3.0 | 1.0 | 1.0 | 0.0 | ... | 0 | 0 | 0 | 10.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
16 | 18.0 | TR | 1.0 | 61.0 | 30.0 | 31.0 | 1.0 | 1.0 | 2.0 | 2.0 | ... | 0 | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 46.0 | 0.0 |
17 | 19.0 | TR | 2.0 | 20.0 | 9.0 | 11.0 | 1.0 | 1.0 | 1.0 | 5.0 | ... | 0 | 0 | 0 | 5.0 | 0 | 0 | 0 | 0 | 0.0 | 60.0 |
18 | 20.0 | TR | 1.0 | 24.0 | 23.0 | 1.0 | 1.0 | 1.0 | 2.0 | 6.0 | ... | 0 | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
19 | 21.0 | TR | 2.0 | 30.0 | 16.0 | 14.0 | 1.0 | 1.0 | 1.0 | 1.0 | ... | 0 | 0 | 0 | 4.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
20 | 24.0 | TR | 2.0 | 18.0 | 16.0 | 2.0 | 1.0 | 1.0 | 1.0 | 1.0 | ... | 0 | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
21 | 25.0 | TR | 2.0 | 53.0 | 51.0 | 2.0 | 1.0 | 1.0 | 1.0 | 1.0 | ... | 0 | 0 | 0 | 2.0 | 7 | 0 | 0 | 0 | 0.0 | 0.0 |
22 | 26.0 | TR | 1.0 | 24.0 | 22.0 | 2.0 | 1.0 | 1.0 | 1.0 | 1.0 | ... | 0.083333 | 0.125 | 0.166667 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
23 | 27.0 | TR | 2.0 | 53.0 | 14.0 | 39.0 | 1.0 | 1.0 | 2.0 | 0.0 | ... | 1 | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
24 | 28.0 | TR | 1.0 | 52.0 | 1.0 | 51.0 | 1.0 | 3.0 | 1.0 | 1.0 | ... | 0 | 0 | 0 | 4.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 |
25 rows × 46 columns
# Scatterplot graphs: cast the coded columns to strings so seaborn treats them as categories
df1 = dataset.loc[:, 'Sex':'Ictal Seconds']
for col in df1.columns[1:-1]:
    df1[col] = df1[col].astype(int).astype(str)
df1['Age'] = df1['Age'].astype(int)           # keep Age numeric
df1['Consciousness Time'] = pd.to_numeric(dataset['Consciousness Time'])
sns.set(rc={'figure.figsize':(20,20)})
g = sns.FacetGrid(df1, col="Sex", height=8.27, aspect=11.7/8.27)
g.map(sns.scatterplot, 'Consciousness Time', 'Age', alpha=.7)
g.add_legend()
<seaborn.axisgrid.FacetGrid at 0x1d080c16fa0>
g = sns.FacetGrid(df1, col="Laterality", height=4, aspect=.5)
g.map(sns.barplot, "Sex", "Consciousness Time")
C:\Users\tommy\anaconda3\lib\site-packages\seaborn\axisgrid.py:670: UserWarning: Using the barplot function without specifying `order` is likely to produce an incorrect plot.
<seaborn.axisgrid.FacetGrid at 0x1d0814e39a0>
g = sns.FacetGrid(df1, col="Behavior before", height=4, aspect=.5)
g.map(sns.barplot, "Sex", "Consciousness Time")
C:\Users\tommy\anaconda3\lib\site-packages\seaborn\axisgrid.py:670: UserWarning: Using the barplot function without specifying `order` is likely to produce an incorrect plot.
<seaborn.axisgrid.FacetGrid at 0x1d08198d520>
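The UserWarnings above come from mapping sns.barplot through FacetGrid.map without a fixed category order. A minimal way to silence them, assuming a seaborn version where barplot accepts the order keyword through map (the 0.11 series used here does), is to pass the sorted categories explicitly:
# Fix the category order so every facet draws the bars consistently
sex_order = sorted(df1["Sex"].unique())
g = sns.FacetGrid(df1, col="Laterality", height=4, aspect=.5)
g.map(sns.barplot, "Sex", "Consciousness Time", order=sex_order)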
## Distribution of consciousness time, one row per sex
g = sns.FacetGrid(df1, row="Sex", height=1.7, aspect=4)
g.map(sns.kdeplot, "Consciousness Time")
<seaborn.axisgrid.FacetGrid at 0x1d0819a0bb0>
df=dataset.loc[:,'Sex':'Consciousness Time']
corr_df=df.corr()
fig = px.imshow(corr_df)
fig.update_layout(title='Correlation comparison for main features', width=1000, height=1000)
fig.show()
# fig.write_html(r'D:\Machine learning\Epilepsia SUSANA\correlacion.html')
def age_group(x):
    if x <= 20:
        return '<21'
    elif 20 < x <= 29:
        return '20-29'
    elif 29 < x <= 39:
        return '29-39'
    elif 39 < x <= 49:
        return '39-49'
    elif 49 < x <= 59:
        return '49-59'
    else:
        return '60+'
df['Age_group']=df['Age'].apply(age_group)
df_age_group=df.groupby('Age_group')['Age'].count().reset_index().rename(columns={'Age':'count'})
df_age_group.head(10)
  | Age_group | count |
---|---|---|
0 | 20-29 | 35 |
1 | 29-39 | 38 |
2 | 39-49 | 49 |
3 | 49-59 | 27 |
4 | 60+ | 19 |
5 | <21 | 16 |
fig = px.bar(df_age_group, x="Age_group", y="count",color="Age_group")
fig.update_layout(
title="Number of patients in each group",
)
fig.show()
##dropping age and encoding age groups
from sklearn.preprocessing import LabelEncoder
encoder=LabelEncoder()
df['Age_group']=encoder.fit_transform(df['Age_group'])
df.drop(columns=['Age'],inplace=True)
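One caveat worth flagging: LabelEncoder assigns codes in sorted-string order, so here '<21' sorts after '60+' and the youngest patients end up with the largest code. If the encoded age group is treated as an ordered quantity (it is used as the prediction target below), an explicit ordered mapping is safer. A minimal sketch, assuming the same six labels, kept separate here so it does not alter the results shown below:
# Explicit order so the integer codes follow actual age, youngest to oldest;
# this could replace the LabelEncoder call above
age_order = ['<21', '20-29', '29-39', '39-49', '49-59', '60+']
ordered_codes = pd.Categorical(dataset['Age'].apply(age_group), categories=age_order, ordered=True).codes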
# Using scikit-learn to split the data into training and testing sets
from sklearn.model_selection import train_test_split
# Split the data into training and testing sets
train_features, test_features, train_labels, test_labels = train_test_split(df.iloc[:,:-1], df.iloc[:,-1], test_size = 0.25, random_state = 42)
# data shape for test and training subsets
print('Training Features Shape:', train_features.shape)
print('Training Labels Shape:', train_labels.shape)
print('Testing Features Shape:', test_features.shape)
print('Testing Labels Shape:', test_labels.shape)
Training Features Shape: (138, 21)
Training Labels Shape: (138,)
Testing Features Shape: (46, 21)
Testing Labels Shape: (46,)
# Import the model we are using
from sklearn.ensemble import RandomForestRegressor
# Instantiate model with 1000 decision trees
rf = RandomForestRegressor(n_estimators = 1000, random_state = 42)
# Train the model on training data
rf.fit(train_features, train_labels);
# Use the forest's predict method on the test data
predictions = rf.predict(test_features)
# Calculate the absolute errors
errors = abs(predictions - test_labels)
# Print out the mean absolute error (mae)
print('Mean Absolute Error:', round(np.mean(errors), 2))
Mean Absolute Error: 0.91
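An MAE of 0.91 on a target encoded 0-5 is easier to judge against a trivial baseline. A quick hedged check, assuming the same train/test split as above, is to compare with a dummy model that always predicts the training mean:
from sklearn.dummy import DummyRegressor
# Baseline: always predict the mean age-group code seen in training
baseline = DummyRegressor(strategy='mean').fit(train_features, train_labels)
baseline_mae = np.mean(np.abs(baseline.predict(test_features) - test_labels))
print('Baseline MAE:', round(baseline_mae, 2))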
# Index sort the most important features
sorted_feature_weight_idxes = np.argsort(rf.feature_importances_)[::-1] # Reverse sort
# Get the most important features names and weights
most_important_features = np.take_along_axis(
np.array(df.columns.tolist()),
sorted_feature_weight_idxes, axis=0)
most_important_weights = np.take_along_axis(
np.array(rf.feature_importances_),
sorted_feature_weight_idxes, axis=0)
##feature importances for prediction
list(zip(most_important_features, most_important_weights))
[('Years with ES', 0.3596060876417491), ('Age onset', 0.21197770229740107), ('ES before', 0.13750612173869242), ('Ictal Seconds', 0.08368598620142682), ('Consciousness Time', 0.04445168755392684), ('Seizure Type', 0.04428449342427326), ('Ictal signs and symtoms--MA', 0.036814401126194496), ('Same day ES before', 0.026543817597985287), ('Ictal signs and symtoms--Vo', 0.010969131970429053), ('Laterality', 0.010028266107102828), ('Behavior before', 0.008891861405386075), ('Sex', 0.007201983453059538), ('Ictal signs and symtoms--Laughing', 0.0065720616668258755), ('Ictal signs and symtoms--OA', 0.004895333068464376), ('Ictal signs and symtoms--SMA', 0.0029995569551857304), ('Ictal signs and symtoms--Coughing', 0.002668113593882296), ('Ictal signs and symtoms--NRR', 0.00043683954251242367), ('Ictal signs and symtoms--Hiccup', 0.00030665566347807036), ('Ictal signs and symtoms--NRL', 0.00010629466614981067), ('Ictal signs and symtoms--Gaze', 5.360432587465697e-05), ('Ictal signs and symtoms--VA', 0.0)]
import plotly.express as px
## Tree map of location, sex and age: shows the number of patients in each (location, sex, age) group
treedata = dataset.copy()
for col in ['Sex', 'Age', 'Age onset']:
    treedata[col] = treedata[col].astype(int).astype(str)
treedata=treedata.groupby(['Location', 'Sex', 'Age'])['Nr'].count().reset_index().rename(columns={'Nr':'count'})
fig = px.treemap(treedata, path=['Location', 'Sex', 'Age'], values='count')
fig.update_layout(autosize=False,width=800,height=700)
fig.show()
# fig.write_html(r'D:\Machine learning\Epilepsia SUSANA\treemap.html')
treedata.columns
Index(['Location', 'Sex', 'Age', 'count'], dtype='object')
# Kmeans preparation
from sklearn.preprocessing import LabelEncoder
df=dataset.loc[:,'Location':'Ictal Seconds']
data2=df
encoder=LabelEncoder()
data2['Location']=encoder.fit_transform(data2['Location'])
data2
  | Location | Sex | Age | Age onset | Years with ES | Seizure Type | Laterality | Behavior before | Same day ES before | ES before | Ictal Seconds |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 3 | 2.0 | 20.0 | 9.0 | 11.0 | 4.0 | 1.0 | 1.0 | 2.0 | 6.0 | 91.0 |
1 | 3 | 1.0 | 25.0 | 22.0 | 3.0 | 9.0 | 1.0 | 1.0 | 3.0 | 3.0 | 98.0 |
2 | 3 | 2.0 | 28.0 | 6.0 | 22.0 | 10.0 | 1.0 | 2.0 | 0.0 | 0.0 | 90.0 |
3 | 3 | 2.0 | 42.0 | 1.0 | 41.0 | 11.0 | 1.0 | 1.0 | 2.0 | 2.0 | 197.0 |
4 | 3 | 2.0 | 35.0 | 10.0 | 25.0 | 4.0 | 1.0 | 1.0 | 0.0 | 0.0 | 122.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
179 | 0 | 1.0 | 13.0 | 5.0 | 8.0 | 57.0 | 1.0 | 2.0 | 7.0 | 8.0 | 28.0 |
180 | 0 | 2.0 | 26.0 | 2.0 | 24.0 | 65.0 | 2.0 | 2.0 | 2.0 | 3.0 | 122.0 |
181 | 0 | 1.0 | 33.0 | 29.0 | 4.0 | 74.0 | 1.0 | 1.0 | 3.0 | 3.0 | 214.0 |
182 | 0 | 2.0 | 56.0 | 7.0 | 49.0 | 83.0 | 1.0 | 1.0 | 7.0 | 23.0 | 66.0 |
183 | 0 | 1.0 | 21.0 | 4.0 | 17.0 | 82.0 | 1.0 | 1.0 | 4.0 | 4.0 | 54.0 |
184 rows × 11 columns
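Note that data2 mixes scales (binary sex codes next to ictal durations of a few hundred seconds), and k-means uses raw Euclidean distance, so the largest-scale columns dominate the clustering. A hedged sketch of standardizing the features first, kept separate here so the clustering results below stay reproducible as shown:
from sklearn.preprocessing import StandardScaler
# Put every feature on a comparable scale before distance-based clustering
scaled = pd.DataFrame(StandardScaler().fit_transform(data2), columns=data2.columns)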
# Finding the best K value, i.e. the appropriate number of clusters
inertias = []
K = range(1, 10)
plt.figure(figsize=(12, 8))
## finding k with the selected attributes
for k in K:
    # Build and fit the model
    kmeanModel = KMeans(n_clusters=k).fit(data2)
    inertias.append(kmeanModel.inertia_)
plt.plot(K, inertias, 'bx-')
plt.xlabel('Values of K')
plt.ylabel('Inertia')
plt.title('The Elbow Method using Inertia')
C:\Users\tommy\anaconda3\lib\site-packages\sklearn\cluster\_kmeans.py:881: UserWarning: KMeans is known to have a memory leak on Windows with MKL, when there are less chunks than available threads. You can avoid it by setting the environment variable OMP_NUM_THREADS=1.
Text(0.5, 1.0, 'The Elbow Method using Inertia')
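Before settling on the elbow at k = 4 (next cell), an optional cross-check is the silhouette score, which is highest for the k whose clusters are best separated. A minimal sketch on the same data2:
from sklearn.metrics import silhouette_score
# Silhouette needs at least 2 clusters, so start the range at 2
for k in range(2, 10):
    labels = KMeans(n_clusters=k, random_state=42).fit_predict(data2)
    print(k, round(silhouette_score(data2, labels), 3))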
# The elbow appears at k = 4
# Fitting K-Means to the dataset
from sklearn.cluster import KMeans
## the number of clusters is set to 4
kmeans = KMeans(n_clusters = 4, init = 'k-means++', random_state = 42)
y_kmeans = kmeans.fit_predict(data2)
y_kmeans1=y_kmeans+1
cluster = pd.DataFrame(y_kmeans1)
# Adding cluster to the Dataset
dataset['cluster'] = cluster
data2['cluster'] = cluster
dataset
  | Nr | Location | Sex | Age | Age onset | Years with ES | Seizure Type | Laterality | Behavior before | Same day ES before | ... | Postictal signs and symptoms--Cloni Arm | Postictal signs and symptoms--Stand up | Level of Consciousness | Coughing Time seconds--Coughing #1 | Coughing Time seconds--Coughing #2 | Coughing Time seconds--Coughing #3 | Coughing Time seconds--Coughing #4 | Disnomia seconds | Aphasia TT | cluster |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.0 | TR | 2.0 | 20.0 | 9.0 | 11.0 | 4.0 | 1.0 | 1.0 | 2.0 | ... | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 1 |
1 | 2.0 | TR | 1.0 | 25.0 | 22.0 | 3.0 | 9.0 | 1.0 | 1.0 | 3.0 | ... | 0 | 0 | 5.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 1 |
2 | 3.0 | TR | 2.0 | 28.0 | 6.0 | 22.0 | 10.0 | 1.0 | 2.0 | 0.0 | ... | 0 | 0 | 2.0 | 39 | 0 | 0 | 0 | 0.0 | 0.0 | 1 |
3 | 4.0 | TR | 2.0 | 42.0 | 1.0 | 41.0 | 11.0 | 1.0 | 1.0 | 2.0 | ... | 0 | 0 | 5.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 4 |
4 | 5.0 | TR | 2.0 | 35.0 | 10.0 | 25.0 | 4.0 | 1.0 | 1.0 | 0.0 | ... | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 1 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
179 | 184.0 | FL | 1.0 | 13.0 | 5.0 | 8.0 | 57.0 | 1.0 | 2.0 | 7.0 | ... | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 2 |
180 | 185.0 | FL | 2.0 | 26.0 | 2.0 | 24.0 | 65.0 | 2.0 | 2.0 | 2.0 | ... | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 4 |
181 | 186.0 | FL | 1.0 | 33.0 | 29.0 | 4.0 | 74.0 | 1.0 | 1.0 | 3.0 | ... | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 4 |
182 | 187.0 | FL | 2.0 | 56.0 | 7.0 | 49.0 | 83.0 | 1.0 | 1.0 | 7.0 | ... | 0 | 0 | 2.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 2 |
183 | 188.0 | FL | 1.0 | 21.0 | 4.0 | 17.0 | 82.0 | 1.0 | 1.0 | 4.0 | ... | 0 | 0 | 11.0 | 0 | 0 | 0 | 0 | 0.0 | 0.0 | 2 |
184 rows × 47 columns
## Print the patient IDs in each cluster
for i in range(1, 5):
    print('****ID of patients in cluster {}*****'.format(str(i)))
    print(list(dataset[dataset.cluster==i]['Nr'].values))
****ID of patients in cluster 1*****
[1.0, 2.0, 3.0, 5.0, 6.0, 7.0, 12.0, 14.0, 19.0, 28.0, 29.0, 30.0, 38.0, 43.0, 49.0, 50.0, 51.0, 52.0, 54.0, 72.0, 73.0, 86.0, 89.0, 90.0, 93.0, 95.0, 96.0, 98.0, 99.0, 109.0, 110.0, 114.0, 115.0, 118.0, 120.0, 123.0, 125.0, 128.0, 130.0, 138.0, 141.0, 143.0, 145.0, 172.0, 174.0]
****ID of patients in cluster 2*****
[48.0, 56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0, 64.0, 65.0, 66.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0, 80.0, 81.0, 82.0, 102.0, 147.0, 148.0, 149.0, 150.0, 151.0, 152.0, 153.0, 154.0, 155.0, 156.0, 157.0, 158.0, 159.0, 160.0, 161.0, 162.0, 164.0, 165.0, 166.0, 167.0, 168.0, 169.0, 176.0, 177.0, 178.0, 179.0, 180.0, 181.0, 182.0, 183.0, 184.0, 187.0, 188.0]
****ID of patients in cluster 3*****
[8.0, 9.0, 11.0, 13.0, 18.0, 20.0, 21.0, 24.0, 25.0, 26.0, 27.0, 31.0, 32.0, 33.0, 34.0, 35.0, 36.0, 37.0, 39.0, 40.0, 41.0, 42.0, 44.0, 46.0, 47.0, 53.0, 55.0, 67.0, 68.0, 69.0, 70.0, 71.0, 85.0, 88.0, 91.0, 92.0, 94.0, 100.0, 103.0, 105.0, 106.0, 107.0, 108.0, 111.0, 112.0, 113.0, 116.0, 117.0, 121.0, 122.0, 124.0, 126.0, 127.0, 129.0, 131.0, 132.0, 133.0, 134.0, 135.0, 136.0, 137.0, 139.0, 140.0, 142.0, 170.0, 171.0, 173.0, 175.0]
****ID of patients in cluster 4*****
[4.0, 10.0, 15.0, 16.0, 45.0, 83.0, 84.0, 87.0, 97.0, 101.0, 104.0, 144.0, 146.0, 163.0, 185.0, 186.0]
print(data2['cluster'].value_counts())
3    68
2    55
1    45
4    16
Name: cluster, dtype: int64
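To make the four k-means groups easier to interpret than raw membership lists, a short hedged sketch of per-cluster feature means on the same data2:
# Average feature values per k-means cluster, for a quick profile of each group
cluster_profile = data2.groupby('cluster').mean().round(1)
print(cluster_profile)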
# Train a classifier that separates one cluster from the rest, to see which features drive it
from sklearn.ensemble import RandomForestClassifier
def forestmodel(df):
    clf = RandomForestClassifier(random_state=1)
    clf.fit(df.drop(columns=["Binary Cluster 0", "cluster"]).values, df["Binary Cluster 0"].values)
    # Index sort the most important features
    sorted_feature_weight_idxes = np.argsort(clf.feature_importances_)[::-1]  # Reverse sort
    # Get the most important feature names and weights
    most_important_features = np.take_along_axis(
        np.array(df.columns.tolist()),
        sorted_feature_weight_idxes, axis=0)
    most_important_weights = np.take_along_axis(
        np.array(clf.feature_importances_),
        sorted_feature_weight_idxes, axis=0)
    # Show
    return list(zip(most_important_features, most_important_weights))
for i in range(1, 5):
    data2['Binary Cluster 0'] = np.where(data2['cluster'] == i, 1, 0)
    print(f'## the feature importances for cluster {i}\n')
    feat_imp = forestmodel(data2)
    print(feat_imp)
## the feature importances for cluster 1
[('Ictal Seconds', 0.404402195843408), ('Seizure Type', 0.1610039480546899), ('Years with ES', 0.1276797177677198), ('Age onset', 0.08778215739327966), ('Age', 0.0777674326261514), ('ES before', 0.04128598658738516), ('Same day ES before', 0.02772220409778277), ('Sex', 0.02551892861854519), ('Location', 0.018516281311191766), ('Behavior before', 0.014473893632501296), ('Laterality', 0.013847254067345167)]
## the feature importances for cluster 2
[('Seizure Type', 0.6493457211279573), ('Ictal Seconds', 0.09709878225974283), ('Age', 0.05026043784401173), ('Age onset', 0.04539125339151699), ('Years with ES', 0.0427693759958361), ('ES before', 0.036646011785206344), ('Location', 0.032323646121786974), ('Same day ES before', 0.019077410308894236), ('Sex', 0.01129771735738162), ('Laterality', 0.00849939737145132), ('Behavior before', 0.007290246436214663)]
## the feature importances for cluster 3
[('Seizure Type', 0.352666356263936), ('Ictal Seconds', 0.34537009442344474), ('Years with ES', 0.06980027733210667), ('Age onset', 0.054839934256800026), ('ES before', 0.05242327124633987), ('Age', 0.0490743244766378), ('Location', 0.022922807340597446), ('Same day ES before', 0.021918581327734516), ('Sex', 0.012786904801336402), ('Laterality', 0.011272816830795599), ('Behavior before', 0.0069246317002709425)]
## the feature importances for cluster 4
[('Ictal Seconds', 0.631586373222727), ('Age', 0.07857557055109868), ('Years with ES', 0.06600087497427046), ('Age onset', 0.05385883838384985), ('Seizure Type', 0.05169409500700529), ('ES before', 0.04437213030496303), ('Location', 0.02152791471214902), ('Sex', 0.0197223252255728), ('Same day ES before', 0.016975537050294386), ('Laterality', 0.008952620771478317), ('Behavior before', 0.006733719796591151)]
Agglomerative hierarchical clustering differs from k-means in a key way. Rather than choosing the number of clusters up front and starting from random centroids, we begin with every point in the dataset as its own cluster. We then merge the two closest points into a cluster, merge the next-closest pair, and keep repeating until everything has been joined into a single cluster.
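As a small illustration of that merge sequence (the four 1-D points here are made up for the example), the linkage matrix returned by scipy lists one merge per row: the two cluster indices joined, their distance, and the size of the new cluster.
import numpy as np
import scipy.cluster.hierarchy as sch
# Four toy points; the two closest (0.0 and 0.5) are merged first, and so on
points = np.array([[0.0], [0.5], [3.0], [10.0]])
print(sch.linkage(points, method='ward'))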
# create dendrogram
plt.figure(figsize=(15,8))
dendrogram = sch.dendrogram(sch.linkage(data2, method='ward'))
Now that we know the number of clusters for our dataset, the next step is to group the data points into these four clusters. To do so we use the AgglomerativeClustering class from the sklearn.cluster library; take a look at the following script.
from sklearn.cluster import AgglomerativeClustering
cluster = AgglomerativeClustering(n_clusters=4, affinity='euclidean', linkage='ward')
cluster.fit_predict(data2)
array([0, 0, 0, 1, 0, 0, 0, 3, 3, 1, 3, 0, 3, 0, 1, 1, 3, 0, 3, 3, 3, 0, 3, 3, 0, 0, 0, 3, 3, 3, 0, 3, 0, 3, 0, 3, 3, 3, 0, 0, 3, 1, 3, 3, 2, 2, 2, 0, 0, 0, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 3, 0, 1, 3, 0, 0, 3, 3, 0, 0, 0, 0, 1, 0, 2, 3, 1, 2, 0, 1, 3, 0, 3, 3, 0, 3, 3, 3, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 3, 3, 0, 0, 0, 3, 0, 3, 0, 3, 0, 3, 3, 3, 0, 0, 3, 0, 3, 2, 1, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 0, 3, 0, 3, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2], dtype=int64)
dataset['cluster'] = cluster.labels_ + 1
## Print the patient IDs in each cluster obtained by hierarchical clustering
for i in range(1, 5):
    print('****ID of patients in cluster {}*****'.format(str(i)))
    print(list(dataset[dataset.cluster==i]['Nr'].values))
****ID of patients in cluster 1*****
[1.0, 2.0, 3.0, 5.0, 6.0, 7.0, 12.0, 14.0, 19.0, 25.0, 28.0, 29.0, 30.0, 34.0, 36.0, 38.0, 42.0, 43.0, 51.0, 52.0, 53.0, 55.0, 72.0, 73.0, 86.0, 89.0, 90.0, 93.0, 94.0, 95.0, 96.0, 98.0, 103.0, 106.0, 109.0, 114.0, 115.0, 116.0, 118.0, 120.0, 121.0, 123.0, 126.0, 127.0, 128.0, 130.0, 132.0, 134.0, 138.0, 139.0, 141.0, 172.0, 174.0, 176.0]
****ID of patients in cluster 2*****
[4.0, 10.0, 15.0, 16.0, 45.0, 84.0, 87.0, 97.0, 101.0, 104.0, 144.0, 146.0, 186.0]
****ID of patients in cluster 3*****
[48.0, 49.0, 50.0, 54.0, 56.0, 57.0, 58.0, 59.0, 60.0, 61.0, 62.0, 63.0, 64.0, 65.0, 66.0, 74.0, 75.0, 76.0, 77.0, 78.0, 79.0, 80.0, 81.0, 82.0, 83.0, 99.0, 102.0, 143.0, 145.0, 147.0, 148.0, 149.0, 150.0, 151.0, 152.0, 153.0, 154.0, 155.0, 156.0, 157.0, 158.0, 159.0, 160.0, 161.0, 162.0, 163.0, 164.0, 165.0, 166.0, 167.0, 168.0, 169.0, 177.0, 178.0, 179.0, 180.0, 181.0, 182.0, 183.0, 184.0, 185.0, 187.0, 188.0]
****ID of patients in cluster 4*****
[8.0, 9.0, 11.0, 13.0, 18.0, 20.0, 21.0, 24.0, 26.0, 27.0, 31.0, 32.0, 33.0, 35.0, 37.0, 39.0, 40.0, 41.0, 44.0, 46.0, 47.0, 67.0, 68.0, 69.0, 70.0, 71.0, 85.0, 88.0, 91.0, 92.0, 100.0, 105.0, 107.0, 108.0, 110.0, 111.0, 112.0, 113.0, 117.0, 122.0, 124.0, 125.0, 129.0, 131.0, 133.0, 135.0, 136.0, 137.0, 140.0, 142.0, 170.0, 171.0, 173.0, 175.0]
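The two partitions look broadly similar (hierarchical clusters 2 and 3 roughly correspond to k-means clusters 4 and 2). A hedged sketch of quantifying the agreement, assuming y_kmeans and cluster.labels_ from the cells above are still in scope:
from sklearn.metrics import adjusted_rand_score
# 1.0 means identical partitions (up to label renaming), 0 means chance-level agreement
print(adjusted_rand_score(y_kmeans, cluster.labels_))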
from sklearn.feature_selection import SelectKBest, chi2, f_regression
## From the dataset, select the 10 features that best predict consciousness time
df=dataset
encoder=LabelEncoder()
df['Location']=encoder.fit_transform(df['Location'])
X=df.drop(columns=['Nr','Consciousness Time'])
y=df['Consciousness Time']
# Create the object for SelectKBest and fit and transform the regression data
X_reg_new=SelectKBest(score_func=f_regression, k=10).fit_transform(X,y)
## The best 10 features to predict consciousness time
X_reg_new = pd.DataFrame(X_reg_new)
# Recover the selected feature names by matching columns back to the original dataset
for col in X_reg_new.columns:
    for col1 in dataset.columns:
        if all(dataset[col1] == X_reg_new[col]):
            print(col1)
Years with ES
Same day ES before
ES before
Postictal signs and symptoms--OA
Postictal signs and symptoms--NRR
Postictal signs and symptoms--Smacking
Postictal signs and symptoms--Hipcup
Level of Consciousness
Disnomia seconds
Aphasia TT
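Matching columns by value works here, but it can mis-report if two columns happen to contain identical values. A more direct, hedged alternative is to keep the fitted selector and read the chosen column names from its boolean mask:
# Fit the selector separately so get_support() can map back to the original column names
selector = SelectKBest(score_func=f_regression, k=10).fit(X, y)
print(X.columns[selector.get_support()].tolist())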
# First, check which combinations of common attributes (age, sex, etc.) occur most often within each cluster
df1=dataset[['cluster','Location', 'Sex', 'Age', 'Age onset', 'Years with ES',
'Seizure Type', ]]
df1['group_incidence'] = df1.groupby(['cluster','Location', 'Sex', 'Age', 'Age onset', 'Years with ES'])['cluster'].transform('size') / len(df)
C:\Users\tommy\AppData\Local\Temp/ipykernel_3456/3965849163.py:5: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# We can see the most common combinations by occurrence, sorted in descending order
## This shows the most frequently repeated groups of age, sex and other attributes among patients
df1.sort_values('group_incidence',ascending=False).head(10)
  | cluster | Location | Sex | Age | Age onset | Years with ES | Seizure Type | group_incidence |
---|---|---|---|---|---|---|---|---|
105 | 1 | 2 | 2.0 | 68.0 | 9.0 | 59.0 | 1.0 | 0.016304 |
163 | 3 | 2 | 1.0 | 47.0 | 35.0 | 12.0 | 82.0 | 0.016304 |
146 | 3 | 2 | 1.0 | 47.0 | 35.0 | 12.0 | 40.0 | 0.016304 |
157 | 3 | 2 | 1.0 | 47.0 | 35.0 | 12.0 | 74.0 | 0.016304 |
179 | 3 | 0 | 1.0 | 13.0 | 5.0 | 8.0 | 57.0 | 0.016304 |
85 | 1 | 2 | 2.0 | 68.0 | 9.0 | 59.0 | 2.0 | 0.016304 |
125 | 1 | 2 | 2.0 | 68.0 | 9.0 | 59.0 | 1.0 | 0.016304 |
177 | 3 | 0 | 1.0 | 13.0 | 5.0 | 8.0 | 49.0 | 0.016304 |
176 | 3 | 0 | 1.0 | 13.0 | 5.0 | 8.0 | 48.0 | 0.016304 |
128 | 4 | 2 | 1.0 | 51.0 | 1.0 | 50.0 | 23.0 | 0.010870 |
df1.sort_values('group_incidence',ascending=False).tail(10)
  | cluster | Location | Sex | Age | Age onset | Years with ES | Seizure Type | group_incidence |
---|---|---|---|---|---|---|---|---|
62 | 3 | 3 | 1.0 | 51.0 | 35.0 | 16.0 | 82.0 | 0.005435 |
70 | 3 | 1 | 1.0 | 29.0 | 15.0 | 14.0 | 41.0 | 0.005435 |
63 | 4 | 1 | 1.0 | 46.0 | 6.0 | 40.0 | 1.0 | 0.005435 |
64 | 4 | 1 | 1.0 | 51.0 | 1.0 | 50.0 | 1.0 | 0.005435 |
65 | 4 | 1 | 1.0 | 52.0 | 14.0 | 38.0 | 22.0 | 0.005435 |
66 | 4 | 1 | 1.0 | 19.0 | 10.0 | 9.0 | 25.0 | 0.005435 |
67 | 4 | 1 | 1.0 | 28.0 | 9.0 | 19.0 | 26.0 | 0.005435 |
68 | 1 | 1 | 2.0 | 44.0 | 38.0 | 6.0 | 26.0 | 0.005435 |
69 | 1 | 1 | 1.0 | 16.0 | 2.0 | 14.0 | 37.0 | 0.005435 |
183 | 3 | 0 | 1.0 | 21.0 | 4.0 | 17.0 | 82.0 | 0.005435 |