Loading in all required source code:
\l code/q/init.q
2023.02.16D15:08:08.507302000 WARN Using random seed 42 for reproducible results
2023.02.16D15:08:08.507336000 WARN Setting the global modelsTrained to False. Web GUI wont be able to use models until this is set to True - 0b
2023.02.16D15:08:08.507353000 INFO Loading in script - "ut.q"
2023.02.16D15:08:08.507446000 INFO Script for utility functions
2023.02.16D15:08:08.507822000 INFO Loading in script - "lib.q"
2023.02.16D15:08:08.507895000 INFO Script for importing python library/module packages
2023.02.16D15:08:08.917000000 INFO Loading in script - "mlf.q"
2023.02.16D15:08:08.917245000 INFO Script for machine learning functions
2023.02.16D15:08:08.918224000 INFO Loading in script - "graphics.q"
2023.02.16D15:08:08.918333000 INFO Script contains functions used for exploratory data analysis
2023.02.16D15:08:08.918666000 INFO Opening port on 9090 to allow the Web GUI to connect
Setting a port which will be accessible to a Web GUI:
system"p 9090"
As the data is stored in a CSV, the standard kdb+ method of loading a CSV is used.
The column names are transformed to conform with camelCase
convention upon loading:
5#data:(camelCase each string cols data) xcol data:("**SJJSSJJJJFFFF";enlist ",")0:`:data/oasis_longitudinal_demographics.csv
subjectId   mriId           group       visit mrDelay mF hand age educ ses mmse cdr etiv     nwbv  asf
--------------------------------------------------------------------------------------------------------
"OAS2_0001" "OAS2_0001_MR1" Nondemented 1     0       M  R    87  14   2   27   0   1986.55  0.696 0.883
"OAS2_0001" "OAS2_0001_MR2" Nondemented 2     457     M  R    88  14   2   30   0   2004.48  0.681 0.876
"OAS2_0002" "OAS2_0002_MR1" Demented    1     0       M  R    75  12       23   0.5 1678.29  0.736 1.046
"OAS2_0002" "OAS2_0002_MR2" Demented    2     560     M  R    76  12       28   0.5 1737.62  0.713 1.01
"OAS2_0002" "OAS2_0002_MR3" Demented    3     1895    M  R    80  12       22   0.5 1697.911 0.701 1.034
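For reference, a minimal sketch of how such a camelCase helper could be written is shown below. It is a hypothetical reconstruction, assuming the raw CSV headers are space- or slash-delimited (e.g. "Subject ID", "MR Delay", "M/F"); the version used above ships with the loaded utility scripts:
/ lowercase the first word, then capitalise only the first letter of each remaining word
camelCaseSketch:{`$lower[first w],raze {@[lower x;0;upper]} each 1_w:" " vs ssr[x;"/";" "]}
camelCaseSketch each ("Subject ID";"MR Delay";"M/F")   / `subjectId`mrDelay`mF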
The column header group
conflicts with its reserved namesake function in the .q
namespace.
The internal .Q.id function is built for dealing with these cases: it appends a 1 to headers that conflict with reserved words, and removes characters that would interfere with select/exec/update queries.
However, a custom renameCol function allows a further degree of control by letting the user specify the new name for the offending column.
`group in .Q.res,key `.q
1b
The group column is renamed to state:
renameCol[`group`state;`data]
subjectId   mriId           state       visit mrDelay mF hand age educ ses mmse cdr etiv    nwbv  asf
-------------------------------------------------------------------------------------------------------
"OAS2_0001" "OAS2_0001_MR1" Nondemented 1     0       M  R    87  14   2   27   0   1986.55 0.696 0.883
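For reference, a minimal sketch of how such a renameCol helper could be implemented is shown below. It is hypothetical (the version used above is loaded from the scripts) and assumes an (oldName;newName) pair plus a table name passed by reference:
renameColSketch:{[c;t]
 x:get t;                                            / dereference the table name
 t set (@[cols x;where cols[x]=c 0;:;c 1]) xcol x;   / swap the old column name for the new one
 t }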
The state feature is extracted into a new target
table and dropped from the main data
table:
target:select state from data
dropCol[`data;`state]
`data
The info()
method in Python's pandas library provides a concise descriptive summary of a dataframe, detailing datatypes, the existence of nulls within attributes and the total number of rows. The same effect can be achieved in q:
info[data]
RangeIndex: 373 entries, 0 to 372
Columns total: 14
column   | nullExists uniqVals datatype
---------| ----------------------------
subjectId| 0          150      string
mriId    | 0          373      string
visit    | 0          5        long
mrDelay  | 0          201      long
mF       | 0          2        symbol
hand     | 0          1        symbol
age      | 0          39       long
educ     | 0          12       long
ses      | 1          6        long
mmse     | 1          19       long
cdr      | 0          4        float
etiv     | 0          371      float
nwbv     | 0          136      float
asf      | 0          265      float
Memory usage: 0.04214764 MB
As shown above, the data is made up of 10 numerical columns and 4 categorical columns. In machine learning these types of attributes are handled differently. Numerical features are easy to use, given that most machine learning algorithms deal with numbers anyway, and generally they do not need to be transformed except during the imputation and standardisation stages. Categorical columns, however, will require modification through encoding and other additional measures.
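For reference, a minimal sketch of an info-style summary is given below. It is hypothetical: the version called above lives in the loaded utility scripts and additionally maps the meta type characters to readable names such as long and string:
infoSketch:{[t]
 n:count t; c:cols t;
 -1 "RangeIndex: ",string[n]," entries, 0 to ",string n-1;
 -1 "Columns total: ",string count c;
 / null flag, distinct count and meta type character per column
 show ([column:c] nullExists:{any raze null x} each t c; uniqVals:{count distinct x} each t c; datatype:exec t from meta t);
 / approximate memory usage from the serialised byte size
 -1 "Memory usage: ",string[(-22!t)%2 xexp 20]," MB"; }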
Another convenient feature in python is the describe()
method which returns a set of summary statistics for each column in a dataframe. Similarly, writing in q:
describe:{[t]
//Extract non-numerical columns
c:exec c from meta[t] where not t in "ifj";
//Remove them from table
t:![t;();0b;c];
//Applying functions to each numerical column of table
d:{`Field`Count`Mean`Min`Max`Median`q25`q75`STD`IQR!raze(x;(count; avg; min; max;med;pctl[.25;];pctl[.75;];dev;iqr)@\:y[x])}[;t] each cols t;
:d }
describe[data]
Field   Count Mean      Min      Max     Median   q25     q75      STD       IQR
-------------------------------------------------------------------------------------
visit   373   1.882038  1        5       2        1       2        0.9216049 1
mrDelay 373   595.1046  0        2639    552      0       873      634.6327  873
age     373   77.0134   60       98      77       71      82       7.630708  11
educ    373   14.59786  6        23      15       12      16       2.872481  4
ses     373   2.460452  1        5       2        1       3        1.132402  2
mmse    373   27.34232  4        30      29       27      30       3.678277  3
cdr     373   0.2908847 0f       2f      0        0f      0.5      0.3740547 0.5
etiv    373   1488.122  1105.652 2004.48 1470.041 1357.33 1596.937 175.8997  239.6068
nwbv    373   0.7295684 0.644    0.837   0.729    0.7     0.756    0.0370852 0.056
asf     373   1.195461  0.876    1.587   1.194    1.099   1.293    0.1379067 0.194
A few things to note:
- The first few columns are self-explanatory and don't need any further elaboration.
- The q25 and q75 fields correspond to percentiles - the value below which the given percentage of observations fall.
- The q25 value for age indicates that 25% of patients are younger than 71 years old.
- The q75 value for age illustrates that 75% of patients are younger than 82 years old.
- The STD field is the standard deviation for each column, i.e. how dispersed the values are.
- IQR is the interquartile range.
- It's apparent that some fields have highly skewed distributions - namely mrDelay and etiv. It is recommended that, before standardising or normalising, a log transformation be applied to make these distributions less skewed. This will aid in making the data more interpretable.
The hand column has a single unique value. This is an uninformative zero-variance variable which adds nothing to predictions. It is best practice to remove such features:
dropCol[`data;`hand]
`data
A common oversight in many machine learning tasks is to perform exploratory data analysis and data preprocessing prior to splitting the dataset into train and test splits. This whitepaper will follow the general principle **[7]**:
Anything you learn, must be learned from the model's training data
The test set is intended to gauge model performance on totally unseen data. If it affects training in any way, it can be classified as partially seen. If the full dataset is used during standardisation, the scaler has essentially snooped on data that should have been withheld from training, and in the process has implicitly learned the mean, median and standard deviation of the testing data by including it in its calculations. This is called data snooping bias.
As a result, models will perform strongly on training data at the expense of generalising poorly on test data (a classic case of overfitting).
To circumvent this possibility, the training and testing data is split, using a 80:20 ratio, before pre-processing steps are applied. The seed parameter ensures that the same indices will always be generated for every run to obtain reproducible results.
trainTestSplit[data;target;0.2;seed]
shape each `X_train`X_test`y_train`y_test;
2023.02.16D15:08:08.920601000 INFO Shape of X_train table: 298 rows 13 columns
2023.02.16D15:08:08.920614000 INFO Shape of X_test table: 75 rows 13 columns
2023.02.16D15:08:08.920623000 INFO Shape of y_train table: 298 rows 1 columns
2023.02.16D15:08:08.920631000 INFO Shape of y_test table: 75 rows 1 columns
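For reference, a minimal sketch of such a seeded split is shown below. The name splitSketch and the exact mechanics are hypothetical; the trainTestSplit used above is defined in mlf.q and also logs the shapes shown:
splitSketch:{[x;y;sz;sd]
 system "S ",string sd;              / fix the random seed so the deal below is reproducible
 ix:neg[n]?n:count x;                / random permutation of the row indices
 k:floor n*sz;                       / number of rows assigned to the test split
 `X_test`y_test set' (x;y) @\: k#ix;
 `X_train`y_train set' (x;y) @\: k _ ix; }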
Looking at the distribution of the target features in the y_train set:
asc count'[group y_train]
state      |
-----------| ---
Converted  | 27
Demented   | 116
Nondemented| 155
Within the target attribute exists a Converted state, which signifies an individual being diagnosed with mild cognitive impairment on a follow-up visit, having initially shown no cognitive impairment.
To keep this within the realms of a binary classification problem, i.e. whether an individual is exhibiting early symptoms of Dementia or not, the Converted values will be transformed to Demented values.
This step will be executed prior to evaluating machine learning algorithm performance.
By visualising data graphically, it is possible to gain important insights that would not be as obvious through eyeballing the data. Exploratory data analysis (EDA) is the practice of describing data visually, through statistical and graphical means, to bring important patterns of that data into focus for further investigation.
There are two main plotting libraries in Python to aid with visual analysis: matplotlib and seaborn.
The seaborn library is preferred throughout this whitepaper, mainly due to its aesthetics and its ability to visualise many different features at once.
To remain honest, the test set is not used during visual analysis.
To avoid any contamination with training data, a copy of the training data is created which will contain both X_train and y_train tables, joined together using the each-both adverb. The train_copy
is deleted from memory after EDA:
train_copy:X_train,'y_train
Initialise graph configuration settings:
graphConfig[]
Many graphs in seaborn use the hue variable as a setting parameter during plotting for colour encoding. In this example, hue is the target attribute state. As a result, seaborn will colour the datapoints differently for each distinct state value (Nondemented / Demented / Converted). In the below example, factor plots portraying the distribution of nondemented and demented individuals across genders are coloured accordingly - light green signifying the non-demented state, dark green demented and turquoise converted patients.
factorPlot[train_copy;`mF;`state];
Initial Assumptions:
Next, a count plot is used to display the number of distinct values per column (similar to plotting histograms).
A list of columns is split pairwise using the custom splitList
function. Each column pair is then graphically displayed on the same axis:
\l code/q/graphics.q
2023.02.16D15:08:09.108346000 INFO Script contains functions used for exploratory data analysis
countPlot[train_copy;;] ./: splitList[`age`ses`mmse`cdr`educ`visit;3]
:: :: ::
A few things can be concluded from these graphs about the dataset:
- It seems that the majority of individuals in this study are aged between 70 and 85, with the most frequent age being 73 years old.
- More than half of the dataset's population received a CDR rating of 0 (>200 subject IDs), while fewer than 10 subjects were diagnosed with a 2.0 CDR score.
- The most frequent socio-economic status score in this study was 2.0, with over 100 subject IDs (103), followed by over 85 subject IDs graded as 1.0 on the ses scale.
- Most of the individuals in this study spent a significant number of years in education, with over two-thirds of the study having completed at least 12 years of education.
- The majority of subject IDs scored highly in their MMSE examinations (>50% scoring 29 or above).
- The percentage ratio between females and males was 57:43.
The next stage of data visualisation involves plotting FacetGrid charts to portray the variation of Dementia (Demented, Nondemented and Converted diagnoses) as a function of etiv, educ, ses, nwbv, mmse and asf:
facetGrid[train_copy;`state;"Dementia Variation in ";] each `etiv`educ`ses`nwbv`mmse`asf;
A few observations:
Pairplots are plotted to visualise whether any relationships exist between genders:
pairPlot[select asf,nwbv,etiv,mF from data; `mF]
<Figure size 2160x1440 with 0 Axes>
Splitting the training columns, by datatype, into their own group:
numCols:exec c from meta[X_train] where not t in "Cs";
strCols:exec c from meta[X_train] where t in "C";
symCols:exec c from meta[X_train] where t in "s";
As shown in the describe table, some fields are highly skewed and thus have the potential to alter model performance. To circumvent this, a log transform is applied to each of the skewed columns:
@[`X_train;`mrDelay`etiv;{log(1+x)}]
`X_train
A pivot table is computed to encode the categorical state values - Converted, Demented and Nondemented - into binary 0 | 1 numeric values.
pivotTab[train_copy;`state]
state      | Converted Demented Nondemented
-----------| ------------------------------
Converted  | 1         0        0
Demented   | 0         1        0
Nondemented| 0         0        1
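For reference, pivotTab could be sketched as below (hypothetical; the function used above is loaded from the scripts). It builds a keyed identity-style table with one indicator column per distinct value of the chosen column:
pivotTabSketch:{[t;v]
 u:asc distinct t v;                                 / distinct categorical values
 (flip (enlist v)!enlist u)!flip u!"j"$u=/:u }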
An lj is then used in dummies to replace the Converted, Demented and Nondemented values with the numeric values in the pivot table. The output is assigned to a global dummy table:
dummies:{[t;v;r]
 //Left-join the pivot table so each row gains its indicator columns
 pvt: t lj pivotTab[t;v];
 //Drop the original categorical column
 t:(enlist v) _ pvt;
 show 10#t;
 //Assign the result to the global name r
 @[`.;r;:;t]; }
dummies[train_copy;`state;`dummy]
subjectId   mriId           visit mrDelay mF age educ ses mmse cdr etiv     nwbv  asf   Converted Demented Nondemented
------------------------------------------------------------------------------------------------------------------------
"OAS2_0147" "OAS2_0147_MR3" 3     1204    F  80  13   2   28   0   1336.6   0.762 1.313 0         0        1
"OAS2_0142" "OAS2_0142_MR2" 2     665     F  71  16   3   28   0   1390.443 0.81  1.262 0         0        1
"OAS2_0108" "OAS2_0108_MR2" 2     883     M  79  18   1   27   0.5 1569.498 0.781 1.118 0         1        0
"OAS2_0058" "OAS2_0058_MR1" 1     0       M  78  14   3   30   0.5 1314.52  0.707 1.335 0         1        0
"OAS2_0069" "OAS2_0069_MR2" 2     432     F  82  18   2   30   0   1470.873 0.69  1.193 0         0        1
"OAS2_0140" "OAS2_0140_MR3" 3     1655    F  81  16   3   25   0.5 1395.627 0.687 1.257 0         1        0
"OAS2_0103" "OAS2_0103_MR1" 1     0       F  69  16   1   30   0   1404.463 0.75  1.25  1         0        0
"OAS2_0086" "OAS2_0086_MR1" 1     0       F  63  15   2   28   0   1544.31  0.805 1.136 0         0        1
"OAS2_0017" "OAS2_0017_MR5" 5     2400    M  86  12   3   27   0   1813.215 0.761 0.968 0         0        1
"OAS2_0023" "OAS2_0023_MR2" 2     578     F  87  12   4   21   0.5 1249.56  0.652 1.405 0         1        0
A correlation matrix is computed to gauge if any inherent relationships exist between the features and the target variable. The target state
feature is converted into a dummy/indicator numeric variable to convey which attributes share the strongest correlation with it:
corrMatrix[dummy];
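The underlying correlation values can also be computed directly in q with the built-in cor keyword. The sketch below is hypothetical - the corrMatrix function used above additionally renders the matrix as a seaborn heatmap:
corMatSketch:{[t]
 c:exec c from meta t where t in "hijef";            / numeric columns only
 ([]feature:c)!flip c!t[c] cor/:\: t[c] }
corMatSketch[dummy]                                  / pairwise Pearson correlations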
The function corrPairs can also be called to display the n highest positive-correlation feature pairs:
corrPairs[dummy;`Demented`Nondemented`Converted;5]
cdr  Demented   | 0.8327593
mmse Nondemented| 0.5301072
nwbv Nondemented| 0.316708
educ Nondemented| 0.1814134
ses  Demented   | 0.1520445
From the correlation matrix, which depicts Pearson's r (the correlation coefficient) between two features, some tentative conclusions can be drawn:
- cdr has a strong positive correlation with the Demented indicator. This is expected, given that the CDR scale is used to gauge whether a patient has symptoms of Dementia. Because this feature is so strongly correlated with the target attribute, it is advisable to drop it during feature selection due to collinearity.
- age and nwbv have a strong negative correlation.
- The educ feature has strong correlations with mmse and etiv, and a very strong negative correlation with ses.
- Most of the attributes share a correlation that is close to 0, so it can be assumed that there is little to no linear relationship between them.
One-hot encoding is used to transform/encode categorical values into separate binary values. The term one-hot signifies that a single value in the list is 1 (hot) whilst the remaining values are 0 (cold).
This is desired as most machine learning algorithms perform better when dealing with non-categorical values.
Thus, the mF categorical values are one-hot encoded:
hotEncode[`X_train;`mF]
subjectId   mriId           visit mrDelay  age educ ses mmse cdr etiv     nwbv  asf   F M
------------------------------------------------------------------------------------------
"OAS2_0147" "OAS2_0147_MR3" 3     7.094235 80  13   2   28   0   7.198632 0.762 1.313 1 0
"OAS2_0142" "OAS2_0142_MR2" 2     6.50129  71  16   3   28   0   7.238096 0.81  1.262 1 0
"OAS2_0108" "OAS2_0108_MR2" 2     6.784457 79  18   1   27   0.5 7.359148 0.781 1.118 0 1
"OAS2_0058" "OAS2_0058_MR1" 1     0        78  14   3   30   0.5 7.181987 0.707 1.335 0 1
"OAS2_0069" "OAS2_0069_MR2" 2     6.070738 82  18   2   30   0   7.294291 0.69  1.193 1 0
"OAS2_0140" "OAS2_0140_MR3" 3     7.41216  81  16   3   25   0.5 7.241816 0.687 1.257 1 0
"OAS2_0103" "OAS2_0103_MR1" 1     0        69  16   1   30   0   7.248122 0.75  1.25  1 0
"OAS2_0086" "OAS2_0086_MR1" 1     0        63  15   2   28   0   7.34298  0.805 1.136 1 0
"OAS2_0017" "OAS2_0017_MR5" 5     7.783641 86  12   3   27   0   7.503408 0.761 0.968 0 1
"OAS2_0023" "OAS2_0023_MR2" 2     6.361302 87  12   4   21   0.5 7.131347 0.652 1.405 1 0
..
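For reference, a minimal sketch of a hotEncode helper is given below. It is hypothetical (the version used above ships with the loaded scripts): it replaces a symbol column in a table, passed by reference, with one indicator column per distinct value:
hotEncodeSketch:{[t;c]
 x:get t;
 u:asc distinct x c;                                 / one new column per distinct value
 t set ((enlist c) _ x),'flip u!"j"$u=\:x c;
 t }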
Handling missing features within machine learning is paramount. For various reasons, many real-world datasets come with missing values, often encoded as blanks, NaNs or other placeholders. Many algorithms cannot work with missing features, so they must be dealt with. There are various options to combat this:
The function impute takes a table reference and a method (med, avg, mode) with which to infer the missing values.
Replacing missing features with each column's median value is executed below:
impute[`X_train;`med]
2023.02.16D15:08:13.315219000 WARN Found nulls in the following columns: ses,mmse
2023.02.16D15:08:13.315231000 INFO Replacing nulls with the columns med value
`X_train
The values within the ses column are reversed to convey the correct information to the model.
Currently, a low ses value (1 or 2) represents a high economic status whilst a high ses value (4 or 5) depicts a low economic status.
Values are reversed (1->5, 2->4, 4->2, 5->1) so that the higher the score, the higher the income an individual received:
update ses:{d:v!reverse v:asc[distinct X_train`ses];d[x]}'[ses] from `X_train
`X_train
An outlier table is defined to track any outlier values and their indices:
outliers:([feature:()]index:();outlier:())
The outlierDetect function uses the z-score method to detect whether any datapoint has a standard score greater than a threshold value of 3.
The z-score defines how many standard deviations a datapoint is away from the mean.
outlierDetect[X_train;;`outliers] each numCols;
Anomalous values are now persisted within the global outliers table:
outliers
feature| index                   outlier
-------| ----------------------------------------
visit  | 8 16 23 52 108 216      5 5 5 5 5 5
mmse   | 108 129 155 157 291 297 4 7 16 15 16 15f
cdr    | 157 197                 2 2f
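For reference, the z-score check inside outlierDetect can be sketched as follows. The names zScore and outlierIdx are hypothetical; the library function additionally records the offending indices and values in the supplied outliers table:
zScore:{abs (x-avg x)%dev x}                         / standard score of each datapoint
outlierIdx:{[t;c] where 3<zScore t[c]}               / rows more than 3 standard deviations from the mean
outlierIdx[X_train;`mmse]                            / should recover the mmse indices recorded above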
These outliers can be visualised by graphing whiskerplots:
whiskerPlot select visit,mmse,cdr from X_train
<Figure size 1440x720 with 0 Axes>
NOTE: Outliers can also be visualised in 3D space using the Isolation Forest technique. Please refer to the appendix: Using Isolation Forest to view outliers in 3-D space
Dealing with outliers is imperative in machine learning as they can significantly influence the data and thus add extra bias during evaluation. Some possible solutions to circumvent the effect of outliers:
The outlierTransform
function is called below, with the winsorize transform so that each outlier value is replaced with the 5th and 95th percentile respectively:
outlierTransform[`winsorize;`outliers]
2023.02.16D15:08:13.404391000 WARN For the visit feature, replacing outlier values:
outlier
-----------
5 5 5 5 5 5
2023.02.16D15:08:13.404434000 INFO With the winsorize value:
winsor
-----------
4 4 4 4 4 4
2023.02.16D15:08:13.404484000 WARN For the mmse feature, replacing outlier values:
outlier
---------------
4 7 16 15 16 15
2023.02.16D15:08:13.404516000 INFO With the winsorize value:
winsor
-----------------
29 29 29 29 29 29
2023.02.16D15:08:13.404547000 WARN For the cdr feature, replacing outlier values:
outlier
-------
2 2
2023.02.16D15:08:13.404571000 INFO With the winsorize value:
winsor
------
1 1
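For reference, winsorizing a numeric vector can be sketched in q as below. This is a hypothetical helper built on the pctl percentile utility already used in describe; the library outlierTransform only replaces the recorded outlier values rather than clamping the whole column:
winsorizeSketch:{[x]
 lo:pctl[0.05;x]; hi:pctl[0.95;x];
 lo|hi&x }                                           / clamp values below the 5th and above the 95th percentile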
A training statistics table, trainingStats
, is defined inside mlf.q
to capture important metrics such as:
These metrics will be used to transform unseen test data:
trainInfo[X_train;`trainingStats;] each cols[X_train] except `subjectId`mriId;
trainingStats
feature| maxVal   minVal   medVal   avgVal    stdVal
-------| ----------------------------------------------
visit  | 4        1        2        1.855705  0.8685313
mrDelay| 7.878534 0        6.290642 4.01654   3.343527
age    | 97       60       77       76.83221  7.640762
educ   | 23       6        14       14.61074  2.900385
ses    | 5        1        4        3.57047   1.106752
mmse   | 30       17       29       27.69128  3.020954
cdr    | 1        0        0        0.2818792 0.3538956
etiv   | 7.603638 7.024444 7.296861 7.301141  0.1160262
nwbv   | 0.837    0.644    0.729    0.7300034 0.0373581
asf    | 1.563    0.876    1.19     1.19297   0.1372664
F      | 1        0        1        0.5637584 0.4959182
M      | 1        0        0        0.4362416 0.4959182
Collinear and irrelevant features are removed from the dataset:
dropCol[`X_train;`subjectId`mriId`cdr];
In machine learning it is generally a requirement that for algorithms to perform optimally, features should be standardised or normalised.
When standardising a dataset, features are rescaled so that they have the properties of a standard normal distribution with
$$\mu = 0, \qquad \sigma = 1$$
where μ is the mean (average) and σ is the standard deviation from the mean, i.e. the features are centred around 0 with a standard deviation of 1 **[10]**.
The z-score of each datapoint can then be computed using:
$$z = \frac{x - \mu}{\sigma}$$
which can be written simply in q as:
stdScaler:{(x-avg x)%dev x}
An alternative approach is to use normalisation (often called min-max scaling).
When using this method, data is scaled to a fixed range - usually 0 to 1. The cost of having this fixed range is that we end up with smaller standard deviations, which can suppress the effect of outliers **[10]**.
Min-max scaling is performed using the following equation:
$$X_{norm} = \frac{X - X_{min}}{X_{max} - X_{min}}$$
which can also be written in q as:
minMaxScaler:{(x-m)%max[x]-m:min x}
There's no obvious answer when choosing between standardisation and normalisation.
Both scaling transforms are executed on the training split and their outputs are visualised below:
.log.info["Applying stdScaler transformation:";()];
scaleBeforeAfter[stdScaler;X_train;`mmse`etiv`asf`age];
.log.info["Applying minMax transformation:";()];
scaleBeforeAfter[minMaxScaler;X_train;`mmse`etiv`asf`age];
2023.02.16D15:08:13.405015000 INFO Applying stdScaler transformation:
2023.02.16D15:08:13.775445000 INFO Applying minMax transformation:
From the above graphs, Standardisation is chosen over normalisation.
This scaler is then permanently applied to the training dataset via a functional apply:
@[`X_train;cols X_train;stdScaler];
Next, several functions are added to the trainingStats table so that transformations can be easily reproduced on any dataset. These functions handle:
- feature scaling, by standardising or normalising numerical attributes that have different scales
- imputation, by replacing null/missing values with a median value
Each function is lightweight - a projection over the computed training max, min, median, mean or standard deviation values.
This means transforming unseen data becomes relatively simple:
update stdScaler:{(x-y)%z}[;first avgVal;first stdVal],
normaliser:{[x;minVal;maxVal] (x-minVal)%maxVal-minVal}[;first minVal;first maxVal],
imputer:{y^x}[;first medVal] by feature from `trainingStats
`trainingStats
For example, unseen visit values can be standardised, normalised or imputed referencing the below projections:
trainingStats[`visit]
maxVal    | 4f
minVal    | 1f
medVal    | 2f
avgVal    | 1.855705
stdVal    | 0.8685313
stdScaler | {(x-y)%z}[;1.855705;0.8685313]
normaliser| {[x;minVal;maxVal] (x-minVal)%maxVal-minVal}[;1f;4f]
imputer   | {y^x}[;2f]
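As an illustration (these calls are not part of the run above), applying the stored projections to hypothetical unseen visit values:
trainingStats[`visit;`stdScaler] 3                   / (3 - 1.855705) % 0.8685313 ~ 1.3175
trainingStats[`visit;`imputer] 0n                    / a null falls back to the training median of 2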
Since this is a binary classification problem, the Converted values in the y_train and y_test datasets are substituted with Demented. In addition, these datasets are encoded, using a vector conditional, into numerical values where:
- 0 = Nondemented
- 1 = Demented
update state:?[state=`Nondemented;0;1] from `y_train;
update state:?[state=`Nondemented;0;1] from `y_test;
After encoding the target classes, the distribution of classes in the target dataset is checked:
asc count'[group y_train]
state|
-----| ---
1    | 143
0    | 155
Overall, there are 12 more 0 than 1 classes.
Using a machine learning estimator out of the box when classes aren't evenly distributed can be problematic.
To address this imbalanced-class issue, new examples of the minority 1 class will be synthesised using the SMOTE technique.
SMOTE (Synthetic Minority Over-sampling Technique) connects the dots between minority-class datapoints and, along these connections, creates new synthetic minority-class examples.
This technique is applied after the kdb+ tables are transformed into python arrays.
The order of columns is shuffled pre-evaluation:
X_train:shuffle[cols X_train] xcols 0!X_train;
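For reference, such a shuffle helper can be written as a one-line random permutation (a hypothetical sketch; the loaded scripts provide the version used here):
shuffleSketch:{neg[count x]?x}                       / deal every item of the list in a random order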
There are many transformation steps that need to be executed (in the correct order) before the data can be evaluated by machine learning algorithms.
In Scikit-learn, a Pipeline class is usually used to assist with such sequences of transformations.
To help automate the above machine learning workflow, the feature engineering and data cleaning steps are grouped into a pipeline function:
pipeline:{[t;s;c]
 if[not -11h= type t;'"Please pass table as reference"];
 //Ensure skc is an atom when only a single skewed column is present
 $[1< count skc:`mrDelay`etiv inter cols t; skc; skc:first skc];
 .log.info["Log transforming highly-skewed columns";()];
 @[t;skc;{log(1+x)}];
 .log.info["Hot-encoding sym columns";()];
 hotEncode[t;] each exec c from meta[t] where t in "s";
 .log.info["Dropping irrelevant features";()];
 dropCol[t;c];
 //Keep a global reference (x1) to the table being transformed
 `x1 set t;
 .log.info["Performing imputation and standard scaling on ",string[t];()];
 {[t;s;c;m] transform[t;s;c;m]}[t;s;cols t;] each `imputer`stdScaler;
 .log.info["Pipeline transform complete";()];
 : 10#get t }
pipeline[`X_test;trainingStats;`subjectId`mriId`mF`cdr]
2023.02.16D15:08:14.141399000 INFO Log transforming highly-skewed columns
2023.02.16D15:08:14.141421000 INFO Hot-encoding sym columns
2023.02.16D15:08:14.141455000 INFO Dropping irrelevant features
2023.02.16D15:08:14.141467000 INFO Performing imputation and standard scaling on X_test
2023.02.16D15:08:14.141476000 INFO imputer executing
2023.02.16D15:08:14.141495000 INFO stdScaler executing
2023.02.16D15:08:14.141512000 INFO Pipeline transform complete
visit      mrDelay   age        educ       ses        mmse       etiv       nwbv       asf        M          F
----------------------------------------------------------------------------------------------------------------------
-0.9852318 -1.201288 1.330729   -0.2105714 -1.41899   -0.2288267 2.529743   -0.9102004 -2.258163  1.136797   -1.136797
1.317506   1.056059  0.4145902  -0.900135  0.3880998  -1.883933  1.177332   -0.7763606 -1.158112  1.136797   -1.136797
-0.9852318 -1.201288 -0.7633028 0.4789922  0.3880998  0.1021945  -0.7509522 0.4817334  0.7287306  1.136797   -1.136797
2.468875   0.9390194 -0.2397948 0.4789922  0.3880998  -0.2288267 -0.6576447 -0.535449  0.6267391  1.136797   -1.136797
0.1661371  0.7710042 0.4145902  0.4789922  -1.41899   0.4332157  -0.9711238 0.2140538  0.9691391  -0.8796644 0.8796644
-0.9852318 -1.201288 -0.1089178 0.4789922  -0.5154451 -2.214954  0.6758996  -0.8834324 -0.7064352 1.136797   -1.136797
0.1661371  0.660386  0.02195923 0.4789922  -0.5154451 -3.87006   0.6105911  -0.9102004 -0.6481544 1.136797   -1.136797
-0.9852318 -1.201288 1.461606   -2.279262  0.3880998  -0.8908692 0.9339126  -1.873847  -0.9468437 1.136797   -1.136797
1.317506   0.7207803 0.5454672  -0.900135  -0.5154451 -0.2288267 1.745446   0.776181   -1.638929  1.136797   -1.136797
2.468875   1.061994  1.985114   -0.2105714 -2.322535  -0.2288267 -0.3463126 -0.9102004 0.2989094  -0.8796644 0.8796644
Similar to the X_train dataset, the order of columns is shuffled pre-evaluation:
X_test:shuffle[cols X_test] xcols 0!X_test;
The kdb+ tables are then transformed into Python-readable matrices:
tab2Array each `X_train`y_train`X_test`y_test
2023.02.16D15:08:14.141687000 INFO X_train converted to array
2023.02.16D15:08:14.141712000 INFO y_train converted to array
2023.02.16D15:08:14.141725000 INFO X_test converted to array
2023.02.16D15:08:14.141740000 INFO y_test converted to array
`X_train`y_train`X_test`y_test
Note, to perform the reverse transformation, i.e. a Python array to kdb+ table, run the array2Tab function.
Now, as alluded to already, the class imbalance problem is addressed using the SMOTE technique to generate synthetic minority 1 classes:
sm:smote[`k_neighbors pykw 5; `random_state pykw seed]
`X_train`y_train set' sm[`:fit_resample][X_train;y_train]`
`X_train`y_train
The No Free Lunch theorem in machine learning stipulates that there is no single algorithm which works perfectly for all datasets **[7]**. Thus, the performance of differing machine learning classifiers will be computed and compared.
The main evaluation metric to gauge algorithm performance is the AUC (Area under ROC curve) score.
The ROC curve displays the trade-off between the true positive rate and the false positive rate [6]. In the case of diagnosing Dementia, it's imperative that patients who exhibit symptoms are identified as early as possible (high true positive rate) whilst healthy patients aren't misdiagnosed with Dementia and put on treatment (low false positive rate). AUC is the most appropriate performance measure as it will aid in distinguishing between the two diagnostic groups (Demented / Nondemented) [7].
Other evaluation metrics are used to complement the AUC score but don't carry the same weight.
These include:
The evaluateAlgos
function calls classReport
which computes all of the above evaluation metrics - AUC scores, classification reports (containing recall score) and a DOR score. This function will be called for a series of machine learning algorithms.
Before evaluation, a global scores table is defined that will record important metrics used to evaluate the validity of each model, and whether that model is using default parameters or hyperparameters that have been tuned via grid search or randomized grid search:
scores:([models:();parameters:()]DiagnosticOddsRatio:();TrainingAccuracy:();TestAccuracy:();TestAuc:())
Note:
Linear classifiers are evaluated using default parameters first:
evaluateAlgos[linearClassifiers;`noTuning]
2023.02.16D15:08:14.147153000 INFO BaselineModel auc_score= 0.5 2023.02.16D15:08:14.156818000 INFO Classification report showing precision, recall and f1-score for each class: precision recall f1-score support Nondemented 0.47 1.00 0.64 35 Demented 0.00 0.00 0.00 40 accuracy 0.47 75 macro avg 0.23 0.50 0.32 75 weighted avg 0.22 0.47 0.30 75 DOR score: ============================================================ 2023.02.16D15:08:14.164591000 INFO LogisticRegression auc_score= 0.6514286 2023.02.16D15:08:14.165058000 INFO Classification report showing precision, recall and f1-score for each class: precision recall f1-score support Nondemented 0.57 0.49 0.52 35 Demented 0.60 0.68 0.64 40 accuracy 0.59 75 macro avg 0.58 0.58 0.58 75 weighted avg 0.58 0.59 0.58 75 DOR score: 1.961538 ============================================================ 2023.02.16D15:08:14.169809000 INFO LinearDiscriminantAnalysis auc_score= 0.6892857 2023.02.16D15:08:14.170311000 INFO Classification report showing precision, recall and f1-score for each class: precision recall f1-score support Nondemented 0.59 0.66 0.62 35 Demented 0.67 0.60 0.63 40 accuracy 0.63 75 macro avg 0.63 0.63 0.63 75 weighted avg 0.63 0.63 0.63 75 DOR score: 2.875 ============================================================
Next, non-linear classifiers with default parameters:
evaluateAlgos[nonLinearClassifiers;`noTuning]
2023.02.16D15:08:14.485497000 INFO NeuralNetworks auc_score= 0.6507143 2023.02.16D15:08:14.500306000 INFO Classification report showing precision, recall and f1-score for each class: precision recall f1-score support Nondemented 0.58 0.51 0.55 35 Demented 0.61 0.68 0.64 40 accuracy 0.60 75 macro avg 0.60 0.59 0.59 75 weighted avg 0.60 0.60 0.60 75 DOR score: 2.199095 ============================================================ 2023.02.16D15:08:14.520374000 INFO SVM auc_score= 0.6428571 2023.02.16D15:08:14.521350000 INFO Classification report showing precision, recall and f1-score for each class: precision recall f1-score support Nondemented 0.55 0.49 0.52 35 Demented 0.59 0.65 0.62 40 accuracy 0.57 75 macro avg 0.57 0.57 0.57 75 weighted avg 0.57 0.57 0.57 75 DOR score: 1.753968 ============================================================ 2023.02.16D15:08:14.529485000 INFO NaiveBayes auc_score= 0.5092857 2023.02.16D15:08:14.530482000 INFO Classification report showing precision, recall and f1-score for each class: precision recall f1-score support Nondemented 0.54 0.20 0.29 35 Demented 0.55 0.85 0.67 40 accuracy 0.55 75 macro avg 0.54 0.53 0.48 75 weighted avg 0.54 0.55 0.49 75 DOR score: 1.416667 ============================================================
Finally, tree based classifiers:
evaluateAlgos[treeBasedClassifiers;`noTuning]
2023.02.16D15:08:14.660051000 INFO DecisionTreeClassifier auc_score= 0.5464286 2023.02.16D15:08:14.669666000 INFO Classification report showing precision, recall and f1-score for each class: precision recall f1-score support Nondemented 0.55 0.34 0.42 35 Demented 0.57 0.75 0.65 40 accuracy 0.56 75 macro avg 0.56 0.55 0.53 75 weighted avg 0.56 0.56 0.54 75 DOR score: 1.565217 ============================================================ 2023.02.16D15:08:14.782504000 INFO RandomForests auc_score= 0.5425 2023.02.16D15:08:14.782986000 INFO Classification report showing precision, recall and f1-score for each class: precision recall f1-score support Nondemented 0.59 0.29 0.38 35 Demented 0.57 0.82 0.67 40 accuracy 0.57 75 macro avg 0.58 0.56 0.53 75 weighted avg 0.58 0.57 0.54 75 DOR score: 1.885714 ============================================================ 2023.02.16D15:08:14.845644000 INFO Adaboost auc_score= 0.5628571 2023.02.16D15:08:14.846118000 INFO Classification report showing precision, recall and f1-score for each class: precision recall f1-score support Nondemented 0.62 0.29 0.39 35 Demented 0.58 0.85 0.69 40 accuracy 0.59 75 macro avg 0.60 0.57 0.54 75 weighted avg 0.60 0.59 0.55 75 DOR score: 2.266667 ============================================================ 2023.02.16D15:08:14.904003000 INFO GradientBoost auc_score= 0.565 2023.02.16D15:08:14.904641000 INFO Classification report showing precision, recall and f1-score for each class: precision recall f1-score support Nondemented 0.69 0.26 0.37 35 Demented 0.58 0.90 0.71 40 accuracy 0.60 75 macro avg 0.64 0.58 0.54 75 weighted avg 0.63 0.60 0.55 75 DOR score: 3.115385 ============================================================
`TestAuc xasc scores
models                     parameters| DiagnosticOddsRatio TrainingAccuracy TestAccuracy TestAuc
--------------------------------------| ---------------------------------------------------------
BaselineModel              noTuning  |                     0.5              0.4666667    0.5
NaiveBayes                 noTuning  | 1.416667            0.7612903        0.5466667    0.5092857
RandomForests              noTuning  | 1.885714            1                0.5733333    0.5425
DecisionTreeClassifier     noTuning  | 1.565217            1                0.56         0.5464286
Adaboost                   noTuning  | 2.266667            0.8903226        0.5866667    0.5628571
GradientBoost              noTuning  | 3.115385            0.9967742        0.6          0.565
SVM                        noTuning  | 1.753968            0.7903226        0.5733333    0.6428571
NeuralNetworks             noTuning  | 2.199095            0.8677419        0.6          0.6507143
LogisticRegression         noTuning  | 1.961538            0.7967742        0.5866667    0.6514286
LinearDiscriminantAnalysis noTuning  | 2.875               0.8032258        0.6266667    0.6892857
First off, most models with default parameters provide a significant improvement over the baseline model, indicating that applying machine learning techniques to this dataset is worthwhile.
It is also apparent that all models suffer from overfitting, a consequence of having a small dataset: they score strongly on training predictions but generalise poorly on unseen data.
Please refer to the following section in the appendix to see the effects of Overfitting in SVM.
The next steps will try to circumvent this issue by:
- Using the Boruta algorithm to remove irrelevant features and only retain features that fall within an area of absolute acceptance.
- Using the GridSearch and RandomizedGridSearch techniques with cross validation to finely tune hyperparameters that could unknowingly exacerbate overfitting.
Convert the arrays back into q tables:
array2Tab each `X_train`X_test
2023.02.16D15:08:15.025761000 INFO X_train reverted back to q table 2023.02.16D15:08:15.025796000 INFO X_test reverted back to q table
`X_train`X_test
Feature selection
is the process of finding a subset of features in the dataset X
which have the greatest discriminatory power with respect to the target variable y
.
If feature selection is ignored:
- It becomes computationally expensive, as the model has to process a large number of features.
- Garbage in, garbage out **[7]**: when the number of features is significantly higher than optimal, a dip in accuracy is observed. Occam's razor stipulates that a problem should be simplified by removing irrelevant features that would otherwise introduce unnecessary noise. If a model remembers noise in a small dataset, it could generalise poorly on unseen data.
Ideally, instead of manually going through each feature to decide if any relationship exists between it and the target, an algorithm is desired that can autonomously decide whether any given feature of X bears some predictive value about y.
This is what the Boruta algorithm does.
A Random Forest algorithm is fitted on X and y. The feature importances are extracted from the RF model, and only features that are above a threshold of importance are retained **[12]**. Digging deeper:
- The threshold is defined as the highest feature importance that exists among the shadow features.
- Shadow features are created by randomly permuting each column of X. So for a column age: 1 2 3 4 5, the corresponding shadow feature is shadow_age: 3 5 2 4 1, where the randomisation is governed by the random seed (see the sketch after this list).
- Whenever a feature's importance is higher than the threshold value, it is called a hit.
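A minimal sketch of how shadow features could be generated in q is shown below. The name shadowFeatures is hypothetical; the featSelect function used shortly encapsulates the full Boruta procedure:
/ append a randomly permuted copy of every column, prefixed with shadow_
shadowFeatures:{[t]
 t,'flip (`$"shadow_",/:string cols t)!{neg[count x]?x} each t cols t}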
The key to the Boruta algorithm is iteration: running the algorithm 100 times is more reliable than running it once. After the iterations have completed, features are grouped into 3 regions:
This algorithm can be written in q as shown below.
The iteration count is arbitrary. The user provides a list of integers where:
So in the below case, 80 runs are executed against the training dataset X_train, with the random seed value incrementing on each run (starting at 1, finishing at 80). The user decides how many features to extract from the area of acceptance (3 in this case):
featSelect[X_train;y_train;1+til 80;3]
2023.02.16D15:08:41.505694000 INFO Following features fell within the area of acceptance:
nwbv   | 80
mmse   | 80
educ   | 75
etiv   | 57
asf    | 47
mrDelay| 44
age    | 24
2023.02.16D15:08:41.505789000 WARN Following features fell within area of refusal/irresolution:
M      | 80
ses    | 80
visit  | 80
F      | 80
age    | 56
mrDelay| 36
asf    | 33
etiv   | 23
educ   | 5
2023.02.16D15:08:41.505824000 INFO Keeping top 3 boruta features for selection: nwbv,mmse,educ
2023.02.16D15:08:41.505835000 INFO Reverting random seed back to 42
Important features [nwbv mmse educ] are extracted and kept using the Boruta Algorithm.
The remaining features are dropped:
dropCol[`X_train;cols[X_train] except borutaFeatures]
dropCol[`X_test;cols[X_test] except borutaFeatures]
`X_train
`X_test
Train and test sets are converted back to python arrays:
tab2Array each `X_train`X_test
2023.02.16D15:08:41.506085000 INFO X_train converted to array
2023.02.16D15:08:41.506113000 INFO X_test converted to array
`X_train`X_test
In order to further improve the AUC score for each model, the hyperparameters for each classifier are optimized using one of the following techniques:
GridSearch simplifies hyperparameter implementation and optimization. A dictionary containing the hyperparameters to be experimented with is passed to a GridSearchCV function, which will evaluate all possible combinations of hyperparameter values using cross validation on a model of the user's choosing **[7]**.
GridSearch is ideal when the combinations that you wish to explore aren't plentiful, but whenever the hyperparameter space is large, it is preferable to make use of RandomizedSearchCV. This class works much like GridSearch but with one major difference: instead of experimenting on all possible parameter combinations, randomized search evaluates a fixed number of hyperparameter combinations sampled from specified probability distributions **[13]**. It is generally preferred over GridSearch as the user has more control over the computational budget by setting the number of iterations **[7]**.
An optimalModels key table is defined that will tabulate the optimal parameters found using the grid-search/randomized grid-search technique for each classifier.
An optimal model will then be used by a web application to predict whether an individual is displaying Alzheimer's symptoms.
optimalModels:([mdl:()]parameters:())
A series of dictionaries are defined that represent the parameter space for each algorithm:
/Random Forest parameter space
rfParams: (!) . flip(
(`n_estimators; 15 25 30 35);
(`min_samples_leaf; 1 + til 10);
(`max_depth; 2 4 6);
(`min_samples_split; 2 5 7 10 12);
(`max_features; 2 3);
(`criterion; `gini`entropy);
(`class_weight; `balanced`balanced_subsample))
/Support vector machine parameter space
svcParams:(!) . flip(
(`kernel ; ("rbf";"linear"));
(`C ; 0.0001 0.001 0.01 0.1 1);
(`degree ; 2 3 4);
(`gamma ; 0.0001 0.001 0.01 0.1 0.5));
/Logistic regression parameter space
lrParams: (!) . flip(
(`C ; 0.0001 0.001 0.01 0.1 1.0 10 100);
(`max_iter ; 1000 5000 10000 );
(`solver; ("newton-cg";"lbfgs"));
(`penalty ; enlist("l2")))
/Decision tree parameter space
dtParams: (!) . flip(
(`max_leaf_nodes ; 2+til 30);
(`splitter ; ("random";"best"));
(`criterion ; ("gini";"entropy"));
(`max_depth ; 1+til 10);
(`min_samples_split ; 0.1 0.2 0.3 0.5 0.6 0.7 0.8))
/Gradient Boosting
gbParams: (!) . flip(
(`n_estimators ; 500 1000 1500);
(`learning_rate ; 0.01 0.03 0.05 0.07);
(`min_samples_split ; 2 4 6);
(`min_samples_leaf ; 3 5 7));
/Adaboost parameter space
adaParams: (!) . flip(
(`n_estimators ; 500 1000 1500 2000);
(`learning_rate ; 0.05 0.1 0.15 0.2))
A dictionary is defined to map each parameter space to its algorithm:
pspaces:(`SVM`LogisticRegression`DecisionTreeClassifier`Adaboost`GradientBoost`RandomForests)!(svcParams;lrParams;dtParams;adaParams;gbParams;rfParams)
hyperTune:{
-1 (60#"="),"\n";
.log.info["Hypertuning classifier: ",string[x];()];
start:.z.p;
//Dict look-up to find algorithm's parameter space
clf:algoMappings x;
//If model is svm, enable probability.Set same seed for each classifier
$[x=`SVM;
mdl:clf[`random_state pykw seed; `probability pykw 1b];
mdl:clf[`random_state pykw seed]];
//Check if optimizer method is valid, if not bomb out
if[not z in `GridSearchCV`RandomizedSearchCV;
: "Use GridSearchCV or RandomizedSearchCV as an optimizer!"];
tuner:value string[z];
//Naive Bayes does not perform grid search
if[not x=`NaiveBayes;
optParams:tunedEval[tuner[mdl;y;`scoring pykw "balanced_accuracy";`cv pykw 10;`n_jobs pykw "J"$getenv`THREADS]]];
//Compute duration that grid-search/randomized grid search took
.log.warn["Hypertuning parameters took: ",string[end:.z.p - start];()];
if[x=`SVM; optParams[`probability]: 1b];
//Track algorithm & its optimal parameters, upsert to global table
upsert[`optimalModels;(x;optParams)];
//Call evaluateAlgos function
evaluateAlgos[(enlist x)!enlist(clf[pykwargs optParams]);z]}
f:hyperTune[;;`RandomizedSearchCV]
/ Can also use the GridSearchCV optimizer to hypertune parameters - takes long, use threads (n_jobs)
/ f:hyperTune[;;`GridSearchCV]
eachKV[f] pspaces;
============================================================ 2023.02.16D15:08:41.506652000 INFO Hypertuning classifier: SVM 2023.02.16D15:08:42.275029000 INFO Best score during gridSearch is - 0.7704167 2023.02.16D15:08:42.275120000 INFO Best parameter set: kernel| "linear" gamma | 0.1 degree| 2 C | 1f 2023.02.16D15:08:42.277060000 INFO Accuracy on training data - 0.7806452 2023.02.16D15:08:42.277113000 INFO Accuracy on test data - 0.7535714 2023.02.16D15:08:42.277157000 WARN Hypertuning parameters took: 0D00:00:00.770493000 2023.02.16D15:08:42.284935000 INFO SVM auc_score= 0.8496429 2023.02.16D15:08:42.295020000 INFO Classification report showing precision, recall and f1-score for each class: precision recall f1-score support Nondemented 0.68 0.86 0.76 35 Demented 0.84 0.65 0.73 40 accuracy 0.75 75 macro avg 0.76 0.75 0.75 75 weighted avg 0.77 0.75 0.75 75 DOR score: 11.14286 ============================================================
============================================================ 2023.02.16D15:08:42.397489000 INFO Hypertuning classifier: LogisticRegression 2023.02.16D15:08:42.742251000 INFO Best score during gridSearch is - 0.7795833 2023.02.16D15:08:42.742330000 INFO Best parameter set: solver | "lbfgs" penalty | "l2" max_iter| 1000 C | 0.1 2023.02.16D15:08:42.743538000 INFO Accuracy on training data - 0.783871 2023.02.16D15:08:42.743566000 INFO Accuracy on test data - 0.7392857 2023.02.16D15:08:42.743593000 WARN Hypertuning parameters took: 0D00:00:00.346091000 2023.02.16D15:08:42.747114000 INFO LogisticRegression auc_score= 0.8332143 2023.02.16D15:08:42.757014000 INFO Classification report showing precision, recall and f1-score for each class: precision recall f1-score support Nondemented 0.67 0.83 0.74 35 Demented 0.81 0.65 0.72 40 accuracy 0.73 75 macro avg 0.74 0.74 0.73 75 weighted avg 0.75 0.73 0.73 75 DOR score: 8.97619 ============================================================
============================================================ 2023.02.16D15:08:42.860474000 INFO Hypertuning classifier: DecisionTreeClassifier 2023.02.16D15:08:43.204110000 INFO Best score during gridSearch is - 0.7547917 2023.02.16D15:08:43.204259000 INFO Best parameter set: splitter | "random" min_samples_split| 0.2 max_leaf_nodes | 25 max_depth | 7 criterion | "entropy" 2023.02.16D15:08:43.205748000 INFO Accuracy on training data - 0.7709677 2023.02.16D15:08:43.205795000 INFO Accuracy on test data - 0.7446429 2023.02.16D15:08:43.205832000 WARN Hypertuning parameters took: 0D00:00:00.345342000 2023.02.16D15:08:43.208043000 INFO DecisionTreeClassifier auc_score= 0.8614286 2023.02.16D15:08:43.218141000 INFO Classification report showing precision, recall and f1-score for each class: precision recall f1-score support Nondemented 0.76 0.80 0.78 35 Demented 0.82 0.78 0.79 40 accuracy 0.79 75 macro avg 0.79 0.79 0.79 75 weighted avg 0.79 0.79 0.79 75 DOR score: 13.77778 ============================================================
============================================================ 2023.02.16D15:08:43.321302000 INFO Hypertuning classifier: Adaboost 2023.02.16D15:09:11.438868000 INFO Best score during gridSearch is - 0.7627083 2023.02.16D15:09:11.438954000 INFO Best parameter set: n_estimators | 500 learning_rate| 0.05 2023.02.16D15:09:11.528824000 INFO Accuracy on training data - 0.816129 2023.02.16D15:09:11.528886000 INFO Accuracy on test data - 0.7357143 2023.02.16D15:09:11.529202000 WARN Hypertuning parameters took: 0D00:00:28.207885000 2023.02.16D15:09:12.063767000 INFO Adaboost auc_score= 0.8232143 2023.02.16D15:09:12.073964000 INFO Classification report showing precision, recall and f1-score for each class: precision recall f1-score support Nondemented 0.69 0.77 0.73 35 Demented 0.78 0.70 0.74 40 accuracy 0.73 75 macro avg 0.74 0.74 0.73 75 weighted avg 0.74 0.73 0.73 75 DOR score: 7.875 ============================================================
============================================================ 2023.02.16D15:09:12.178988000 INFO Hypertuning classifier: GradientBoost 2023.02.16D15:09:21.946525000 INFO Best score during gridSearch is - 0.7283333 2023.02.16D15:09:21.946612000 INFO Best parameter set: n_estimators | 500 min_samples_split| 4 min_samples_leaf | 5 learning_rate | 0.03 2023.02.16D15:09:21.949596000 INFO Accuracy on training data - 0.916129 2023.02.16D15:09:21.949672000 INFO Accuracy on test data - 0.7089286 2023.02.16D15:09:21.949783000 WARN Hypertuning parameters took: 0D00:00:09.770779000 2023.02.16D15:09:22.116285000 INFO GradientBoost auc_score= 0.7732143 2023.02.16D15:09:22.126278000 INFO Classification report showing precision, recall and f1-score for each class: precision recall f1-score support Nondemented 0.67 0.74 0.70 35 Demented 0.75 0.68 0.71 40 accuracy 0.71 75 macro avg 0.71 0.71 0.71 75 weighted avg 0.71 0.71 0.71 75 DOR score: 6 ============================================================
============================================================ 2023.02.16D15:09:22.230959000 INFO Hypertuning classifier: RandomForests 2023.02.16D15:09:22.937197000 INFO Best score during gridSearch is - 0.77375 2023.02.16D15:09:22.937276000 INFO Best parameter set: n_estimators | 25 min_samples_split| 5 min_samples_leaf | 6 max_features | 2 max_depth | 6 criterion | "entropy" class_weight | "balanced" 2023.02.16D15:09:22.942533000 INFO Accuracy on training data - 0.8225806 2023.02.16D15:09:22.942579000 INFO Accuracy on test data - 0.7107143 2023.02.16D15:09:22.942635000 WARN Hypertuning parameters took: 0D00:00:00.711654000 2023.02.16D15:09:22.972504000 INFO RandomForests auc_score= 0.8332143 2023.02.16D15:09:22.982578000 INFO Classification report showing precision, recall and f1-score for each class: precision recall f1-score support Nondemented 0.66 0.77 0.71 35 Demented 0.76 0.65 0.70 40 accuracy 0.71 75 macro avg 0.71 0.71 0.71 75 weighted avg 0.72 0.71 0.71 75 DOR score: 6.267857 ============================================================
`TestAuc xdesc scores
models parameters | DiagnosticOddsRatio TrainingAccuracy TestAccuracy TestAuc ---------------------------------------------| ----------------------------------------------------------- DecisionTreeClassifier RandomizedSearchCV| 13.77778 0.7483871 0.7866667 0.8614286 SVM RandomizedSearchCV| 11.14286 0.7806452 0.7466667 0.8496429 LogisticRegression RandomizedSearchCV| 8.97619 0.783871 0.7333333 0.8332143 RandomForests RandomizedSearchCV| 6.267857 0.816129 0.7066667 0.8332143 Adaboost RandomizedSearchCV| 7.875 0.816129 0.7333333 0.8232143 GradientBoost RandomizedSearchCV| 6 0.916129 0.7066667 0.7732143 LinearDiscriminantAnalysis noTuning | 2.875 0.8032258 0.6266667 0.6892857 LogisticRegression noTuning | 1.961538 0.7967742 0.5866667 0.6514286 NeuralNetworks noTuning | 2.199095 0.8677419 0.6 0.6507143 SVM noTuning | 1.753968 0.7903226 0.5733333 0.6428571 GradientBoost noTuning | 3.115385 0.9967742 0.6 0.565 Adaboost noTuning | 2.266667 0.8903226 0.5866667 0.5628571 DecisionTreeClassifier noTuning | 1.565217 1 0.56 0.5464286 RandomForests noTuning | 1.885714 1 0.5733333 0.5425 NaiveBayes noTuning | 1.416667 0.7612903 0.5466667 0.5092857 BaselineModel noTuning | 0.5 0.4666667 0.5
All models hypertuned via RandomizedSearchCV achieved higher AUC scores than in the earlier no-tuning evaluation, indicating that hyperparameter tuning coupled with feature-selection techniques improved performance significantly (the closer to 1, the better):
`AucDiff xdesc
select BeforeAfter:TestAuc, AucDiff: abs .[-;TestAuc] by models from scores
where models in `SVM`LogisticRegression`DecisionTreeClassifier`Adaboost`GradientBoost`RandomForests
models | BeforeAfter AucDiff ----------------------| ----------------------------- DecisionTreeClassifier| 0.5464286 0.8614286 0.315 RandomForests | 0.5425 0.8332143 0.2907143 Adaboost | 0.5628571 0.8232143 0.2603571 GradientBoost | 0.565 0.7732143 0.2082143 SVM | 0.6428571 0.8496429 0.2067857 LogisticRegression | 0.6514286 0.8332143 0.1817857
The SVM model achieves one of the highest AUC scores.
The SVM is relatively simple compared to the boosting and ensemble algorithms, which suggests that these results reinforce the principle of Occam's razor: given a small dataset, the application of simple models, with the fewest assumptions, yields the best results **7**.
Although there is still some overfitting (training accuracy > test accuracy), mainly due to the size of this dataset, it is not as consequential as before: the training accuracies have decreased significantly whilst the test accuracies have risen substantially.
In essence, the models are no longer learning as many particulars of the training dataset and therefore generalise better on unseen data.
Previously, the models were learning details and noise in the training data to the extent that they generalised very poorly on unseen data.
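As a quick illustrative check against the scores table above (not part of the original pipeline), the train/test accuracy gap of each hypertuned model can be tabulated directly; smaller gaps indicate less overfitting:
/ Train/test accuracy gap per hypertuned model
`overfitGap xdesc select models, overfitGap:TrainingAccuracy-TestAccuracy from scores where parameters=`RandomizedSearchCV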
Set the variable modelsTrained to true, signalling to the GUI that the models have been trained and are ready to predict dementia scores:
modelsTrained:1b
Since WebSockets are supported by most modern browsers, it is possible to create an HTML5 real-time web application (GUI) and connect it to this kdb+ process through JavaScript.
A detailed introduction to WebSockets can be found in the Kx whitepaper Kdb+ and WebSockets: https://code.kx.com/q/wp/websockets/.
This simple GUI will serve as the entry point for a potential client who wants to ascertain, by entering some input data, whether an individual is exhibiting any symptoms of dementia.
Firstly, make sure an arbitrary port is open so the web application can connect (port 9090 was opened at startup by default).
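As a quick check, the currently open port can be queried from the q session; it should report 9090 here, as set by init.q at startup:
/ Returns the port the process is currently listening on
system"p"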
kdb+ has a separate message handler for WebSockets called .z.ws
, meaning all incoming WebSocket messages will be processed by this function.
Note there is no default definition of .z.ws; in this case it has been customised to take biomarker/MRI data and return a result indicating the likelihood that an individual is demented (a value above 0.5, and the closer to 1, the greater the confidence that the subject is demented).
Breaking down each step of this custom .z.ws function:
- value x simply evaluates the incoming string to create a dictionary.
- If the boruta option has been enabled by the client, then every other feature, bar the significant features computed by the boruta algorithm, is dropped. If it has been disabled, all features bar mmse, educ and age are dropped.
- neg[.z.w] asynchronously pushes the results back to the handle which made the request. Just before the results are pushed back, .j.j is used to convert the kdb+ output to JSON.
.z.ws:{
d:value x;
//Check models have been evaluated
if[not modelsTrained;
neg[.z.w] .j.j "Models havent been trained yet. Cant predict a score!";
:()
];
//Get classifier from dict
clf:d[`algo];
//If boruta is enabled, use features computed by boruta algo, if not default to only using mmse,educ and age fields
$[`Y=d`boruta;borutaFeatures:`nwbv`mmse`educ;borutaFeatures:`mmse`educ`age];
//Drop algo and boruta fields from dict
d:(`algo`boruta) _ d;
//Change to table fmt and assign it to global t table
`t set enlist d;
//call pipeline fn to clean the dataset
pipeline[`t;trainingStats;cols[t] except borutaFeatures];
//Convert to python array
tab2Array[`t];
//get optimal parameters for classifier
p:optimalModels[clf;`parameters];
//configure new model using opt params
m:algoMappings[clf][pykwargs p];
//fit new model with training sets
m[`:fit][X_train;y_train];
//predict probability of subject being demented
pred:m[`:predict_proba][t]`;
res: raze pred;
//convert to json and send output back to the handle which made the request
neg[.z.w] .j.j "F"$.Q.fmt[5;3] res 1;
}
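For example, a hypothetical request from the GUI might look like the string below; the exact field names and values depend on the GUI form, and only the first step of the handler (decoding the message) is shown:
/ Hypothetical request string - it must evaluate to a dictionary of input fields
/ plus the chosen algorithm and the boruta flag (values are illustrative only)
req:"`algo`boruta`mmse`educ`age!(`SVM;`N;27;14;87)"
value req   / the first step of .z.ws - yields the input dictionary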
Refer to the README - Using the web GUI for example usage
This notebook demonstrates that a kdb+ approach can be applied to a machine-learning classification task to produce logical results. A multitude of techniques were reproduced in q to replicate their Python equivalents, including data-wrangling and exploratory data analysis steps, feature-engineering techniques and evaluation metrics (AUC score, classification reports etc.) to gauge model performance.
Given the dataset was small, models trained on it ran the risk of overfitting: they were more susceptible to seeing patterns that did not exist, resulting in high variance and poor generalisation on the test dataset.
To mitigate the effect of overfitting, outliers were transformed and cross-validation techniques were applied.
Initially, model performance was poor, with simple models such as LinearDiscriminantAnalysis and LogisticRegression winning out (complex models with many parameters increased the likelihood of overfitting).
To improve performance, feature selection coupled with grid-search and RandomizedSearchCV techniques increased test AUC scores by more than 20% across the board, with the support vector machine model among the best performers.
Finally, an HTML5 web application was created to serve as an entry point for a potential user who wants to ascertain, by entering some data fields, whether an individual is showing any symptoms of dementia.
To exhibit how parameter values influence overfitting, the iris dataset, one of the most renowned machine-learning datasets, is used to demonstrate tuning the hyperparameters of a support vector machine.
The hyperparameters are:
kernel:`linear`rbf`poly
C:0.01 0.1 1 10 100 1000
gamma:0.01 1 10 100 1000
The iris dataset
is imported from sklearn's internal datasets library:
datasets:.p.import[`sklearn.datasets]
iris:datasets[`:load_iris][]`;
X:iris[`data;;0 1]
Y:iris[`target]
The plotSVC function allows decision boundaries in SVMs to be visualised for different parameter values:
plotSVC:{[title;model]
//Bounds of the plotting grid, padded by one unit either side
x_min:-1+min first flip X;
x_max:1+max first flip X;
y_min:-1+min last flip X;
y_max:1+max last flip X;
//Grid step size used to build the mesh
h:(x_max%x_min)%100;
x1:np[`:arange][x_min;x_max; h]`;
y1:np[`:arange][y_min;y_max; h]`;
//Build the meshgrid and assign the two coordinate matrices to the globals xx and yy
b:(np[`:meshgrid][x1;y1]`);
`xx`yy set' b;
plt[`:subplot][1;1;1];
//Predict a class for every grid point, then reshape to the meshgrid dimensions
z:model[`:predict][flip(raze xx;raze yy)];
z1:164 208 # z`;
//Shade the decision regions and overlay the training points
plt[`:contourf][xx;yy;z1;`cmap pykw plt[`:cm][`:Paired];`alpha pykw 0.8];
plt[`:scatter][X[;0]; X[;1]; `c pykw Y; `cmap pykw plt[`:cm][`:Paired]];
plt[`:xlabel]"Sepal length";
plt[`:ylabel]"Sepal width";
plt[`:xlim][min first xx; max last xx];
plt[`:title] title;
plt[`:show][] }
An overfit function can be called to iterate over each hyperparameter value, to exhibit the effect different values have on the decision boundary.
The parameters investigated are the kernel type, the regularization parameter C and the kernel coefficient gamma, as defined above.
N.B. The effects of the C and gamma parameters are studied with a radial basis function (RBF) kernel applied to an SVM classifier.
overfit:{[x;y]
//Plot title, e.g. "C=10"
title:string[x],"=",string[y];
//C and gamma are studied with an RBF kernel; the kernel parameter itself is varied directly
$[x in `gamma`C;
mdl:svc[`kernel pykw `rbf; x pykw y][`:fit][X;Y];
mdl:svc[x pykw y][`:fit][X;Y]];
plotSVC[title;mdl];
}
Looking at the decision boundaries for different kernels:
overfit[`kernel;] each kernel;
Decision boundaries for different regularization values(C):
overfit[`C;] each C;
Finally, comparing boundaries for different gamma values:
overfit[`gamma;]each gamma;
Another method of detecting anomalies is the Isolation Forest
algorithm. It isolates outliers by selecting a random feature and then computing a random split value between the maximum and minimum values of that feature. This random partitioning results in noticeably shorter paths for anomalous data points, which makes them distinguishable from the rest of the data.
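A minimal embedPy sketch of this approach is shown below; it is illustrative only (the contamination value here is arbitrary - the notebook derives its own, shown below) and assumes scikit-learn is available, with data and numCols as defined earlier:
/ Sketch: fit sklearn's IsolationForest on the numerical columns and flag anomalous rows
isoForest:.p.import[`sklearn.ensemble][`:IsolationForest]
ifMdl:isoForest[`contamination pykw 0.0375;`random_state pykw 42]   / arbitrary contamination
flags:ifMdl[`:fit_predict][flip data numCols]`   / -1 marks an outlier, 1 an inlier
where flags=-1                                   / row indices of the flagged outliers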
These outliers are then visualised in 3-D space to further reinforce the influence these datapoints can exert on the dataset:
outlier3Dplot[data;outliers;numCols];
Using a contamination value: 0.03753351
Logistic Regression:
| Model | Pros | Cons |
|---|---|---|
| Logistic Regression | Due to its simplistic nature and easy implementation, it is an ideal baseline for any binary classification problem. | This model is prone to overfitting and doesn't fare well on independent variables that are in no way correlated with the target variable. |
|  | Following the reasoning of Occam's Razor, given the size of the MRI dataset, the application of simple models could yield the best results. |  |
|  | It also doesn't require much computational power and doesn't require the scaling of features (quicker fitting time). |  |
Support vector machines:
| Model | Pros | Cons |
|---|---|---|
| SVM | Can handle the case where the relationship between the features and the target is non-linear (kernel trick). | Memory intensive. |
|  | Has few hyperparameters to tune; in the case of this dataset, C and gamma are the hyperparameters that need tuning as the 'rbf' kernel will be used. | Fitting times can be long. |
Naive Bayes:
| Model | Pros | Cons |
|---|---|---|
| Naive Bayes | The training set is small and thus high-bias/low-variance classifiers (such as Naive Bayes) should have an advantage over complex models, which may have a tendency to overfit. | Has trouble learning the interaction between different features. |
|  | Extremely simple to implement. |  |
Decision Trees:
| Model | Pros | Cons |
|---|---|---|
| Decision Trees | Decision trees require very little data preparation, i.e. they don't require feature scaling. They are the fundamental concept behind the Random Forest model. | The main disadvantage is an increase in variance, which leads to poor generalization (tendency to overfit). |
|  | Decision trees are fairly intuitive and thus their decisions are easy to interpret, i.e. they provide simple classification rules that can be applied manually if need be (known as 'white box' modelling). | Small variations in the data can result in different decision trees. |
Random Forests:
| Model | Pros | Cons |
|---|---|---|
| Random Forests | The random forest model is commonly referred to as the 'Leatherman' of learning methods and thus can be fitted to most regression and classification tasks [20]. | 'Black box' model that is very hard to interpret (in comparison to decision trees). |
|  | Although one should still make explicit efforts to avoid overfitting (cross-validation etc.), as no algorithm is completely immune, random forests are less likely to overfit. | A large number of trees may make the model slow when making predictions. |
|  | Can handle a large number of features and can help estimate which features are particularly important in the underlying data. |  |
Adaboost:
| Model | Pros | Cons |
|---|---|---|
| Adaboost | Simple to implement. | Can be sensitive to noisy data or data which contains outliers. |
|  | Not overly prone to overfitting. |  |
Gradient Boosting:
| Model | Pros | Cons |
|---|---|---|
| Gradient Boosting | Can produce high prediction accuracies. | Computationally expensive. |
|  | Can work on datasets that have missing features. | Can overfit, as the model keeps trying to minimise all errors. |
|  | Few preprocessing steps need to be implemented as it can handle both numerical and categorical data. |  |
Sources can be found in the accompanying README.md file