Se ha denunciado esta presentación.
Se está descargando tu SlideShare. ×

M11 bagging loo cv

Anuncio
Anuncio
Anuncio
Anuncio
Anuncio
Anuncio
Anuncio
Anuncio
Anuncio
Anuncio
Anuncio
Anuncio
Próximo SlideShare
M12 random forest-part01
M12 random forest-part01
Cargando en…3
×

Eche un vistazo a continuación

1 de 11 Anuncio

Más Contenido Relacionado

Presentaciones para usted (20)

Similares a M11 bagging loo cv (20)

Anuncio

Más reciente (20)

Anuncio

M11 bagging loo cv

  1. 1. Bootstrap Aggregation-LooCV-naiveBayes file:///E:/users/rkannan/cuny/fall2020/fall2020/m11-bagging-loocv/baggi... 1 of 11 11/23/2020, 5:39 PM
  2. 2. library('e1071') file<-'c://Users/rk215/Data/heart.csv' heart<-read.csv(file,head=T,sep=',',stringsAsFactors=F) head(heart) ## age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal ## 1 63 1 3 145 233 1 0 150 0 2.3 0 0 1 ## 2 37 1 2 130 250 0 1 187 0 3.5 0 0 2 ## 3 41 0 1 130 204 0 0 172 0 1.4 2 0 2 ## 4 56 1 1 120 236 0 1 178 0 0.8 2 0 2 ## 5 57 0 0 120 354 0 1 163 1 0.6 2 0 2 ## 6 57 1 0 140 192 0 1 148 0 0.4 1 0 1 ## target ## 1 1 ## 2 1 ## 3 1 ## 4 1 ## 5 1 ## 6 1 catheart<-heart[,c(2,3,6,7,9,11,12,13,14)] set.seed(43) trdidx<-sample(1:nrow(catheart),0.7*nrow(catheart),replace=F) trcatheart<-catheart[trdidx,] tstcatheart<-catheart[-trdidx,] nb.model<-naiveBayes(target~.,data=trcatheart) #str(nb.model) object.size(nb.model) #11096 ## 11096 bytes nb.tstpred<-predict(nb.model,tstcatheart[,-c(9)],type='raw') nb.tstclass<-unlist(apply(round(nb.tstpred),1,which.max))-1 nb.tbl<-table(tstcatheart[[9]], nb.tstclass) nb.cfm<-caret::confusionMatrix(nb.tbl) nb.cfm Bootstrap Aggregation-LooCV-naiveBayes file:///E:/users/rkannan/cuny/fall2020/fall2020/m11-bagging-loocv/baggi... 2 of 11 11/23/2020, 5:39 PM
  3. 3. ## Confusion Matrix and Statistics ## ## nb.tstclass ## 0 1 ## 0 28 12 ## 1 3 48 ## ## Accuracy : 0.8352 ## 95% CI : (0.7427, 0.9047) ## No Information Rate : 0.6593 ## P-Value [Acc > NIR] : 0.0001482 ## ## Kappa : 0.6571 ## ## Mcnemar's Test P-Value : 0.0388671 ## ## Sensitivity : 0.9032 ## Specificity : 0.8000 ## Pos Pred Value : 0.7000 ## Neg Pred Value : 0.9412 ## Prevalence : 0.3407 ## Detection Rate : 0.3077 ## Detection Prevalence : 0.4396 ## Balanced Accuracy : 0.8516 ## ## 'Positive' Class : 0 ## start_tm <- proc.time() df<-trcatheart runModel<-function(df) {naiveBayes(target~.,data=df[sample(1:nrow(df),nrow(df),replace=T),])} lapplyrunmodel<-function(x)runModel(df) system.time(models<-lapply(1:100,lapplyrunmodel)) ## user system elapsed ## 0.32 0.02 0.33 object.size(models) ## 1110448 bytes end_tm<-proc.time() print(paste("time taken to run 100 bootstrapps",(end_tm-start_tm),sep=":")) Bootstrap Aggregation-LooCV-naiveBayes file:///E:/users/rkannan/cuny/fall2020/fall2020/m11-bagging-loocv/baggi... 3 of 11 11/23/2020, 5:39 PM
  4. 4. ## [1] "time taken to run 100 bootstrapps:0.46" ## [2] "time taken to run 100 bootstrapps:0.02" ## [3] "time taken to run 100 bootstrapps:0.47" ## [4] "time taken to run 100 bootstrapps:NA" ## [5] "time taken to run 100 bootstrapps:NA" bagging_preds<-lapply(models,FUN=function(M,D=tstcatheart[,-c(9)])predict(M, D,type='raw')) bagging_cfm<-lapply(bagging_preds,FUN=function(P,A=tstcatheart[[9]]) {pred_class<-unlist(apply(round(P),1,which.max))-1 pred_tbl<-table(A,pred_class) pred_cfm<-caret::confusionMatrix(pred_tbl) pred_cfm }) bagging.perf<-as.data.frame(do.call('rbind',lapply(bagging_cfm,FUN=function (cfm)c(cfm$overall,cfm$byClass)))) bagging.perf.mean<-apply(bagging.perf[bagging.perf$AccuracyPValue<0.01,-c(6: 7)],2,mean) bagging.perf.var<-apply(bagging.perf[bagging.perf$AccuracyPValue<0.01,-c(6: 7)],2,sd) bagging.perf.var ## Accuracy Kappa AccuracyLower ## 0.01618750 0.03355331 0.01846838 ## AccuracyUpper AccuracyNull Sensitivity ## 0.01273569 0.01795716 0.03073122 ## Specificity Pos Pred Value Neg Pred Value ## 0.01470108 0.02693220 0.02200582 ## Precision Recall F1 ## 0.02693220 0.03073122 0.02087685 ## Prevalence Detection Rate Detection Prevalence ## 0.01795716 0.01183833 0.00000000 ## Balanced Accuracy ## 0.01875328 bagging.perf.mean Bootstrap Aggregation-LooCV-naiveBayes file:///E:/users/rkannan/cuny/fall2020/fall2020/m11-bagging-loocv/baggi... 4 of 11 11/23/2020, 5:39 PM
  5. 5. ## Accuracy Kappa AccuracyLower ## 0.8323565 0.6521225 0.7396540 ## AccuracyUpper AccuracyNull Sensitivity ## 0.9023711 0.6496947 0.8891540 ## Specificity Pos Pred Value Neg Pred Value ## 0.8025070 0.7077778 0.9300654 ## Precision Recall F1 ## 0.7077778 0.8891540 0.7876655 ## Prevalence Detection Rate Detection Prevalence ## 0.3503053 0.3111111 0.4395604 ## Balanced Accuracy ## 0.8458305 (bagging_tm<-proc.time()-start_tm) ## user system elapsed ## 2.35 0.02 2.36 N<-nrow(trcatheart) cv_df<-do.call('rbind',lapply(1:N,FUN=function(idx,data=trcatheart) { # For each observation m<-naiveBayes(target~.,data=data[-idx,]) # train with ALL other observations p<-predict(m,data[idx,-c(9)],type='raw') # predict that one observation # NB returns the probabilities of the classes, as per Bayesian Classifier, we take the class with the higher probability pc<-unlist(apply(round(p),1,which.max))-1 # -1 to make class to be 0 or 1, which.max returns 1 or 2 #pred_tbl<-table(data[idx,c(9)],pc) #pred_cfm<-caret::confusionMatrix(pred_tbl) list(fold=idx,m=m,predicted=pc,actual=data[idx,c(9)]) # store the idx, model, predicted class and actual class } )) cv_df<-as.data.frame(cv_df) head(cv_df) Bootstrap Aggregation-LooCV-naiveBayes file:///E:/users/rkannan/cuny/fall2020/fall2020/m11-bagging-loocv/baggi... 5 of 11 11/23/2020, 5:39 PM
  6. 6. ## fold ## 1 1 ## 2 2 ## 3 3 ## 4 4 ## 5 5 ## 6 6 ## m ## 1 98, 113, 0.8061224, 0.5840708, 0.3973667, 0.4950769, 0.4387755, 1.35398 2, 0.8623431, 0.9535681, 0.1428571, 0.1327434, 0.3517262, 0.3408085, 0.43877 55, 0.6106195, 0.5385419, 0.5076843, 0.5510204, 0.1769912, 0.4999474, 0.3833 613, 1.132653, 1.530973, 0.5493718, 0.6277909, 1.285714, 0.3539823, 1.03545 4, 0.8856026, 2.581633, 2.123894, 0.6568194, 0.4842657, TRUE, TRUE, TRUE, TR UE, TRUE, TRUE, TRUE, TRUE, naiveBayes.default(x = X, y = Y, laplace = lapla ce) ## 2 97, 114, 0.8041237, 0.5789474, 0.3989354, 0.4959078, 0.443299, 1. 342105, 0.8656533, 0.9577716, 0.1443299, 0.1315789, 0.3532495, 0.3395249, 0. 443299, 0.6052632, 0.5394649, 0.5086582, 0.5463918, 0.1754386, 0.5004294, 0. 382021, 1.123711, 1.526316, 0.5450102, 0.6269822, 1.278351, 0.3508772, 1.038 25, 0.8822984, 2.57732, 2.122807, 0.658835, 0.4822578, TRUE, TRUE, TRUE, TRU E, TRUE, TRUE, TRUE, TRUE, naiveBayes.default(x = X, y = Y, laplace = laplac e) ## 3 97, 114, 0.8041237, 0.5789474, 0.3989354, 0.4959078, 0.443299, 1. 342105, 0.8656533, 0.9577716, 0.1443299, 0.1315789, 0.3532495, 0.3395249, 0. 443299, 0.6052632, 0.5394649, 0.5086582, 0.5463918, 0.1754386, 0.5004294, 0. 
382021, 1.14433, 1.526316, 0.5398628, 0.6269822, 1.298969, 0.3508772, 1.0324 42, 0.8822984, 2.57732, 2.122807, 0.658835, 0.4822578, TRUE, TRUE, TRUE, TRU E, TRUE, TRUE, TRUE, TRUE, naiveBayes.default(x = X, y = Y, laplace = laplac e) ## 4 98, 113, 0.8061224, 0.5752212, 0.3973667, 0.4965112, 0.4387755, 1.33628 3, 0.8623431, 0.9600095, 0.1428571, 0.1327434, 0.3517262, 0.3408085, 0.43877 55, 0.6017699, 0.5385419, 0.5095485, 0.5510204, 0.1769912, 0.4999474, 0.3833 613, 1.132653, 1.522124, 0.5493718, 0.6281683, 1.285714, 0.3539823, 1.03545 4, 0.8856026, 2.581633, 2.123894, 0.6568194, 0.4842657, TRUE, TRUE, TRUE, TR UE, TRUE, TRUE, TRUE, TRUE, naiveBayes.default(x = X, y = Y, laplace = lapla ce) ## 5 98, 113, 0.8061224, 0.5840708, 0.3973667, 0.4950769, 0.4387755, 1.35398 2, 0.8623431, 0.9535681, 0.1428571, 0.1327434, 0.3517262, 0.3408085, 0.43877 55, 0.6017699, 0.5385419, 0.5095485, 0.5510204, 0.1769912, 0.4999474, 0.3833 613, 1.132653, 1.522124, 0.5493718, 0.6281683, 1.285714, 0.3539823, 1.03545 4, 0.8856026, 2.581633, 2.123894, 0.6568194, 0.4842657, TRUE, TRUE, TRUE, TR UE, TRUE, TRUE, TRUE, TRUE, naiveBayes.default(x = X, y = Y, laplace = lapla ce) ## 6 97, 114, 0.814433, 0.5789474, 0.3907764, 0.4959078, 0.443299, 1. 342105, 0.8656533, 0.9577716, 0.1340206, 0.1315789, 0.3424442, 0.3395249, 0. 4329897, 0.6052632, 0.5382691, 0.5086582, 0.5463918, 0.1754386, 0.5004294, 0.382021, 1.134021, 1.526316, 0.552058, 0.6269822, 1.278351, 0.3508772, 1.03 825, 0.8822984, 2.57732, 2.122807, 0.658835, 0.4822578, TRUE, TRUE, TRUE, TR UE, TRUE, TRUE, TRUE, TRUE, naiveBayes.default(x = X, y = Y, laplace = lapla Bootstrap Aggregation-LooCV-naiveBayes file:///E:/users/rkannan/cuny/fall2020/fall2020/m11-bagging-loocv/baggi... 6 of 11 11/23/2020, 5:39 PM
  7. 7. ce) ## predicted actual ## 1 1 1 ## 2 0 0 ## 3 0 0 ## 4 1 1 ## 5 1 1 ## 6 0 0 tail(cv_df) Bootstrap Aggregation-LooCV-naiveBayes file:///E:/users/rkannan/cuny/fall2020/fall2020/m11-bagging-loocv/baggi... 7 of 11 11/23/2020, 5:39 PM
  8. 8. ## fold ## 207 207 ## 208 208 ## 209 209 ## 210 210 ## 211 211 ## 212 212 ## m ## 207 98, 113, 0.8061224, 0.5840708, 0.3973667, 0.4950769, 0.4387755, 1.336 283, 0.8623431, 0.9600095, 0.1428571, 0.1238938, 0.3517262, 0.3309279, 0.438 7755, 0.6017699, 0.5385419, 0.5095485, 0.5510204, 0.1769912, 0.4999474, 0.38 33613, 1.132653, 1.522124, 0.5493718, 0.6281683, 1.285714, 0.3539823, 1.0354 54, 0.8856026, 2.581633, 2.123894, 0.6568194, 0.4842657, TRUE, TRUE, TRUE, T RUE, TRUE, TRUE, TRUE, TRUE, naiveBayes.default(x = X, y = Y, laplace = lapl ace) ## 208 98, 113, 0.8061224, 0.5752212, 0.3973667, 0.4965112, 0.4387755, 1.345 133, 0.8623431, 0.9614898, 0.1428571, 0.1238938, 0.3517262, 0.3309279, 0.438 7755, 0.6106195, 0.5385419, 0.5076843, 0.5510204, 0.1769912, 0.4999474, 0.38 33613, 1.132653, 1.522124, 0.5493718, 0.6281683, 1.285714, 0.3539823, 1.0354 54, 0.8856026, 2.581633, 2.123894, 0.6568194, 0.4842657, TRUE, TRUE, TRUE, T RUE, TRUE, TRUE, TRUE, TRUE, naiveBayes.default(x = X, y = Y, laplace = lapl ace) ## 209 98, 113, 0.8061224, 0.5752212, 0.3973667, 0.4965112, 0.4387755, 1.353 982, 0.8623431, 0.9535681, 0.1428571, 0.1327434, 0.3517262, 0.3408085, 0.438 7755, 0.6017699, 0.5385419, 0.5095485, 0.5510204, 0.1769912, 0.4999474, 0.38 33613, 1.132653, 1.522124, 0.5493718, 0.6281683, 1.285714, 0.3539823, 1.0354 54, 0.8856026, 2.581633, 2.123894, 0.6568194, 0.4842657, TRUE, TRUE, TRUE, T RUE, TRUE, TRUE, TRUE, TRUE, naiveBayes.default(x = X, y = Y, laplace = lapl ace) ## 210 98, 113, 0.8061224, 0.5840708, 0.3973667, 0.4950769, 0.4387755, 1.345 133, 0.8623431, 0.9614898, 0.1428571, 0.1238938, 0.3517262, 0.3309279, 0.438 7755, 0.6106195, 0.5385419, 0.5076843, 0.5510204, 0.1681416, 0.4999474, 0.37 56579, 1.132653, 1.522124, 0.5493718, 0.6281683, 1.285714, 0.3451327, 1.0354 54, 0.8840846, 2.581633, 2.123894, 0.6568194, 0.4842657, TRUE, TRUE, TRUE, T RUE, TRUE, TRUE, TRUE, TRUE, naiveBayes.default(x = X, y = Y, laplace = lapl ace) ## 211 97, 114, 0.8041237, 0.5789474, 
0.3989354, 0.4959078, 0.443299, 1.342105, 0.8656533, 0.9577716, 0.1443299, 0.1315789, 0.3532495, 0.3395249, 0.4329897, 0.6052632, 0.5382691, 0.5086582, 0.5463918, 0.1754386, 0.5004294, 0.382021, 1.134021, 1.526316, 0.552058, 0.6269822, 1.28866, 0.3508772, 1.040 42, 0.8822984, 2.57732, 2.122807, 0.658835, 0.4822578, TRUE, TRUE, TRUE, TRU E, TRUE, TRUE, TRUE, TRUE, naiveBayes.default(x = X, y = Y, laplace = laplac e) ## 212 98, 113, 0.8061224, 0.5840708, 0.3973667, 0.4950769, 0.4387755, 1.345 133, 0.8623431, 0.9614898, 0.1428571, 0.1327434, 0.3517262, 0.3408085, 0.438 7755, 0.6106195, 0.5385419, 0.5076843, 0.5510204, 0.1769912, 0.4999474, 0.38 33613, 1.132653, 1.522124, 0.5493718, 0.6281683, 1.285714, 0.3539823, 1.0354 54, 0.8856026, 2.581633, 2.123894, 0.6568194, 0.4842657, TRUE, TRUE, TRUE, T RUE, TRUE, TRUE, TRUE, TRUE, naiveBayes.default(x = X, y = Y, laplace = lapl Bootstrap Aggregation-LooCV-naiveBayes file:///E:/users/rkannan/cuny/fall2020/fall2020/m11-bagging-loocv/baggi... 8 of 11 11/23/2020, 5:39 PM
  9. 9. ace) ## predicted actual ## 207 1 1 ## 208 1 1 ## 209 1 1 ## 210 1 1 ## 211 0 0 ## 212 1 1 table(as.numeric(cv_df$actual)==as.numeric(cv_df$predicted)) ## ## FALSE TRUE ## 34 178 loocv_tbl<-table(as.numeric(cv_df$actual),as.numeric(cv_df$predicted)) sum(diag(loocv_tbl))/sum(loocv_tbl) ## [1] 0.8396226 (loocv_caret_cfm<-caret::confusionMatrix(loocv_tbl)) Bootstrap Aggregation-LooCV-naiveBayes file:///E:/users/rkannan/cuny/fall2020/fall2020/m11-bagging-loocv/baggi... 9 of 11 11/23/2020, 5:39 PM
  10. 10. ## Confusion Matrix and Statistics ## ## ## 0 1 ## 0 79 19 ## 1 15 99 ## ## Accuracy : 0.8396 ## 95% CI : (0.7832, 0.8863) ## No Information Rate : 0.5566 ## P-Value [Acc > NIR] : <2e-16 ## ## Kappa : 0.6765 ## ## Mcnemar's Test P-Value : 0.6069 ## ## Sensitivity : 0.8404 ## Specificity : 0.8390 ## Pos Pred Value : 0.8061 ## Neg Pred Value : 0.8684 ## Prevalence : 0.4434 ## Detection Rate : 0.3726 ## Detection Prevalence : 0.4623 ## Balanced Accuracy : 0.8397 ## ## 'Positive' Class : 0 ## # now we have to apply the training models to testdata and average them # since this is classification we will take the majority vote # double loop tstcv.perf<-as.data.frame(do.call('cbind',lapply(cv_df$m,FUN=function(m,data =tstcatheart) { v<-predict(m,data[,-c(9)],type='raw') lbllist<-unlist(apply(round(v),1,which.max))-1 } ))) np<-ncol(tstcv.perf) predclass<-unlist(apply(tstcv.perf,1,FUN=function(v){ ifelse(sum(v[2:length (v)])/np<0.5,0,1)})) loocvtbl<-table(tstcatheart[,c(9)],predclass) (loocv_cfm<-caret::confusionMatrix(loocvtbl)) Bootstrap Aggregation-LooCV-naiveBayes file:///E:/users/rkannan/cuny/fall2020/fall2020/m11-bagging-loocv/baggi... 10 of 11 11/23/2020, 5:39 PM
  11. 11. ## Confusion Matrix and Statistics ## ## predclass ## 0 1 ## 0 28 12 ## 1 3 48 ## ## Accuracy : 0.8352 ## 95% CI : (0.7427, 0.9047) ## No Information Rate : 0.6593 ## P-Value [Acc > NIR] : 0.0001482 ## ## Kappa : 0.6571 ## ## Mcnemar's Test P-Value : 0.0388671 ## ## Sensitivity : 0.9032 ## Specificity : 0.8000 ## Pos Pred Value : 0.7000 ## Neg Pred Value : 0.9412 ## Prevalence : 0.3407 ## Detection Rate : 0.3077 ## Detection Prevalence : 0.4396 ## Balanced Accuracy : 0.8516 ## ## 'Positive' Class : 0 ## print(paste('Bagging:',bagging.perf.mean[1])) ## [1] "Bagging: 0.832356532356532" print(paste('LOO-CV:',loocv_cfm$overall[1])) ## [1] "LOO-CV: 0.835164835164835" print(paste('Base NB',nb.cfm$overall[[1]])) ## [1] "Base NB 0.835164835164835" Bootstrap Aggregation-LooCV-naiveBayes file:///E:/users/rkannan/cuny/fall2020/fall2020/m11-bagging-loocv/baggi... 11 of 11 11/23/2020, 5:39 PM

×