Se ha denunciado esta presentación.
Se está descargando tu SlideShare. ×

M09-Cross validating-naive-bayes

Anuncio
Anuncio
Anuncio
Anuncio
Anuncio
Anuncio
Anuncio
Anuncio
Anuncio
Anuncio
Anuncio
Anuncio
Próximo SlideShare
M11 bagging loo cv
M11 bagging loo cv
Cargando en…3
×

Eche un vistazo a continuación

1 de 7 Anuncio

M09-Cross validating-naive-bayes

Descargar para leer sin conexión

Instead of a decision tree or another weak classifier, we take Naive Bayes, which is not necessarily a weak learner, and evaluate what happens when we cross-validate a not-so-weak learner.

Instead of a decision tree or another weak classifier, we take Naive Bayes, which is not necessarily a weak learner, and evaluate what happens when we cross-validate a not-so-weak learner.

Anuncio
Anuncio

Más Contenido Relacionado

Presentaciones para usted (20)

Similares a M09-Cross validating-naive-bayes (20)

Anuncio

Más reciente (20)

Anuncio

M09-Cross validating-naive-bayes

  1. 1. Cross-Validation-naiveBayes file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m09-cv-r... 1 of 7 11/23/2020, 5:27 PM
  2. 2. Cross-Validation-naiveBayes file:///E:/users/rkannan/cuny/fall2020/fall2020/ML-Handbook/m09-cv-r... 2 of 7 11/23/2020, 5:27 PM
  # Cross-Validation-naiveBayes, slide 3 of 7: hold-out baseline ----
  # Loads the heart data, keeps a subset of columns, makes a 70/30 split,
  # fits a naive Bayes model and scores it on the held-out rows.
  library("e1071")

  file <- "c://Users/rk215/Data/heart.csv"
  heart <- read.csv(file, header = TRUE, sep = ",", stringsAsFactors = FALSE)
  head(heart)
  ##   age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal
  ## 1  63   1  3      145  233   1       0     150     0     2.3     0  0    1
  ## 2  37   1  2      130  250   0       1     187     0     3.5     0  0    2
  ## 3  41   0  1      130  204   0       0     172     0     1.4     2  0    2
  ## 4  56   1  1      120  236   0       1     178     0     0.8     2  0    2
  ## 5  57   0  0      120  354   0       1     163     1     0.6     2  0    2
  ## 6  57   1  0      140  192   0       1     148     0     0.4     1  0    1
  ##   target
  ## 1      1
  ## 2      1
  ## 3      1
  ## 4      1
  ## 5      1
  ## 6      1

  # Keep sex, cp, fbs, restecg, exang, slope, ca, thal and target; target
  # becomes column 9 of the subset.
  # NOTE(review): the columns stay integer-coded (stringsAsFactors = FALSE) and
  # e1071::naiveBayes() models numeric predictors as Gaussian. Convert them
  # with factor() if categorical treatment is what is intended.
  catheart <- heart[, c(2, 3, 6, 7, 9, 11, 12, 13, 14)]

  set.seed(43)
  # 70 % of the rows, drawn without replacement, form the training set.
  trdidx <- sample(seq_len(nrow(catheart)), 0.7 * nrow(catheart),
                   replace = FALSE)
  trcatheart <- catheart[trdidx, ]
  tstcatheart <- catheart[-trdidx, ]

  nb.model <- naiveBayes(target ~ ., data = trcatheart)
  # str(nb.model)
  object.size(nb.model)
  ## 11096 bytes

  # Posterior probabilities, one column per class.
  nb.pred <- predict(nb.model, tstcatheart[, -c(9)], type = "raw")
  # Pick the column with the larger posterior; "- 1" maps column positions
  # 1/2 to the labels 0/1. Rounding first is unnecessary for which.max().
  nb.class <- apply(nb.pred, 1, which.max) - 1

  # caret::confusionMatrix() reads a table as predictions in rows and the
  # reference in columns. Passing table(actual, predicted) instead exchanges
  # Sensitivity/Specificity with Pos/Neg Pred Value, so any output captured
  # with that orientation must be read with those pairs swapped.
  # Fixed factor levels keep the table 2 x 2 even if one class is never
  # predicted.
  nb.tbl <- table(
    Prediction = factor(nb.class, levels = c(0, 1)),
    Reference = factor(tstcatheart[[9]], levels = c(0, 1))
  )
  nb.cfm <- caret::confusionMatrix(nb.tbl)
  nb.cfm
  # Cross-Validation-naiveBayes, slide 4 of 7 ----
  # Recorded output of printing nb.cfm (hold-out confusion matrix):
  ## Confusion Matrix and Statistics
  ##
  ##    nb.class
  ##      0  1
  ##   0 28 12
  ##   1  3 48
  ##
  ##                Accuracy : 0.8352
  ##                  95% CI : (0.7427, 0.9047)
  ##     No Information Rate : 0.6593
  ##     P-Value [Acc > NIR] : 0.0001482
  ##
  ##                   Kappa : 0.6571
  ##
  ##  Mcnemar's Test P-Value : 0.0388671
  ##
  ##             Sensitivity : 0.9032
  ##             Specificity : 0.8000
  ##          Pos Pred Value : 0.7000
  ##          Neg Pred Value : 0.9412
  ##              Prevalence : 0.3407
  ##          Detection Rate : 0.3077
  ##    Detection Prevalence : 0.4396
  ##       Balanced Accuracy : 0.8516
  ##
  ##        'Positive' Class : 0
  ##

  # Build NF contiguous index folds over the training rows.
  start_tm <- proc.time()
  N <- nrow(trcatheart)
  NF <- 10
  fold_breaks <- quantile(seq_len(N), probs = seq(0, 1, by = 1 / NF))
  # include.lowest = TRUE is required: the default intervals are left-open,
  # so index 1 falls outside the first bin "(1, ...]", is mapped to NA by
  # cut() and is silently dropped by split(). Without it row 1 never appears
  # in any validation fold (fold sizes sum to N - 1).
  folds <- split(seq_len(N),
                 cut(seq_len(N), fold_breaks, include.lowest = TRUE))
  length(folds)
  ## [1] 10
  lapply(folds, length)
  # Cross-Validation-naiveBayes, slide 5 of 7 ----
  # Recorded output of lapply(folds, length):
  ## $`(1,22.1]`
  ## [1] 21
  ##
  ## $`(22.1,43.2]`
  ## [1] 21
  ##
  ## $`(43.2,64.3]`
  ## [1] 21
  ##
  ## $`(64.3,85.4]`
  ## [1] 21
  ##
  ## $`(85.4,106]`
  ## [1] 21
  ##
  ## $`(106,128]`
  ## [1] 21
  ##
  ## $`(128,149]`
  ## [1] 21
  ##
  ## $`(149,170]`
  ## [1] 21
  ##
  ## $`(170,191]`
  ## [1] 21
  ##
  ## $`(191,212]`
  ## [1] 22

  # Shuffle the training rows once so the contiguous index folds become
  # random subsets of the data.
  ridx <- sample(seq_len(nrow(trcatheart)), nrow(trcatheart),
                 replace = FALSE)

  # For every fold: fit on the remaining rows, predict the held-out rows and
  # keep the fold indices, the fitted model and its confusion matrix.
  # The result is a list-matrix with one row per fold and the columns
  # fold / m / cfm.
  cv_df <- do.call("rbind", lapply(folds, FUN = function(idx, data = trcatheart[ridx, ]) {
    m <- naiveBayes(target ~ ., data = data[-idx, ]) # hold this fold out
    p <- predict(m, data[idx, -c(9)], type = "raw")  # posteriors for the fold
    pc <- apply(p, 1, which.max) - 1                 # column position -> 0/1
    # caret::confusionMatrix() expects predictions in rows and the reference
    # in columns. Fixed levels keep the table 2 x 2 when a small fold yields
    # only one predicted (or actual) class.
    pred_tbl <- table(
      Prediction = factor(pc, levels = c(0, 1)),
      Reference = factor(data[idx, c(9)], levels = c(0, 1))
    )
    pred_cfm <- caret::confusionMatrix(pred_tbl)
    list(fold = idx, m = m, cfm = pred_cfm) # store the fold, model, cfm
  })) # lapply repeats over all folds
  # Cross-Validation-naiveBayes, slide 6 of 7 ----
  # Summarise the per-fold confusion matrices stored in cv_df$cfm.
  cv_df <- as.data.frame(cv_df)

  # One row per fold: the overall statistics followed by the by-class ones.
  tstcv.perf <- as.data.frame(do.call(
    "rbind",
    lapply(cv_df$cfm, FUN = function(cfm) c(cfm$overall, cfm$byClass))
  ))

  # Only folds whose accuracy p-value is below 0.01 enter the summary.
  # NOTE(review): dropping the other folds before averaging biases the
  # summary upwards; confirm this selection is intended.
  significant <- tstcv.perf$AccuracyPValue < 0.01

  # Columns 6:7 are excluded from the summary; the printed means start with
  # Accuracy, Kappa, AccuracyLower, AccuracyUpper, AccuracyNull.
  (cv.tst.perf <- apply(tstcv.perf[significant, -c(6:7)], 2, mean))
  ##             Accuracy                Kappa        AccuracyLower
  ##            0.8683983            0.7318000            0.6545460
  ##        AccuracyUpper         AccuracyNull          Sensitivity
  ##            0.9700452            0.5666667            0.8373377
  ##          Specificity       Pos Pred Value       Neg Pred Value
  ##            0.8900699            0.8924825            0.8629060
  ##            Precision               Recall                   F1
  ##            0.8924825            0.8373377            0.8583395
  ##           Prevalence       Detection Rate Detection Prevalence
  ##            0.4523810            0.3766234            0.4324675
  ##    Balanced Accuracy
  ##            0.8637038

  # Despite the ".var" suffix this holds standard deviations (sd), not
  # variances.
  (cv.tst.perf.var <- apply(tstcv.perf[significant, -c(6:7)], 2, sd))
  ##             Accuracy                Kappa        AccuracyLower
  ##           0.06018717           0.11323819           0.07464027
  ##        AccuracyUpper         AccuracyNull          Sensitivity
  ##           0.02365967           0.07221786           0.06971453
  ##          Specificity       Pos Pred Value       Neg Pred Value
  ##           0.12234605           0.10504433           0.07414154
  ##            Precision               Recall                   F1
  ##           0.10504433           0.06971453           0.04174328
  ##           Prevalence       Detection Rate Detection Prevalence
  ##           0.08908708           0.06878895           0.12277136
  ##    Balanced Accuracy
  ##           0.05126200
  # Cross-Validation-naiveBayes, slide 7 of 7 ----
  # Score every fold model stored in cv_df$m on the held-out test set.
  tstcv_preds <- lapply(
    cv_df$m,
    FUN = function(M, D = tstcatheart[, -c(9)]) predict(M, D, type = "raw")
  )

  # One confusion matrix per fold model, predictions in rows and the actual
  # test labels in columns.
  tstcv_cfm <- lapply(tstcv_preds, FUN = function(P, A = tstcatheart[[9]]) {
    pred_class <- apply(P, 1, which.max) - 1 # column position -> 0/1 label
    # Fixed levels keep the table 2 x 2 even if a model predicts one class
    # only; otherwise caret::confusionMatrix() rejects the table.
    pred_tbl <- table(
      pred_class = factor(pred_class, levels = c(0, 1)),
      A = factor(A, levels = c(0, 1))
    )
    caret::confusionMatrix(pred_tbl)
  })

  # One row per fold model: overall statistics followed by by-class ones.
  tstcv.perf <- as.data.frame(do.call(
    "rbind",
    lapply(tstcv_cfm, FUN = function(cfm) c(cfm$overall, cfm$byClass))
  ))

  # Mean and standard deviation over the models whose accuracy p-value is
  # below 0.01; columns 6:7 are left out of the summary.
  # NOTE(review): cv.tst.perf.var stores sd values, not variances.
  significant <- tstcv.perf$AccuracyPValue < 0.01
  cv.tst.perf <- apply(tstcv.perf[significant, -c(6:7)], 2, mean)
  cv.tst.perf.var <- apply(tstcv.perf[significant, -c(6:7)], 2, sd)
  ###################

×