library(caret) library(doMC) library(pROC) library(kernlab) library(plyr) registerDoMC(cores=3) theme_set(theme_bw()) ################################################################### get_roc <- function(mod, dat) { class_probs <- predict(mod, dat, type = "prob")[, 1] roc_obj <- roc(dat$Class, class_probs, direction = ">") as.vector(roc_obj$auc) } ################################################################### sims <- 50 set.seed(1333) tr <- twoClassSim(500) initial <- train(Class ~ ., data = tr, method = "svmRadial", preProc = c("center", "scale"), tuneLength = 10, metric = "ROC", trControl = trainControl(method = "repeatedcv", repeats = 5, returnResamp = "final", classProbs = TRUE, summaryFunction = twoClassSummary)) grid <- initial$results[, c("C", "sigma")] sim_res0 <- data.frame(sim = 1:sims, RCV = rep(NA, sims), Boot = rep(NA, sims), Truth = rep(NA, sims), Test = rep(NA, sims), Cost = rep(NA, sims)) all_res <- vector(mode = "list", length = nrow(grid)) index <- 0 for(j in 1:nrow(grid)) { index <- index + 1 sim_res <- sim_res0 for(i in 1:sims) { if(index %% 10 == 0) { cat(".") save(all_res, file = "~/tmp/resample_sim.RData") } if(index %% 100 == 0) cat("\n") set.seed(3412+i) tr <- twoClassSim(500) te <- twoClassSim(floor(nrow(tr)*.25)) lg <- twoClassSim(50000) set.seed(131+i) mod <- train(Class ~ ., data = tr, method = "svmRadial", preProc = c("center", "scale"), tuneGrid = grid[j,], metric = "ROC", trControl = trainControl(method = "repeatedcv", repeats = 5, returnResamp = "final", classProbs = TRUE, summaryFunction = twoClassSummary)) set.seed(131+i) modb <- train(Class ~ ., data = tr, method = "svmRadial", preProc = c("center", "scale"), tuneGrid = grid[j,], metric = "ROC", trControl = trainControl(method = "boot", number = 50, returnResamp = "final", classProbs = TRUE, summaryFunction = twoClassSummary)) sim_res[i, "RCV"] <- getTrainPerf(mod)[1, "TrainROC"] sim_res[i, "Boot"] <- getTrainPerf(modb)[1, "TrainROC"] sim_res[i, "Truth"] <- get_roc(mod, lg) sim_res[i, "Test"] <- get_roc(mod, te) sim_res$Cost <- grid[j,"C"] } all_res[[index]] <- sim_res cat("\n") } all_res <- do.call("rbind", all_res) save(all_res, file = "~/tmp/resample_sim.RData") rcv_res <- plyr::ddply(all_res, .(Cost), function(x) c(pct = mean((x$RCV - x$Truth)/x$Truth)*100, cor = cor(x$RCV, x$Truth), RMSE = RMSE(x$RCV, x$Truth), bias = mean(x$RCV- x$Truth), sd = sd(x$RCV), RCV = mean(x$RCV), Truth = mean(x$Truth), Test = mean(x$Test))) boot_res <- plyr::ddply(all_res, .(Cost), function(x) c(pct = mean((x$Boot - x$Truth)/x$Truth)*100, cor = cor(x$Boot, x$Truth), RMSE = RMSE(x$Boot, x$Truth), bias = mean(x$Boot- x$Truth), sd = sd(x$Boot), Boot = mean(x$Boot), Truth = mean(x$Truth), Test = mean(x$Test))) save(rcv_res, boot_res, all_res, file = "~/tmp/resample_sim.RData") if(!interactive()) q("no")