I have performed the following cross-validation knn (using the caret package) on the iris dataset. I am now trying to plot the training and test error rates for the result. Here is my attempt but I cannot get the error rates. Can anyone help me please?
library(caret)
data(iris)
sample <- sample(2, nrow(iris), replace=TRUE, prob=c(0.80, 0.20))
iris.training <- iris[sample == 1, 1:4]
iris.test <- iris[sample == 2, 1:4]
iris.trainLabels <- iris[sample == 1, 5]
iris.testLabels <- iris[sample == 2, 5]
# Combine training data and combine test data.
iris_train <- cbind(iris.trainLabels, iris.training)
iris_test <- cbind(iris.testLabels, iris.test)
trControl <- trainControl(method = "cv", number = 5)
# K values 1 3 5 7 9
k_values <- seq(from=1, to=10, by=2)
fit <- train(iris.trainLabels ~ ., method = "knn", tuneGrid = expand.grid(k = k_values), trControl = trControl, data = iris_train)
# Plot
bestK <- function(iris_train, iris.trainLabels,
iris.testLabels) {
ctr <- c(); cts <- c()
for (k in length(k_values)) {
fit <- train(iris.trainLabels ~ ., method = "knn", tuneGrid = expand.grid(k = k_values), trControl = trControl, data = iris_train)
trTable <- prop.table(table(fit, iris.trainLabels))
tsTable <- prop.table(table(fit, iris.testLabels))
erTr <- trTable[1,2] + trTable[2,1]
erTs <- tsTable[1,2] + tsTable[2,1]
ctr <- c(ctr,erTr)
cts <- c(cts,erTs)
}
err <- data.frame(k=k_values, trER=ctr, tsER=cts)
return(err)
}
err <- bestK(iris_train, iris.trainLabels, iris.testLabels)
plot(err$k,err$trER,type='o',ylim=c(0,.5),xlab="k",ylab="Error rate",col="blue")
lines(err$k,err$tsER,type='o',col="red")