
R Programming Homework Help on K-Nearest Neighbors
- 31st Mar, 2022
- 4:55 PM
# K-nearest-neighbours classification on the Universal Bank data set.
#
# Steps:
#   1. Read "UniversalBank.csv" and drop the ID and ZIP code columns.
#   2. Move the response variable to the last column.
#   3. Split the rows 60/40 into training and validation sets.
#   4. Centre and scale the predictors using training-set statistics only.
#   5. Score k-NN on the validation set for odd k in 1..19 and report the
#      k with the highest accuracy.

library(caret)
library(lattice)
library(ggplot2)
library(FNN)

# Column layout once the response has been moved to the end (see below).
predictor.cols <- 1:11
response.col <- 12

bank.df <- read.csv("UniversalBank.csv", header = TRUE)
dim(bank.df)
head(bank.df)

# Remove the ID and ZIP code columns (positions 1 and 5).
bank.df <- bank.df[, -c(1, 5)]
head(bank.df)  # preview only; printing the whole frame is just noise

set.seed(12345)

# Reorder the variables so the response (position 8 after the drop) is last.
bank.df <- bank.df[, c(1:7, 9:12, 8)]
head(bank.df)

# Look at the new column order.
t(t(names(bank.df)))

# Check for missing values.
sum(is.na(bank.df))

# Partition the data into training (60%) and validation (40%) sets.
train.index <- sample(row.names(bank.df), 0.6 * dim(bank.df)[1])
valid.index <- setdiff(row.names(bank.df), train.index)
train.df <- bank.df[train.index, ]
valid.df <- bank.df[valid.index, ]

# New customer to classify; values are listed in predictor-column order.
new.df <- data.frame(
  Age = 40, Experience = 10, Income = 84, Family = 2, CCAvg = 2,
  Education = 2, Mortgage = 0, SecuritiesAccount = 0, CDAccount = 0,
  Online = 1, CreditCard = 1
)
# predict() on a preProcess object looks columns up by name, and read.csv()
# may have produced names such as "Securities.Account". Take the names from
# the data itself so the new record always lines up with the predictors.
stopifnot(ncol(new.df) == length(predictor.cols))
names(new.df) <- names(bank.df)[predictor.cols]

# Initialise the normalised training, validation and complete data frames
# as copies of the originals.
train.norm.df <- train.df
valid.norm.df <- valid.df
bank.norm.df <- bank.df

# Use preProcess() from caret to standardise the predictors. The centring
# and scaling statistics come from the training set only; the response
# column is left untouched.
norm.values <- preProcess(
  train.df[, predictor.cols],
  method = c("center", "scale")
)
train.norm.df[, predictor.cols] <- predict(
  norm.values, train.df[, predictor.cols]
)
valid.norm.df[, predictor.cols] <- predict(
  norm.values, valid.df[, predictor.cols]
)
bank.norm.df[, predictor.cols] <- predict(
  norm.values, bank.df[, predictor.cols]
)
new.norm.df <- predict(norm.values, new.df)

# Validation accuracy for odd values of k.
accuracy.df <- data.frame(k = seq(1, 19, 2), accuracy = 0)

# Shared factor levels keep confusionMatrix() working even if some class is
# never predicted (or is missing from the validation labels).
class.levels <- sort(unique(bank.df[, response.col]))
valid.actual <- factor(valid.norm.df[, response.col], levels = class.levels)

for (i in seq_len(nrow(accuracy.df))) {
  knn.pred <- knn(
    train.norm.df[, predictor.cols],
    valid.norm.df[, predictor.cols],
    cl = train.norm.df[, response.col],
    k = accuracy.df$k[i]
  )
  cm <- confusionMatrix(
    factor(knn.pred, levels = class.levels),
    valid.actual
  )
  accuracy.df[i, 2] <- cm$overall[["Accuracy"]]
}

# Find the best value of k.
accuracy.df$k[which.max(accuracy.df$accuracy)]