Login
Order Now
Support
R Studio Solution for Data Engineering and Mining Task

R Studio Solution for Data Engineering and Mining Task

  • 17th Nov, 2022
  • 5:26 PM

#  DIGIT RECOGNIZER  #

# Session setup and data loading.
# NOTE: the workspace is deliberately not cleared here. `rm(list = ls())`
# deletes the caller's objects without resetting packages or options, so it
# gives no real isolation; run the script in a fresh R session instead.
set.seed(1001) # fixed seed: the random split and CV folds below use the RNG
library(data.table)
library(FNN)
library(caret)

# Load data:
# The project root defaults to the original location but can be overridden
# with the DIGIT_RECOGNIZER_DIR environment variable, so the script no longer
# has to change the working directory with setwd().
project_dir <- Sys.getenv("DIGIT_RECOGNIZER_DIR", unset = "~/Assignments/Job 1")
train_path <- path.expand(file.path(project_dir, "data", "train.csv"))
if (!file.exists(train_path)) {
  stop("Training file not found: ", train_path, call. = FALSE)
}
data <- as.matrix(data.frame(fread(train_path)))

# Keep every 10th row (rows 10, 20, 30, ...) to shrink the data set.
# `drop = FALSE` keeps `data` a matrix even if only one row is selected.
stopifnot(nrow(data) >= 10)
data <- data[seq(10, nrow(data), by = 10), , drop = FALSE]
y_label <- "label"

## DATA PRE-PROCESSING:

# 01. Removing columns with zero-variance:
# Only feature columns are screened. The label column is always kept so that
# `data[, y_label]` below cannot fail, and it is located by name rather than
# by assuming it is the first column.
is_label <- colnames(data) == y_label
idx <- !is_label & (apply(data, 2, sd) == 0) # features having zero-variance
data <- data[, !idx, drop = FALSE] # removing features having zero-variance

# 02. Use PCA to reduce dimension:
# NOTE(review): scaling and PCA are fitted on all rows before the train/test
# split further down, so the test rows influence the projection. Confirm
# whether that is acceptable for this comparison.
x <- data[, colnames(data) != y_label, drop = FALSE]
y <- as.factor(data[, y_label])
pca_data <- scale(x) # centre and scale each feature to unit variance
pca <- prcomp(pca_data)

# Cumulative share of total variance explained by the leading components.
cumpro <- cumsum(pca$sdev^2 / sum(pca$sdev^2))
n_shown <- min(200, length(cumpro))
plot(
  cumpro[seq_len(n_shown)],
  xlab = "Principal components",
  ylab = "Amount of explained variance",
  main = "Cumulative variance plot"
)

# Take the top 100 PCA axes (fewer if the data has fewer components).
# The scores must come from the same scaled matrix the PCA was fitted on;
# `pca$x` is exactly `pca_data %*% pca$rotation`. Multiplying the unscaled
# `x` by the rotation would not give principal-component scores.
n_comp <- min(100, ncol(pca$x))
pca_x <- pca$x[, seq_len(n_comp), drop = FALSE]

# 03. Random 70:30 split of the PCA scores into training and test sets.
prop <- 0.7
n_obs <- nrow(pca_x)
n_train <- ceiling(prop * n_obs)

# Row numbers drawn without replacement form the training set (in sampled
# order); every remaining row goes to the test set.
idx <- sample.int(n_obs, size = n_train, replace = FALSE)

train.x <- pca_x[idx, ]
train.y <- y[idx]
test.x <- pca_x[-idx, ]
test.y <- y[-idx]

###########################
## NAIVE BAYES ESTIMATOR ##
###########################
# Fit a naive Bayes classifier with 10-fold cross-validation via caret.
# The recorded time covers model fitting plus prediction on both sets.
start <- proc.time()
model_nb <- train(
  x = train.x,
  y = train.y,
  method = "nb",
  trControl = trainControl(method = "cv", number = 10)
)

pred_test <- predict(model_nb, test.x)
pred_train <- predict(model_nb, train.x)
end <- proc.time()
time_nb <- end - start

# confusionMatrix() expects predictions as `data` and the true labels as
# `reference`. Passing them the other way round leaves accuracy unchanged
# but swaps the per-class statistics (e.g. sensitivity vs. precision).
train_acc_nb <- confusionMatrix(data = pred_train, reference = train.y)
test_acc_nb <- confusionMatrix(data = pred_test, reference = test.y)

###########################
## K-NEAREST NEIGHBOUR ####
###########################
# Fit a k-nearest-neighbour classifier with 10-fold cross-validation.
# `tuneLength = 20` asks caret to evaluate 20 candidate tuning values.
# The recorded time covers tuning/fitting plus prediction on both sets.
start <- proc.time()

model_knn <- train(
  x = train.x,
  y = train.y,
  method = "knn",
  trControl = trainControl(method = "cv", number = 10),
  tuneLength = 20
)

pred_train <- predict(model_knn, train.x)
pred_test <- predict(model_knn, test.x)
end <- proc.time()
time_knn <- end - start

# confusionMatrix() expects predictions as `data` and the true labels as
# `reference`. Passing them the other way round leaves accuracy unchanged
# but swaps the per-class statistics (e.g. sensitivity vs. precision).
train_acc_knn <- confusionMatrix(data = pred_train, reference = train.y)
test_acc_knn <- confusionMatrix(data = pred_test, reference = test.y)

###########################
## SUPPORT VECTOR MACHINE #
###########################
# NOTE(review): caret documents method "svmLinear" as backed by kernlab
# ("svmLinear2" is the e1071 variant). The e1071 load is kept from the
# original script; confirm whether it is still needed.
library(e1071)

# Fit a linear SVM with 10-fold cross-validation via caret.
# The recorded time covers model fitting plus prediction on both sets.
start <- proc.time()
model_svm <- train(
  x = train.x,
  y = train.y,
  method = "svmLinear",
  trControl = trainControl(method = "cv", number = 10)
)

pred_test <- predict(model_svm, test.x)
pred_train <- predict(model_svm, train.x)

end <- proc.time()
time_svm <- end - start

# confusionMatrix() expects predictions as `data` and the true labels as
# `reference`. Passing them the other way round leaves accuracy unchanged
# but swaps the per-class statistics (e.g. sensitivity vs. precision).
train_acc_svm <- confusionMatrix(data = pred_train, reference = train.y)
test_acc_svm <- confusionMatrix(data = pred_test, reference = test.y)

##########################
##      COMPARISON      ##
##########################
# Results are printed explicitly: bare top-level expressions are only
# auto-printed interactively or under Rscript, and show nothing when the
# file is run with source().

# Training-set accuracy per model.
print(train_acc_nb$overall["Accuracy"])
print(train_acc_knn$overall["Accuracy"])
print(train_acc_svm$overall["Accuracy"])

# Held-out test-set accuracy per model.
print(test_acc_nb$overall["Accuracy"])
print(test_acc_knn$overall["Accuracy"])
print(test_acc_svm$overall["Accuracy"])

# Time per model (fitting plus prediction on both sets), as measured by
# proc.time() differences.
print(time_nb)
print(time_knn)
print(time_svm)
 

Share this post

assignment help, assignment helper, assignment experts, assignment writing services