 ## R Programming solution on LASSO and Cross Validation Task

• 4th Nov, 2022
• 16:42 PM

rm(list = ls())
options(scipen = 999)
set.seed(1)

if(!('ISLR' %in% rownames(installed.packages()))) install.packages('ISLR')
library(ISLR)

if(!('glmnet' %in% rownames(installed.packages()))) install.packages('glmnet')
library(glmnet)

data(College)
College\$Private <- ifelse(College\$Private == 'Yes',1,0)

# Part(a): Split into train and test:
split_prop <- 0.7
idx <- sample.int(nrow(College),floor(split_prop*nrow(College)),FALSE)
train <- College[idx,]
test <- College[-idx, ]

y_train <- train[,2]
x_train <- as.matrix(train[,-2])
y_test <- test[,2]
x_test <- as.matrix(test[,-2])

# Part(b): Fit Lasso model:

fit <- glmnet(x_train, y_train, family = 'gaussian', alpha = 1)

# Part(c): Cross validation to choose lambda:
cross_validation <- cv.glmnet(x_train, y_train, family = 'gaussian', alpha = 1)
plot(cross_validation)
lambda_cv <- cross_validation\$lambda.min

# Part(d): Predicted values of test data based on best lambda:
fit_cv <- glmnet(x_train, y_train, family = 'gaussian', alpha = 1, lambda = lambda_cv)
pred_test <- predict(fit_cv,x_test)
test_MSE <- mean((pred_test - y_test)^2)

# Part(e):
# Test MSE of null model with lambda = infinity
fit_null <- glmnet(x_train, y_train, family = 'gaussian', alpha = 1, lambda = 1e8)
pred_test <- predict(fit_null,x_test)
test_MSE_null <- mean((pred_test - y_test)^2)

#Test MSE of least square regression model with lambda = 0:
fit_ols <- glmnet(x_train, y_train, family = 'gaussian', alpha = 1, lambda = 0)
pred_test <- predict(fit_ols,x_test)
test_MSE_ols <- mean((pred_test - y_test)^2)

# Part(f): Lasso model with whole data:
y <- College[,2]
x <- as.matrix(College[,-2])
fit_lasso <- glmnet(x, y, family = 'gaussian', alpha = 1, lambda = lambda_cv)
coef(fit_lasso)
fit_lasso[["df"]] #Number of non-zero co-effecient

# Part(g): Linear regression model with Lasso predictors:
temp <- coef(fit_lasso)
selected_predictors <- colnames(x)[(temp@i)]
data <- College[,c('Apps',selected_predictors)]
fit_lm <- lm(Apps ~ . , data)
summary(fit_lm)