
R Studio Homework Help on Data Science
- 27th Jan, 2022
- 3:22 PM
## -----------------------------------------------------------------
# Task 1 script: load the spreadsheet, inspect it, drop incomplete rows,
# compare `duration` across `gender`, compute two scale means, correlate
# them, and export the cleaned data.
library(tidyverse)

## -----------------------------------------------------------------
library(readxl)

# The path is resolved relative to the current working directory.
task1 <- read_excel("Downloads/TASK 1 .xlsx")
dim(task1)

## -----------------------------------------------------------------
str(task1)

## -----------------------------------------------------------------
# Emotion 1
summary(task1$emotions_1)
table(task1$emotions_1)

# Duration
summary(task1$duration)

## -----------------------------------------------------------------
# Item columns used for the frequency tables here and for the scale
# means further down.
# NOTE(review): the emotions set contains items 7-9 both in raw form and
# in their "r" form; confirm that averaging both versions is intended.
emotion_cols <- c(
  paste0("emotions_", 1:9),
  paste0("emotions_", 7:9, "r")
)
an_cols <- paste0("AN_", 1:11)

# One frequency table per emotions item (lapply keeps each column's own
# type instead of coercing the whole data frame to a matrix).
lapply(task1[emotion_cols], table)

## -----------------------------------------------------------------
# Treat the literal text "NA" as a missing value, then keep only the rows
# that have no missing value in any column.
task1 <- task1 %>%
  mutate(across(where(is.character), ~ na_if(.x, "NA"))) %>%
  drop_na()

## -----------------------------------------------------------------
# Mean
mean(task1$duration)

# Standard deviation
sd(task1$duration)

# Range (max - min)
diff(range(task1$duration))

## -----------------------------------------------------------------
table(task1$gender)

## -----------------------------------------------------------------
summary(task1$duration[task1$gender == 1])

## -----------------------------------------------------------------
summary(task1$duration[task1$gender == 2])

## -----------------------------------------------------------------
task1 %>%
  ggplot(aes(x = factor(gender), y = duration)) +
  geom_boxplot()

## -----------------------------------------------------------------
# Same boxplot restricted to durations below 25000, so that very long
# durations do not compress the rest of the plot.
task1 %>%
  filter(duration < 25000) %>%
  ggplot(aes(x = factor(gender), y = duration)) +
  geom_boxplot()

## -----------------------------------------------------------------
# First we will convert variables to numeric type.
# Columns are addressed by position (5 to 90), so this depends on the
# column order of the spreadsheet.
task1 <- task1 %>%
  mutate(across(5:90, as.numeric))

# Row-wise mean of each item set.
task1 <- task1 %>%
  mutate(
    emotions_mean = rowMeans(across(all_of(emotion_cols))),
    AN_mean = rowMeans(across(all_of(an_cols)))
  )

## -----------------------------------------------------------------
cor(task1$emotions_mean, task1$AN_mean)

## -----------------------------------------------------------------
# Writing .csv file
write.csv(task1, file = "studentid_task1_.csv")