# 20th Sep 2022
# 06:03 am
# Question 1
# Bucket the star ratings into three rank labels and compare the total
# positive / negative score per bucket.
#
# Columns are read explicitly from `final_data` rather than through attach():
# an attached data frame is a copy on the search path that is not updated
# when `final_data` is modified below, so bare column names could silently
# refer to stale data.
ranking <- as.character(final_data$Ratings)
ranking[final_data$Ratings %in% c(4, 5)] <- "High Rank"
ranking[final_data$Ratings %in% 3] <- "Medium Rank"
ranking[final_data$Ratings %in% c(1, 2)] <- "Low Rank"

# Assign the column by name so that re-running this section overwrites
# `ranking` instead of appending a duplicate column, as cbind() would.
final_data$ranking <- ranking

temp <- final_data %>%
  group_by(ranking) %>%
  summarise(positive = sum(positive), negative = sum(negative))

# Keep exactly the three known buckets. Selecting the first three rows of the
# summary depended on row order: it produced NA rows when fewer than three
# groups existed and could drop a real bucket when an unmapped rating value
# sorted ahead of the labels.
rank_labels <- c("High Rank", "Medium Rank", "Low Rank")
temp <- temp[temp$ranking %in% rank_labels, ]

barplot(
  temp$positive,
  names.arg = temp$ranking,
  main = "Bar Plot for Positive Score",
  ylab = "Score"
)
barplot(
  temp$negative,
  names.arg = temp$ranking,
  main = "Bar Plot for Negative Score",
  ylab = "Score"
)
## Interpretation: We can see that low-ranking reviews have a high negative score and
## high-ranking reviews have a high positive score. This is completely expected. Ranking is assumed to be on a 3-level scale:
## ratings 5 and 4 are high rank, 3 is medium rank, and 1 and 2 are low rank.
# Question 2
# Cross-tabulate the rank bucket (rows) against the delivery status (columns)
# and draw the counts as a stacked bar chart.
temp2 <- table(final_data$ranking, final_data$Timely_Delivery)
print(temp2)

# Look the colours up by rank label instead of by row position, so each
# bucket keeps its colour even when one of the buckets is absent from the
# table. With all three buckets present this gives the same colours as
# before (table rows are sorted: High, Low, Medium).
rank_colours <- c(
  "High Rank" = "green",
  "Low Rank" = "red",
  "Medium Rank" = "blue"
)
bar_colours <- unname(rank_colours[rownames(temp2)])
bar_colours[is.na(bar_colours)] <- "grey" # row label outside the three buckets

# NOTE(review): names.arg relabels the table columns positionally; this
# assumes the first Timely_Delivery value (in sorted order) means "on time"
# and the second means "delayed" - confirm against the data.
barplot(
  temp2,
  legend.text = rownames(temp2),
  col = bar_colours,
  names.arg = c("Timely Delivery", "Delayed")
)
## Clearly, timely delivery affects the reviews. All timely deliveries have a high ranking and none has a medium or low ranking.
## In contrast, none of the delayed deliveries has a high ranking. Most of them are low and some are medium.
## This means delivery time is very important.
# Question 3 --------------------------------------------------------------
library("tm")
library("SnowballC")
library("wordcloud")
library("RColorBrewer")
# Build a word-frequency table from all review texts and draw a word cloud
# of the most frequent terms.
text2 <- as.character(final_data$Reviews)
text2 <- text2[!is.na(text2)] # paste() would turn NA into the literal "NA"
if (length(text2) == 0L) {
  stop("final_data$Reviews contains no review text", call. = FALSE)
}

# Join every review into one string with a single vectorised call. The
# previous index loop re-pasted the growing string on every iteration and
# iterated over 1:length(), which yields c(1, 0) for an empty vector.
full_text <- paste(text2, collapse = " ")

# One-document corpus -> term/document matrix -> per-term totals.
docs <- Corpus(VectorSource(full_text))
dtm <- TermDocumentMatrix(docs)
m <- as.matrix(dtm)
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)

# Print explicitly so the table is also shown when the script is run through
# source(), consistent with the explicit print() used for the Question 2 table.
print(head(d, 10))

wordcloud(
  words = d$word,
  freq = d$freq,
  min.freq = 1,
  max.words = 50,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)