#### Import the INTERNET dataset ----
# Reads the "internet" sheet of the workbook, prints descriptive statistics
# of the `netustm` column (overall and per `nuts3` region) and draws the
# corresponding base-graphics and ggplot2 visualizations.
library(readxl)
internet <- read_excel("../state-data.xlsx", "internet")

# Tidy the dataset: treat the NUTS3 region id and gender as factors.
internet$nuts3 <- as.factor(internet$nuts3)
internet$gndr <- as.factor(internet$gndr)

# Attach the dataset so its columns (e.g. `netustm`) can be referenced by
# bare name in the calls below.
# NOTE(review): attach() is generally discouraged because it hides where
# names come from; kept here because code outside this section may rely on
# the attached columns. Consider `internet$netustm` or `with(internet, ...)`.
attach(internet)

### Descriptive statistics ----
# Mean, variance and standard deviation of the whole sample.
mean(netustm)
var(netustm)
sd(netustm)

# The same statistics per region, using dplyr and its pipe operator `%>%`.
library(dplyr)
internet %>%
  group_by(nuts3) %>%
  summarize(
    count = n(),
    mean = mean(netustm),
    var = var(netustm),
    # Standard deviation must come from sd(); using var() here would just
    # repeat the variance column under a misleading name.
    sd = sd(netustm)
  )

# Other usual descriptive statistics.
min(netustm)
max(netustm)
median(netustm)
quantile(netustm, probs = c(0.25, 0.75))
IQR(netustm) # inter-quartile range
range(netustm)

# Or, more briefly, Tukey's five-number summary and summary().
fivenum(netustm)
summary(netustm)

### Graphical visualizations ----
# Base graphics: box plot, histogram and kernel density estimate.
boxplot(netustm)
hist(netustm)
plot(density(netustm))

# The same plots using ggplot2.
library(ggplot2)
ggplot(data = internet, aes(y = netustm)) +
  geom_boxplot()
ggplot(data = internet, aes(x = netustm)) +
  geom_histogram(binwidth = 5, color = "white")
ggplot(data = internet, aes(x = netustm)) +
  geom_density(fill = "blue")

# Regional comparisons: one box plot per region (x-axis labels rotated by
# 90 degrees), and one histogram panel per region.
ggplot(data = internet, aes(x = nuts3, y = netustm)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggplot(data = internet, aes(x = netustm)) +
  geom_histogram(binwidth = 5, color = "white") +
  facet_wrap(~ nuts3)