##################################################################
### using R as a calculator

1 + 1
log(2)                 # natural logarithm
log(1000, base = 10)   # logarithm with an explicit base
25 / 3                 # ordinary division
25 %/% 3               # integer division
25 %% 3                # remainder (modulo)

# creating an R object and assigning it a value
houda <- 42
houda

# manipulating objects
houda + 2          # prints the result; houda itself is unchanged
houda <- 4 + 6     # overwrites the previous value
houda

##################################################################
### loading datasets
# Each variant below assigns to the same object `scores`; they are
# alternatives, so a later assignment overwrites an earlier one.

## a) using the RStudio/Rcmdr import feature (preferred)
# aka using readxl::read_excel
# (load the package readxl and use the function read_excel)
library(readxl)
scores <- read_excel(path = "sample-data.xlsx", sheet = "scores")
# shorter way (assuming the arguments are given in the right order)
scores <- read_excel("sample-data.xlsx", "scores")
# if you don't know where the file resides (opens a file chooser)
scores <- read_excel(file.choose(), "scores")

## b) using the read.csv/read.csv2 functions (fallback, always works)
# first export the desired Excel sheet to CSV format
# read.csv for CSV comma delimited and decimal point (English format)
# read.csv2 for CSV semicolon delimited and decimal comma (German format)
scores <- read.csv2("scores.csv")

## c) by direct input (fallback, always works but cumbersome)
scores <- data.frame(
  student = 1:8,
  gpa = c(2.8, 3.4, 3, 3.5, 3.6, 3, 2.7, 3.7),
  ACT = c(21, 24, 26, 27, 29, 25, 25, 30)
)

##################################################################
### working with datasets

# view the whole dataset (don't try for large datasets!)
scores
# or use the View feature (opens the data viewer)
View(scores)
# list only the first several rows
head(scores)
# list the names of the variables
names(scores)
# access a particular variable
scores$gpa
# and a particular value
scores$gpa[2]

# subset of a dataset / a variable
# (rows are selected before the comma; the empty slot after it keeps
#  all columns)
# only observations number 1 to 5 selected
scores[1:5, ]
# only observations with an even student number selected
scores[scores$student %% 2 == 0, ]
# only students with ACT > 25 selected
scores[scores$ACT > 25, ]
# and the corresponding gpa values
scores[scores$ACT > 25, ]$gpa

# let's work directly with the variables
# NOTE: attach() puts the columns on the search path until detach() is
# called; for a one-off expression, with(scores, gpa[2]) gives the same
# access without changing the search path.
attach(scores)
gpa
gpa[2]
# don't need the access anymore
detach(scores)
# (due to masking, try to avoid naming the dataset with the same name
#  as one of the variables)