#----------------------------------------------------
# Solutions to R course exercises
# Author: Diana Marek
# Date: December 2016
#----------------------------------------------------

#---------------------------------------------
# Syntax and Scripts
#---------------------------------------------
# This section is intentionally left commented out.
#
# # Assign the values 6.7 and -56.3 to variables a and b, respectively.
# a <- 6.7
# b <- -56.3
#
# # Calculate (2*a)/b + (a*b) and assign the result to variable x.
# x <- 2*a/b + a*b
#
# # Use help.search() to find out how to compute the square root of variables
# # and compute the square root of a and b.
# help.search("squareroot")
# help.search("root")
# # Note: it is really hard to find this info! -> Google is your friend!
#
# sqrt(a)
# sqrt(b) # it cannot be computed as b is negative (NaN with a warning)
#
# # Calculate log(x) and assign the result to variable y.
# # Note: x is negative for these values of a and b, so log(x) is NaN.
# y <- log(x)
#
# # Assign the values 75 and 0.1 to the variables u and v, respectively,
# # and print u to the power of v.
# u <- 75
# v <- 0.1
# u^v

#---------------------------------------------
# Vectors
#---------------------------------------------

# Create two vectors a and b containing the values from -5 to 5 and from
# 10 down to 0, respectively.
a <- -5:5
b <- 10:0

# Calculate the difference, sum and product between the elements of a and b.
a - b
a + b
a * b

# Calculate the sum of all elements in a and b.
sum(a, b)
# Note: sum(c(a, b)) works fine as well.

# Identify the largest and smallest values among both vectors and compute
# the overall mean.
max(a, b)
min(a, b)
mean(c(a, b))
# Note: unlike min, max and sum, which accept several vectors, mean and
# median only treat their first argument as data, so the vectors have to be
# combined with c() first.

# Create a vector x containing 100 random numbers uniformly distributed
# in [1,2].
x <- runif(100, min = 1, max = 2)
# Alternative: x <- runif(100) + 1
x

# Standardize x such that it has mean 0 and sd 1.
x <- (x - mean(x)) / sd(x)

# Create a vector y such that y[i] = x[i] + e[i] where e[i] is a normally
# distributed random number with mean 0 and sd 0.2.
y <- x + rnorm(length(x), sd = 0.2)

# Compute the covariance between x and y.
cov(x, y)

# Create a numerical vector f containing the elements
# 1, -1, 2, -2, ..., 100, -100.
# rep() yields 1, 1, 2, 2, ...; the sign vector c(1, -1) is recycled over it.
f <- c(1, -1) * rep(1:100, each = 2)

# Create a vector of 100 elements that contains the numbers 1, 2 and 3 in
# random order, but with twice as many 1s as 2s or 3s.
# Note: drawing with probabilities gives the 2:1:1 ratio only in expectation;
# for exact counts use sample(rep(1:3, times = c(50, 25, 25))).
sample(1:3, 100, prob = c(0.5, 0.25, 0.25), replace = TRUE)

#---------------------------------------------
# Vectors and Matrices
#---------------------------------------------

# Create two vectors x and y containing 1000 random numbers normally
# distributed with sd=1 and mean=0 and mean=1, respectively.
x <- rnorm(1000, mean = 0)
y <- rnorm(1000, mean = 1)

# Calculate the number of pairs (x[i], y[i]) where y[i] > x[i].
# Summing a logical vector counts its TRUE entries.
sum(y > x)

# Calculate the number of values in y that are larger than the largest
# value in x.
sum(y > max(x))
# length(y[y > max(x)]) works as well

# Sort vector y by the elements of vector x.
y[order(x)]
# alternative with sort: y[sort(x, index.return = TRUE)$ix]

# Create a vector z with all 999 differences between the neighboring elements
# of x such that z[1] = x[2] - x[1], z[2] = x[3] - x[2], ...
# diff() computes exactly these lagged differences; the manual equivalent is
# x[2:length(x)] - x[1:(length(x) - 1)].
z <- diff(x)

# Create a character vector a with elements
# A_1, A_2, B_1, B_2, C_1, C_2, D_1, D_2.
# The suffixes 1:2 are recycled across the doubled letters.
a <- paste(rep(c("A", "B", "C", "D"), each = 2), 1:2, sep = "_")
a

# Create a matrix m1 10 by 10 with sampled numbers from 1:50. Create a matrix
# m2 of zeros, 10 by 10 with a diagonal of 1.
# Then subtract m2 from m1 and retrieve the diagonal vector and sum its
# elements.
# Draw all 100 cells independently. sample(50) alone yields only 50 values,
# which matrix() would silently recycle, duplicating columns 1-5 into
# columns 6-10.
m1 <- matrix(sample(1:50, 100, replace = TRUE), nrow = 10, ncol = 10)
# Identity matrix with the same dimensions as m1.
m2 <- diag(1, nrow(m1), ncol(m1))
sum(diag(m1 - m2))

#---------------------------------------------
# Data Frames
#---------------------------------------------

# Create a data frame d containing a vector id with values 1, 2, ..., 20,
# followed by two vectors x and y both containing numbers (integers)
# uniformly distributed within [-5,5].
d <- data.frame(
  id = 1:20,
  x = sample(-5:5, 20, replace = TRUE),
  y = sample(-5:5, 20, replace = TRUE)
)

# Add an additional vector ok where each element is TRUE with a probability
# of 0.7 and FALSE otherwise.
d$ok <- sample(c(TRUE, FALSE), size = nrow(d), replace = TRUE,
               prob = c(0.7, 0.3))
# d$ok <- runif(20) < 0.7 works as well

# Replace all elements of y with y[i] squared.
d$y <- d$y^2

# Create a data frame e that contains both id and y, but only for those
# entries with y > 0.5 AND ok == TRUE.
# ok is already logical, so it can be used directly as part of the row mask.
e <- d[d$y > 0.5 & d$ok, c("id", "y")]

# Save d to your working directory as my_data_frame.txt. Quit RStudio and use
# a text editor to add two additional entries (lines). Start RStudio again
# and import the modified data frame as g.
write.table(d, "my_data_frame.txt", row.names = FALSE)

# Use a text editor to add two additional entries to the txt and reload the
# file in R.
g <- read.table("my_data_frame.txt", header = TRUE)

# Save g to your working directory as my_data_frame.csv.
write.csv(g, "my_data_frame.csv", row.names = FALSE)