#----------------------------------------------------
#Solutions to R course exercises
# Author: Diana Marek
# Date: December 2016
#----------------------------------------------------

#---------------------------------------------
#Syntax and Scripts
#---------------------------------------------
# #Assign the values 6.7 and −56.3 to variables a and b, respectively
# a <- 6.7
# b <- -56.3
# 
# #Calculate (2*a)/b +(a*b) and assign the results to variable x
# x <- 2*a/b + a*b
# 
# #Use help.search() to find out how to compute the square root of variables and compute the square root of a and b
# help.search("squareroot")
# help.search("root")
# #Note: it is really hard to find this info! -> Google is your friend!
# 
# sqrt(a)
# sqrt(b)  # it cannot be computed as b is negative (R returns NaN with a warning)
# 
# # Calculate log(x) and assign the result to variable y.
# y <- log(x)
# 
# #Assign the values 75 and 0.1 to the variables u and v, respectively, and print u to the power of v
# u <- 75
# v <-0.1
# u^v

#---------------------------------------------
#Vectors
#---------------------------------------------
# Create two vectors a and b holding the values from -5 up to 5 and from 10
# down to 0, respectively.
a <- seq.int(-5L, 5L)
b <- rev(0:10)

# Element-wise difference, sum and product of a and b.
a - b
a + b
a * b

# Sum of all elements in a and b together.
sum(c(a, b)) # Note: sum(a, b) works fine as well.

# Largest and smallest value across both vectors, and the overall mean.
max(c(a, b))
min(c(a, b))
# Note: min(), max() and sum() pool all of their arguments, so max(a, b) works
# too. mean() and median() summarise a single vector only, which is why a and
# b have to be combined with c() first.
mean(c(a, b))

# Create a vector x of 100 random numbers uniformly distributed in [1, 2].
# Alternative: x <- runif(100) + 1
x <- runif(n = 100, min = 1, max = 2)
x

# Standardize x: subtract its mean and divide by its standard deviation, so
# that the result has mean 0 and sd 1.
x <- (x - mean(x)) / sd(x)

# Create y with y[i] = x[i] + e[i], where each e[i] is a normally distributed
# random number with mean 0 and sd 0.2.
y <- x + rnorm(n = length(x), mean = 0, sd = 0.2)

# Covariance between x and y.
cov(x, y)

# Create a numeric vector f containing the elements 1, -1, 2, -2, ..., 100, -100.
# Every magnitude is repeated twice; the sign pattern c(1, -1) is recycled
# along the vector and flips each second element.
f <- rep(1:100, each = 2) * c(1, -1)

# Create a vector of 100 elements that contains the numbers 1, 2 and 3 in
# random order, with twice as many 1s as 2s or 3s.
# Drawing with prob = c(0.5, 0.25, 0.25) and replace = TRUE only produces that
# ratio on average. Shuffling a vector that holds exactly 50 ones, 25 twos and
# 25 threes guarantees it.
shuffled <- sample(rep(1:3, times = c(50, 25, 25)))
shuffled


#---------------------------------------------
#Vectors and Matrices
#---------------------------------------------
# Create two vectors x and y, each holding 1000 normally distributed random
# numbers with sd = 1, and with mean 0 and mean 1, respectively.
x <- rnorm(n = 1000, mean = 0, sd = 1)
y <- rnorm(n = 1000, mean = 1, sd = 1)

# Number of pairs (x[i], y[i]) where y[i] > x[i]. Summing a logical vector
# counts its TRUE entries.
sum(x < y)

# Number of values in y that are larger than the largest value in x.
length(y[y > max(x)]) # sum(y > max(x)) works as well

# Sort vector y by the elements of vector x.
# Alternative with sort: y[sort(x, index.return = TRUE)$ix]
y[order(x)]

# Create a vector z with all 999 differences between neighbouring elements of
# x, such that z[1] = x[2] - x[1], z[2] = x[3] - x[2], ...
z <- diff(x)

# Create a character vector a with elements A_1, A_2, B_1, B_2, C_1, C_2,
# D_1, D_2. Each letter is repeated twice and the suffixes 1:2 are recycled.
a <- paste0(rep(LETTERS[1:4], each = 2), "_", 1:2)
a
# Create a 10 by 10 matrix m1 with numbers sampled from 1:50, and a 10 by 10
# matrix m2 of zeros with a diagonal of 1. Then subtract m2 from m1, retrieve
# the diagonal vector and sum its elements.

# A 10 x 10 matrix needs 100 values, so draw 100 numbers with replacement.
# sample(50) yields only 50 values, which matrix() silently recycles, making
# columns 6-10 an exact copy of columns 1-5.
m1 <- matrix(sample(1:50, size = 100, replace = TRUE), nrow = 10, ncol = 10)
# Identity matrix with as many rows as m1. diag() expects a single number for
# nrow, not the length-2 vector returned by dim().
m2 <- diag(1, nrow = nrow(m1))
sum(diag(m1 - m2))

#---------------------------------------------
#Data Frames
#---------------------------------------------
# Create a data frame d containing a vector id with values 1, 2, ..., 20,
# followed by two vectors x and y that both contain integers uniformly
# distributed within [-5, 5].
d <- data.frame(id = 1:20)
d$x <- sample(-5:5, 20, replace = TRUE)
d$y <- sample(-5:5, 20, replace = TRUE)

# Add an additional vector ok where each element is TRUE with a probability of
# 0.7 and FALSE otherwise.
# d$ok <- runif(20) < 0.7 works as well
d$ok <- sample(c(TRUE, FALSE), size = 20, replace = TRUE, prob = c(0.7, 0.3))

# Replace all elements of y with y[i] squared.
d$y <- d$y^2

# Create a data frame e that contains both id and y, but only for those rows
# with y > 0.5 AND ok being TRUE. ok is already logical, so it can be used as
# a filter directly.
e <- d[d$y > 0.5 & d$ok, c("id", "y")]

# Save d to your working directory as my_data_frame.txt. Quit RStudio and use
# a text editor to add two additional entries (lines). Start RStudio again and
# import the modified data frame as g.
write.table(d, file = "my_data_frame.txt", row.names = FALSE)

# After adding the two extra entries in a text editor, reload the file in R.
# The first line of the file holds the column names.
g <- read.table(file = "my_data_frame.txt", header = TRUE)

# Save g to your working directory as my_data_frame.csv.
write.csv(g, file = "my_data_frame.csv", row.names = FALSE)