# My first R command 4+6 # Using the help system help("median") ############################ # Assignment operator: <- # ############################ a <- 5 a x <- c(1, 2, 3, 4, 5) x words <- c("eins", "zwei", "drei", "vier", "fuenf") y <- c(1:100) y z <-seq(from=1, to=100, by=0.5) z w <- rep(c(1,2,3), 10) w length(w) ############################# # Index operator: [...] # ############################# w[1:4] w[seq(from=2, to=17, by=3)] ######################## # Vector operations # ######################## x<-c(1,2,3) y<-c(2,4,6) x + y 10*x x*y x < 3 ########################## # Programming constructs # ########################## # Branching with "if / else" if(x[2] < 100) { r <- 1 } else { r <- 0 } r #for loops for (i in c(1:3)){ print(x[i]) } for (i in c(1:5)){ print(words[i]) } ############################ # Mathematical functions # ############################ log(exp(1)) a<-9 sqrt(a) ############################ # Generate random numbers # ############################ x <- rnorm(100) # norm: Normal distributions x #################################### # Pseudo-random sampling: sample() # #################################### sample(x, 20, replace=FALSE) #without replacement sort(sample(x, 20, replace=TRUE)) #with replacement x_part <- x[1:10] # Use your own sampling probabilities: prob_x <- c(0.004, 0.9, 0.3, 0.001, 0.6, 0.005, 0.12, 0.1, 0.0006, 1) x_part sample(x_part, replace=TRUE, prob=prob_x) ############################ # Elementary graphics # ############################ x <- seq(0, 10, 0.1) x x_sample <- x[sample(1:length(x))] x_sample y <- -1.5 + 4*x + rnorm(length(x), 0, 5) plot(x, y) # rnorm: generate Gaussian random numbers # runif: generate uniform random numbers hist(y) #Histogram: absolute / relative frequencies in classes hist(y, freq=FALSE) ########################## # Descriptive Statistics # ########################## x <- rnorm(100) plot(x) #Plot values in x against their index (uninformative) hist(x, freq=FALSE) #relative frequencies in classes mean(x) #arithmetic mean of values in x max(x) min(x) range(x) # = c(min(x), max(x)) median(x) # Value in the middle of the ordered sample of x x_ordered = sort(x) x_ordered length(x) 0.5*(x_ordered[50] + x_ordered[51]) median(x) ############################################### # Cumulative distribution function (cdf.): # # P(Z <= z ) # # # # empirical cdf. (ecdf.): relative frequency # # of elements in a sample that do not exceed # # the argument # ############################################### #Plot the ecdf. of x: range(x) x_ordered <- sort(x) n<-length(x_ordered) rel_freq<-rep(0, n) for (j in c(1:n)){ rel_freq[j] <- sum(x <= x_ordered[j]) / n } plot(x_ordered, rel_freq) #More elegant solution: help("cumsum") plot(unique(sort(x)), cumsum(table(x)) / length(x), xlab="x", ylab="ecdf(x)",col="blue") #Even more elegant solution: help("ecdf") plot(ecdf(x), add=TRUE, col="red") ################################# # Some higher level graphics # ################################# set.seed(123) # Set the initial value of the # random number generator in R x <- rnorm(100) # 100 Gaussians par(las = 1) #Customized histogram hist(x, main="Histogram and Density", freq=FALSE, col="grey", ylab="Density", xlim=c(-5, 5), ylim=c(0, 0.6)) # Theoretical density curve(dnorm, from=-5, to=5, add=TRUE, lwd=3, lty=2) legend(-4.5, 0.55, legend=c("emp. histogram", "theor. density"), col=c("grey", "black"), lwd=5) #################################### # Probability distributions in R # #################################### # Prefix "r" - random number generation # Prefix "p" - (cumulative) probability: F_X(x) = P(X <= x) # Prefix "d" - density (Lebesgue density or counting density) # Prefix "q" - quantile (generalized inverse of the cdf.) # Commands are built by Prefix + Name of the distribution #################################### # Examples: discrete distributions # #################################### # Binomial distribution: binom p <- 0.2 n <- 30 support <- c(0:n) help("pbinom") plot(support, pbinom(support, size=n, prob=p), xlab="argument", ylab="binomial prob.") points(support, dbinom(support, size=n, prob=p), col="blue") # mathem. relationship between density and cdf: pbinom(support, size=n, prob=p) cumsum(dbinom(support, size=n, prob=p)) rand_numbers <-rbinom(n=500, size=n, prob=p) plot(ecdf(rand_numbers)) points(support, pbinom(support, size=n, prob=p), col="blue") # Poisson distribution: pois help("dpois") lambda <- 2.5 plot(support, dpois(support, lambda)) plot(support, ppois(support, lambda), xlab="argument", ylab="Poisson prob.") points(support, dpois(support, lambda), col="red") rand_numbers <-rpois(n=500, lambda) plot(ecdf(rand_numbers)) points(support, ppois(support, lambda), col="blue") # (discrete) Laplace distribution: # sample(c(1:n), replace=TRUE) upper <- 1000 Laplace <- sample(c(1:upper), replace=TRUE) plot(Laplace) plot(sort(Laplace)) ####################################### # Examples: continuous distributions # ####################################### # Exponential distribution: exp help("dexp") lambda <- 0.2 # Expected value equals 1/lambda = 5 support <- seq(from=0, to=15, by=0.1) plot(support, dexp(support, lambda), type="l") curve(dexp(x, lambda), from=0, to=15, n=length(support)) rand_numbers <- rexp(n=10000, rate=lambda) mean(rand_numbers) argument <- 4.1 pexp(argument, rate=lambda) f <- function(x) dexp(x, rate=lambda) integrate(f, lower=0, upper=argument) # Normal distribution: norm mu <- 3.0 sigma <- 0.2 support <- seq(from=mu-3*sigma, to=mu+3*sigma, by=0.01) plot(support, dnorm(support, mean=mu, sd=sigma), type="l", ylab="Gaussian bell curve") plot(support, pnorm(support, mean=mu, sd=sigma), type="l", ylab="sigmiodal Gaussian cdf.") qnorm(0.5, mean=mu, sd=sigma) qnorm(0.8, mean=mu, sd=sigma) # (Continuous) uniform distribution: unif help("runif") a <- 2 b <- 5 support <- seq(from=a, to=b, by=0.01) 1/(b-a) # = 1/3 = constant value of the density plot(support, dunif(support, min=a, max=b), col="red", type="l", ylab="uniform density") plot(support, punif(support, min=a, max=b), col="blue", type="l", ylab="cdf. of a uniform distribution") g <- function(x) punif(x, min=a, max=b) plot_range <- seq(from=a-1, to=b+1, by=0.01) plot(plot_range, dunif(plot_range, min=a, max=b), col="red", type="l", xlim=c(a-1, b+1), ylim=c(-0.1, 1)) curve(g, from=a-1, to=b+1, n=length(support), add=TRUE, col="blue") dunif(1.0, min=a, max=b) # = 0 punif(1.0, min=a, max=b) # = 0