## This is a comment. demo(graphics) ## CS401 not a software engineering course, so you should write ## comments when you want to, eg to remember what you were doing, not ## for the sake of bulking up your files with comments that say things ## already apparent in the code. ## I will take off one point for each such comment. (Just kidding?) 1 2+3 1+3/5 sin(2) log(10) help(sin) help(log10) ## oddly given the spirit of R, coercion to complex is not automatic (-2)^0.5 (-2+0i)^0.5 ## Goof with c, the constructor c(3,4,6) c(c(1,3),4,6) help(c) x <- c(100,101,102,103,104,105) + c(0.1) length(x) length(c(x,x,x,x)) max(x) max(1,2,x) ## strings are their own datatype, not vectors of characters c("http://www.r-project.org", "is", "the", "web", "site", "for", "R") ## easy to plot stuff x=c(0:10, 10*(2:10), 100*(2:10)) plot(x,x^2) seq(2,5, by=1/4) rep(c(1,2),3) 1<2 x<-c(1,2,3,NA,NA,6,7,8) is.na(x) 1/0 0/0 ## first class functions, woop de do! compose <- function(f,g) function(x) f(g(x)) compose(sin,exp)(2) sinexp <- compose(sin,exp) x <- seq(0,1,by=.01) x1 <- x[-1] plot(x1, x1^3, log="xy") c() length(numeric(0)) c(1,numeric(0),2) c(1,NULL,2) ## We can everlay plots plot(x1, x1^3, type="o") lines(x1, x1^2) lines(x1, x1^1.2) lines(x1, x1^0.5) lines(x1, x1^0.2) lines(x1, x1^5) lines(x1, sinexp(3*x1)) ## our first control structure: for plot(x1, x1^3, type="o") for(p in c(0.2,05,1.2,2,5)) lines(x1, x1^p) ## this swizzles graphical output to a file instead of the screen, ## which produces much better printouts, or figures to include in ## another document, than would an (ugh) screen shot help(postscript) help(pdf) ## gaussian pdf, very pretty x<-seq(-5,5,by=.01) plot(x, dnorm(x), type="l") plot(x, log(dnorm(x)), type="l") ## use numeric integration to see how much of a gaussian is within ## 1/2/3/4 standard deviations of the mean. sum(dnorm(x,0,5/1))*(x[2]-x[1]) sum(dnorm(x,0,5/2))*(x[2]-x[1]) sum(dnorm(x,0,5/3))*(x[2]-x[1]) sum(dnorm(x,0,5/4))*(x[2]-x[1]) ## The following implies that if IQ is gaussian distributed with a ## mean of 100 and a standard deviation of 10 then about 1 in 30,000 ## people has an IQ of 140 or above. 1/(1-sum(dnorm(x,0,5/4))*(x[2]-x[1])) ## histogram samples of different sizes hist(rnorm(50),prob=TRUE) hist(rnorm(500),prob=TRUE) hist(rnorm(5000),prob=TRUE) hist(rnorm(50000),prob=TRUE) hist(rnorm(50000),prob=TRUE,breaks=100) hist(rnorm(5000),prob=TRUE,breaks=100) hist(rnorm(1000),prob=TRUE,breaks=100) hist(rnorm(100),prob=TRUE,breaks=100) stdev<-compose(sqrt,var) stdev(rnorm(10000)) stdev(rnorm(10000)) stdev(rnorm(10000)) stdev(rnorm(10000)) ## MAP the function F over the elements of the vector X. ## (There is probably a builtin for this.) ## Recursive definition ##map<-function(f,x) if(length(x)==0) x else c(f(x[1]), map(f,x[-1])) ## Iterative definition. Note mutation of vector elements. Note ## pre-allocation of vector before assigning to its elements. map<-function(f,x) { n<-length(x); y<-1:n; for(i in 1:n) y[i] <- f(x[i]); y } ## Run something at different sample sizes xs=floor( (10^(1/4))^(3:16) ) map(compose(stdev,rnorm),xs) ## That gives a different result than would ##map(stdev, map(rnorm,xs)) ## due to the way c() works. Quite peculiar when you think about it! ## Let's get a feel for how much the mean of a sample varies ## trial-to-trial due to sample size. plot(xs,map(compose(mean,rnorm),xs), type="p", log="x", xlab="sample size", ylab="sample mean") for(i in 1:10) lines(xs,map(compose(mean,rnorm),xs), type="p") hist(map(compose(mean,rnorm),rep(10,1000)),prob=TRUE,breaks=50) hist(map(compose(mean,rnorm),rep(50,1000)),prob=TRUE,breaks=50) hist(map(compose(mean,rnorm),rep(100,1000)),prob=TRUE,breaks=50) hist(map(compose(mean,rnorm),rep(1000,1000)),prob=TRUE,breaks=50) hist(map(compose(mean,rnorm),rep(1000,1000)),prob=TRUE,breaks=50) hist(map(compose(mean,rnorm),rep(5000,1000)), prob=TRUE, breaks=50, xlab="Sample Mean", ylab="Frequency", main="Variability of 5000-element Sample", sub="(1000 Trials)") ## generate pretty histogram of empirical sample means at given sample size hist_sample_means <- function(nsamples, ntrials) { hist(map(compose(mean,rnorm), rep(nsamples,ntrials)), prob=TRUE, breaks=50, main=paste("Variability of", paste(nsamples, "Element", sep="-"), "Sample"), xlab="Sample Mean", sub=paste("(", ntrials, " Trials)", sep=""), ylab="Frequency") } hist_sample_means(10,1000) hist_sample_means(10,1000) hist_sample_means(10,1000) hist_sample_means(10,2000) hist_sample_means(10,10000)