data(iris) ### one-way anova: detailed calculation y = iris$Sepal.Width group = iris$Species y.mean.group = aggregate(y,by=list(group),mean) n.ele.group = aggregate(y,by=list(group),length) sst = sum((y-mean(y))^2) ssb = sum((y.mean.group[,2]-mean(y))^2*n.ele.group[,2]) sse = sst - ssb msb = ssb/(length(unique(group))-1) mse = sse/(nrow(iris)-length(unique(group))) F= msb/mse qf(0.95,df1=2,df2=147) pvalue=pf(F,lower.tail=FALSE,df1=2,df2=147) ##the easy way anova.model = aov(Sepal.Width~Species,data=iris) summary(anova.model) ## pairwise t-test pairwise.t.test(iris$Sepal.Width,iris$Species,p.adjust.method="bonferroni") TukeyHSD(anova.model) y.mean.group = aggregate(y,by=list(group),mean) wt = c(2,-1,-1) sc = sqrt(mse*sum(wt^2/n.ele.group[,2])) C.hat = sum(y.mean.group[,2]*wt) alpha = 0.05 C.hat - sc*sqrt(alpha*qf(alpha,3-1,150-3)) C.hat + sc*sqrt(alpha*qf(alpha,3-1,150-3)) #scheffe.test(anova.model,trt=c(-0.5,-0.5,1),group=TRUE) #### regression install.packages("ISwR") library("ISwR") data(cystfibr) attach(cystfibr) plot(cystfibr$pemax,cystfibr$age,ylab="PEmax",xlab="Age",pch=20) my.model = lm(pemax~age,data=cystfibr) summary(my.model) plot(age,pemax,cex=2,pch=20) names(my.model) abline(my.model$coeff[1],my.model$coeff[2],lwd=3,col="red") ####glm RK = read.table("http://www.math.pku.edu.cn/teachers/xirb/Courses/biostatistics/Biostatistics2014/RoadKills.txt",header=T) names(RK) plot(RK$D.PARK,RK$TOT.N,xlab="Distance to park", ylab="Road kills") M1<-glm(TOT.N~D.PARK,family=poisson,data=RK) summary(M1) MyData=data.frame(D.PARK=seq(from=0,to=25000,by=1000)) G<-predict(M1,newdata=MyData,type="link",se=T) F<-exp(G$fit) FSEUP<-exp(G$fit+1.96*G$se.fit) FSELOW<-exp(G$fit-1.96*G$se.fit) lines(MyData$D.PARK,F,lty=1) lines(MyData$D.PARK,FSEUP,lty=2) lines(MyData$D.PARK,FSELOW,lty=2) RK$SQ.POLIC<-sqrt(RK$POLIC) RK$SQ.WATRES<-sqrt(RK$WAT.RES) RK$SQ.URBAN<-sqrt(RK$URBAN) RK$SQ.OLIVE<-sqrt(RK$OLIVE) RK$SQ.LPROAD<-sqrt(RK$L.P.ROAD) RK$SQ.SHRUB<-sqrt(RK$SHRUB) RK$SQ.DWATCOUR<-sqrt(RK$D.WAT.COUR) M2<-glm(TOT.N~OPEN.L+MONT.S+SQ.POLIC+ SQ.SHRUB+SQ.WATRES+L.WAT.C+SQ.LPROAD+ SQ.DWATCOUR+D.PARK,family=poisson,data=RK) summary(M2) drop1(M2,test="Chi") ### the same pavalue can be obtained by using the anova command M3 <- glm(TOT.N ~ MONT.S + SQ.POLIC + D.PARK + SQ.SHRUB + SQ.WATRES + L.WAT.C + SQ.LPROAD + SQ.DWATCOUR, family = poisson, data = RK) anova(M2, M3, test = "Chi") M4<- glm(TOT.N ~ OPEN.L + MONT.S + SQ.POLIC+ SQ.SHRUB + SQ.WATRES + L.WAT.C + SQ.LPROAD+ SQ.DWATCOUR + D.PARK, family = quasipoisson, data = RK) summary(M4) drop1(M4,test="F") ### quasipoisson model M5<- glm(TOT.N ~ D.PARK, family = quasipoisson, data = RK) summary(M5) drop1(M5,test="F") M5<-glm(TOT.N~D.PARK,family=quasipoisson,data=RK) EP=resid(M5,type="pearson") ED=resid(M5,type="deviance") mu=predict(M5,type="response") E=RK$TOT.N-mu EP2=E/sqrt(7.630148*mu) op <- par(mfrow=c(2,2)) plot(x=mu,y=E,main="Response residuals") plot(x=mu,y=EP,main="Pearson residuals") plot(x=mu,y=EP2,main="Pearson residuals scaled") plot(x=mu,y=ED,main="Deviance residuals") par(op) M9 <- glm(TOT.N ~ OPEN.L + D.PARK, family = poisson, data = RK) #### negative binomial model library(MASS) M6<-glm.nb(TOT.N~OPEN.L+MONT.S+SQ.POLIC+ SQ.SHRUB+SQ.WATRES+L.WAT.C+SQ.LPROAD+ SQ.DWATCOUR+D.PARK,link="log",data=RK) M8<-glm.nb(TOT.N~OPEN.L+D.PARK,link="log",data=RK) summary(M8) drop1(M8,test="Chi") par(mfrow=c(2,2)) plot(M8) llhNB = logLik(M8) llhPoisson =logLik(M9) d <- 2 * (llhNB - llhPoisson) pval <- pchisq(as.numeric(d), df=1, lower.tail=FALSE)/2