install.packages("faraway") install.packages("leaps") library("faraway") library("leaps") ####simulation n = 100 beta = c(3,1.5,-5) x1 = matrix(rnorm(3*n),nrow=n,ncol=3) y = x1%*%beta + rnorm(n,sd=5) ynew = x1%*%beta + rnorm(n,sd=5) x = matrix(rnorm(1000*n),nrow=n,ncol=1000) x = cbind(x1,x) k = 1 lm.model = lm(y~.-1, data=data.frame(y=y,v=x[,1:k])) sm.model = summary(lm.model) sm.model$r.squared err.training = c() err.test = c() for(k in 1:100){ lm.model = lm(y~.-1, data=data.frame(y=y,v=x[,1:k])) sm.model = summary(lm.model) err.training[k] = 1-sm.model$r.squared err.test[k] = mean((ynew - cbind(x[,1:k])%*%sm.model$coef[,1])^2) } plot(err.training) plot(err.test) ###real data data(state) statedata <- data.frame(state.x77,row.names=state.abb,check.names=T) ### backward g <- lm(Life.Exp~., data=statedata) summary(g) g <- update(g, .~. - Area) summary(g) g <- update(g, .~. - Illiteracy) summary(g) g <- update(g, .~. - Income) summary(g) g <- update(g, .~. - Population) summary(g) ### forwards cov.names = names(statedata) cov.names = cov.names[cov.names!="Life.Exp"] cov.in = c() cov.out = cov.names g = lm(Life.Exp~1,data=statedata) p.values = c() g.models = list() i = 1 for(x in cov.out){ #gi = lm(paste("Life.Exp~",x),data=statedata) gi = update(g,paste(".~.+",x,sep=""),data=statedata) gi.sm = summary(gi) p.values = c(p.values,gi.sm$coef[nrow(gi.sm$coef),4]) g.models[[i]] = gi i = i+1 } names(p.values) = cov.out min(p.values) cov.in = c(cov.in,names(p.values)[which.min(p.values)]) cov.out = cov.names[is.na(match(cov.names,cov.in))] g = g.models[[which.min(p.values)]] ### or all together cov.names = names(statedata) cov.names = cov.names[cov.names!="Life.Exp"] cov.in = c() cov.out = cov.names g = lm(Life.Exp~1,data=statedata) flag = TRUE while(flag){ p.values = c() g.models = list() i = 1 for(x in cov.out){ #gi = lm(paste("Life.Exp~",x),data=statedata) gi = update(g,paste(".~.+",x,sep=""),data=statedata) gi.sm = summary(gi) p.values = c(p.values,gi.sm$coef[nrow(gi.sm$coef),4]) g.models[[i]] = gi i = i+1 } names(p.values) = cov.out if(min(p.values)<0.05){ cov.in = c(cov.in,names(p.values)[which.min(p.values)]) cov.out = cov.names[is.na(match(cov.names,cov.in))] g = g.models[[which.min(p.values)]] }else{flag=FALSE} } ### AIC based g <- lm(Life.Exp~ ., data=statedata) step(g,direction="both") #g <- lm(Life.Exp~1, data=statedata) #step(g,scope=Life.Exp~.,direction="forward") ### ridge regression library(MASS) fit <- lm.ridge(Life.Exp~ .,lambda=seq(0,50,by=0.1),data=statedata) lm.ridge(Life.Exp~ .,lambda=2.8,data=statedata) #### install.packages("lars") install.packages("elasticnet") library(lars) library(elasticnet) y = as.vector(statedata[,4]) x = as.matrix(statedata[,-4]) g=lars(y=y,x=x) cv.lars(y=statedata[,4],x=x) LARSL = function(x,y,K = 10) { abscissa = seq(0,1,length=100) temp = cv.lars(x,y,mode="fraction",K=K, plot.it=F) s = temp$index[which(temp$cv == min(temp$cv))] err = min(temp$cv) temp = lars(x,y) result.lars = predict(temp, s = s, type = "coefficients", mode = "fraction") result = list(s = s, beta = result.lars$coef, err = err) return(result) } LARSL(x,y) ENL = function(x,y,lambda2 = c(0,0.01,0.1,1,10,100,1000), K = 10, mode = "fraction", max.steps = 200) { # The elastic net algorithm with L1 penalty lambda1 chosen by K-fold cross validation. # The loss function is MSE. The default CV is on the fractions seq(0,1,length=100). This # is easy to control. If we want to use lambda1, it is not straightforward how to # decide the abscissa. 

ENL = function(x,y,lambda2 = c(0,0.01,0.1,1,10,100,1000), K = 10, mode = "fraction", max.steps = 200) {
  # The elastic net algorithm with L1 penalty lambda1 chosen by K-fold cross validation.
  # The loss function is MSE. The default CV is on the fractions seq(0,1,length=100). This
  # is easy to control. If we want to use lambda1, it is not straightforward how to
  # decide the abscissa. The L2 penalty term lambda2 is chosen as the best one in the pool.
  n = dim(x)[1]
  p = dim(x)[2]
  s = err = NULL  # to record, for each lambda2, the L1 norm fraction chosen by CV
                  # and the corresponding prediction error
  if (mode == "fraction") abscissa = seq(0,1,length=100)
  if (mode == "step") abscissa = 1:max.steps
  for (index2 in 1:length(lambda2)) {
    # The K-fold cross validation
    temp = cv.enet(x,y,lambda=lambda2[index2],s=abscissa,mode=mode,K=K, plot.it=F)
    s = c(s, abscissa[which(temp$cv == min(temp$cv))])
    err = c(err, min(temp$cv))
  }
  lambda2.chosen = lambda2[which(err == min(err))]
  s.chosen = s[which(err == min(err))]
  temp = enet(x,y,lambda=lambda2.chosen, max.steps = max.steps)
  result.en = predict(temp, s = s.chosen, type = "coefficients", mode = mode)
  result = list(s = s.chosen, lambda2 = lambda2.chosen, beta = result.en$coef, err = min(err))
  return(result)
}
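
## Hedged usage sketch (not part of the original script): apply the ENL()
## wrapper above to the state data, with the default lambda2 pool. Assumes
## x, y (built from statedata earlier) and ENL() as defined above; the
## object name en.fit is new here.
en.fit = ENL(x, y, K = 10)
en.fit$lambda2   # L2 penalty chosen from the pool
en.fit$s         # L1 norm fraction chosen by CV
en.fit$beta      # elastic net coefficients at the chosen (lambda2, s)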

### adaptive lasso
lasso.adapt.bic2<-function(x,y){
  # adaptive lasso from lars with BIC stopping rule
  # this one uses the "known variance" version of BIC with RSS/(full model mse)
  # must use a recent version of the lars package so that normalize=FALSE can be used
  require(lars)
  ok<-complete.cases(x,y)
  x<-x[ok,]                            # get rid of na's
  y<-y[ok]                             # since regsubsets can't handle na's
  m<-ncol(x)
  n<-nrow(x)
  x<-as.matrix(x)                      # in case x is not a matrix
  # standardize variables like lars does
  one <- rep(1, n)
  meanx <- drop(one %*% x)/n
  xc <- scale(x, meanx, FALSE)         # first subtracts mean
  normx <- sqrt(drop(one %*% (xc^2)))
  names(normx) <- NULL
  xs <- scale(xc, FALSE, normx)        # now rescales with norm (not sd)
  out.ls=lm(y~xs)                      # ols fit on standardized predictors
  beta.ols=out.ls$coeff[2:(m+1)]       # ols except for intercept
  w=abs(beta.ols)                      # weights for adaptive lasso
  xs=scale(xs,center=FALSE,scale=1/w)  # xs times the weights
  object=lars(xs,y,type="lasso",normalize=FALSE)
  # get min BIC
  # bic=log(n)*object$df+n*log(as.vector(object$RSS)/n)   # rss/n version
  sig2f=summary(out.ls)$sigma^2                           # full model mse
  bic2=log(n)*object$df+as.vector(object$RSS)/sig2f       # Cp version
  step.bic2=which.min(bic2)                               # step with min BIC
  fit=predict.lars(object,xs,s=step.bic2,type="fit",mode="step")$fit
  coeff=predict.lars(object,xs,s=step.bic2,type="coef",mode="step")$coefficients
  coeff=coeff*w/normx                  # get back in right scale
  st=sum(coeff !=0)                    # number nonzero
  mse=sum((y-fit)^2)/(n-st-1)          # 1 for the intercept
  # this next line just finds the variable ids of the coefficients not equal to 0
  if(st>0) x.ind<-as.vector(which(coeff !=0)) else x.ind<-0
  intercept=as.numeric(mean(y)-meanx%*%coeff)
  return(list(fit=fit,st=st,mse=mse,x.ind=x.ind,coeff=coeff,intercept=intercept,object=object,
              bic2=bic2,step.bic2=step.bic2))
}

# simulated comparison: adaptive lasso (BIC) vs. lasso with CV
n = 100
beta = c(3,1.5,-5)
x1 = matrix(rnorm(3*n),nrow=n,ncol=3)
y = x1%*%beta + rnorm(n,sd=5)
ynew = x1%*%beta + rnorm(n,sd=5)
x = matrix(rnorm(20*n),nrow=n,ncol=20)   # 20 noise predictors
x = cbind(x1,x)
tmp=lasso.adapt.bic2(x,y)
tmp$coeff
LARSL(x,y)

#### ridge regression and lasso with glmnet (ISLR Hitters data)
library(ISLR)
fix(Hitters)
names(Hitters)
sum(is.na(Hitters$Salary))
Hitters=na.omit(Hitters)
dim(Hitters)
sum(is.na(Hitters))
library(leaps)
x=model.matrix(Salary~.,Hitters)[,-1]
y=Hitters$Salary

# Ridge Regression
library(glmnet)
grid=10^seq(10,-2,length=100)
ridge.mod=glmnet(x,y,alpha=0,lambda=grid)
dim(coef(ridge.mod))
ridge.mod$lambda[50]
coef(ridge.mod)[,50]
sqrt(sum(coef(ridge.mod)[-1,50]^2))
ridge.mod$lambda[60]
coef(ridge.mod)[,60]
sqrt(sum(coef(ridge.mod)[-1,60]^2))
predict(ridge.mod,s=50,type="coefficients")[1:20,]
set.seed(1)
train=sample(1:nrow(x), nrow(x)/2)
test=(-train)
y.test=y[test]
ridge.mod=glmnet(x[train,],y[train],alpha=0,lambda=grid, thresh=1e-12)
ridge.pred=predict(ridge.mod,s=4,newx=x[test,])
mean((ridge.pred-y.test)^2)
mean((mean(y[train])-y.test)^2)
ridge.pred=predict(ridge.mod,s=1e10,newx=x[test,])
mean((ridge.pred-y.test)^2)
# note: recent glmnet versions also require x= and y= when exact=T is used
ridge.pred=predict(ridge.mod,s=0,newx=x[test,],exact=T)
mean((ridge.pred-y.test)^2)
lm(y~x, subset=train)
predict(ridge.mod,s=0,exact=T,type="coefficients")[1:20,]
set.seed(1)
cv.out=cv.glmnet(x[train,],y[train],alpha=0)
plot(cv.out)
bestlam=cv.out$lambda.min
bestlam
ridge.pred=predict(ridge.mod,s=bestlam,newx=x[test,])
mean((ridge.pred-y.test)^2)
out=glmnet(x,y,alpha=0)
predict(out,type="coefficients",s=bestlam)[1:20,]

# The Lasso
lasso.mod=glmnet(x[train,],y[train],alpha=1,lambda=grid)
plot(lasso.mod)
set.seed(1)
cv.out=cv.glmnet(x[train,],y[train],alpha=1)
plot(cv.out)
bestlam=cv.out$lambda.min
lasso.pred=predict(lasso.mod,s=bestlam,newx=x[test,])
mean((lasso.pred-y.test)^2)
out=glmnet(x,y,alpha=1,lambda=grid)
lasso.coef=predict(out,type="coefficients",s=bestlam)[1:20,]
lasso.coef
lasso.coef[lasso.coef!=0]

##### Bayesian methods: spike-and-slab regression
install.packages("BoomSpikeSlab")
library(BoomSpikeSlab)
n = 100
beta = c(3,1.5,-5)
x1 = matrix(rnorm(3*n),nrow=n,ncol=3)
y = x1%*%beta + rnorm(n,sd=2)
ynew = x1%*%beta + rnorm(n,sd=2)
x = matrix(rnorm(5*n),nrow=n,ncol=5)
x = cbind(x1,x)
dta = data.frame(y,x)
tmp = lm.spike(y~.-1,niter=10000,data=dta)
# posterior medians of the 8 coefficients, discarding the first 1000 draws as burn-in
sapply(1:8,FUN=function(i){median(tmp$beta[1000:10000,i])})

#### distance correlation
#install.packages("cdcsis")
#library("cdcsis")
install.packages("energy")
library("energy")
x = rnorm(100,sd=3)
y = x^2 + rnorm(100,sd=3)   # nonlinear (quadratic) dependence
dcor(x,y)
cor(x,y)
cor.test(x,y)
dcor.ttest(x,y)             # called dcorT.test in recent versions of energy
dcov.test(x,y,R = 10000)
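
## Hedged follow-up sketch (not part of the original script): the same measures
## on a purely linear relationship, for contrast with the quadratic case above,
## where Pearson correlation is near zero but distance correlation is not.
## The objects x2 and y2 are new names introduced here.
x2 = rnorm(100, sd = 3)
y2 = 2*x2 + rnorm(100, sd = 3)
cor(x2, y2)                  # large: Pearson correlation detects linear dependence
dcor(x2, y2)                 # also large
dcov.test(x2, y2, R = 1000)  # permutation test of independence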