count <- as.matrix(read.table("data/combo_count_tab.txt"))
depth <- sapply(strsplit(colnames(count), "\\."), length)
x <- count[, depth == 6 & colSums(count != 0) >= 1]
x[x == 0] <- 0.5
x <- x/rowSums(x)
z <- log(x)

demo <- read.delim("data/demographic.txt")
y <- demo$bmi[match(rownames(count), demo$pid)]

# type "R CMD SHLIB cdmm.c" to get cdmm.dll under Windows or cdmm.so under Linux
dyn.load("cdmm.dll")
#dyn.load("cdmm.so")
source("cdmm.R")

set.seed(23)
n <- length(y); ntrn <- 70; nrep <- 100
pe <- numeric(nrep); pe.lasso <- numeric(nrep)
for (i in 1:nrep) {
	itrn <- sample(n, ntrn)
	itst <- setdiff(1:n, itrn)
	ans <- cv.cdmm(y[itrn], z[itrn, ], refit=TRUE)
	bet <- ans$bet; int <- ans$int
	pe[i] <- mean((y[itst] - int - z[itst, ] %*% bet)^2)
	ans <- cv.cdmm(y[itrn], z[itrn, ], refit=TRUE, constr=FALSE)
	bet.lasso <- ans$bet; int.lasso <- ans$int
	pe.lasso[i] <- mean((y[itst] - int.lasso - z[itst, ] %*% bet.lasso)^2)
	cat("Rep.", i, "done.\n")
}
save(pe, pe.lasso, file="result/bmi_pe.rda")

load("result/bmi_pe.rda")
mean(pe)
sd(pe)/sqrt(length(pe))
mean(pe.lasso)
sd(pe.lasso)/sqrt(length(pe.lasso))

set.seed(42)
p <- ncol(z); nboot <- 100
bet.bcv <- matrix(, p, nboot)
bet.bcv.lasso <- matrix(, p, nboot)
for (i in 1:nboot) {
	bootid <- sample(1:length(y), replace=TRUE)
	bet.bcv[, i] <- cv.cdmm(y[bootid], z[bootid, ], refit=TRUE)$bet
	bet.bcv.lasso[, i] <- cv.cdmm(y[bootid], z[bootid, ], refit=TRUE, constr=FALSE)$bet
	cat("Boot.", i, "done.\n")
}
stab.prob <- stab.cdmm(y, z)$prob
stab.prob.lasso <- stab.cdmm(y, z, constr=FALSE)$prob
save(bet.bcv, bet.bcv.lasso, stab.prob, stab.prob.lasso, file="result/bmi_result.rda")

load("result/bmi_result.rda")
bcv.prob <- rowMeans(bet.bcv != 0)
bcv.prob.lasso <- rowMeans(bet.bcv.lasso != 0)

isel <- bcv.prob >= 0.7
data.frame(genus=colnames(z)[isel], bcv.prob=bcv.prob[isel], stab.prob=stab.prob[isel])

isel.lasso <- bcv.prob.lasso >= 0.7
data.frame(genus=colnames(z)[isel.lasso], bcv.prob=bcv.prob.lasso[isel.lasso], stab.prob=stab.prob.lasso[isel.lasso])

bcv.sgn <- rbind(rowMeans(bet.bcv > 0), rowMeans(bet.bcv < 0))
taxa <- matrix(unlist(strsplit(colnames(z), "\\.")), 6)
phyla <- taxa[2, ]; genera <- taxa[6, ]

pdf.options(width=4.5, height=3.6)
pdf(file="fig/prob.pdf", family="Times")
#setEPS(width=4.5, height=3.6)
#postscript(file="fig/prob.eps", family="Times")

par(mai=c(0.6, 0.6, 0.1, 0), mgp=c(1.8, 0.6, 0))
mp <- barplot(bcv.sgn, space=0, las=1, xlab="Genus", ylab="")
title(ylab="Selection probability", mgp=c(2.1, 0.7, 0))
ticks <- c(mp[match(unique(phyla), phyla)] - 0.5, max(mp) + 0.5)
axis(1, ticks, labels=FALSE)
text(ticks[2] - 1, -0.09, unique(phyla)[1], adj=1, xpd=TRUE)
text(ticks[2] + 1, -0.09, unique(phyla)[2], adj=0, xpd=TRUE)
text(mean(ticks[3:4]), -0.09, unique(phyla)[3], xpd=TRUE)
text(mean(ticks[6:7]) - 1, -0.09, unique(phyla)[6], xpd=TRUE)
segments(mean(ticks[1:2]) - 2, -0.06, mean(ticks[1:2]), -0.02, xpd=TRUE)
segments(mean(ticks[2:3]) + 2, -0.06, mean(ticks[2:3]), -0.02, xpd=TRUE)
segments(mean(ticks[3:4]), -0.06, , -0.02, xpd=TRUE)
segments(mean(ticks[6:7]), -0.06, , -0.02, xpd=TRUE)
dev.off()

ans <- cdmm(y, z[, isel], 0)
(bet <- as.numeric(ans$sol))
int <- ans$int
fitted <- int + drop(z[, isel] %*% bet)
ran <- range(c(y, fitted))

pdf.options(width=3.6, height=3.6)
pdf(file="fig/fitted.pdf", family="Times")
#setEPS(width=3.6, height=3.6)
#postscript(file="fig/fitted.eps", family="Times")

par(mai=c(0.6, 0.6, 0.1, 0.1), mgp=c(1.8, 0.6, 0))
plot(y, fitted, las=1, xlab="Observed BMI", ylab="Fitted BMI", xlim=ran, ylim=ran, asp=1)
abline(0, 1, lty="dashed")
dev.off()
