################################################################################ # File: SparCC.R # Aim : SparCC #------------------------------------------------------------------------------- # Author: Fang Huaying (Peking University) # Email : hyfang@pku.edu.cn # Date : 11/12/2014 #------------------------------------------------------------------------------- # SparCC for counts known require(gtools); SparCC.count <- function(x, imax = 20, kmax = 10, alpha = 0.1, Vmin = 1e-4) { # dimension for w (latent variables) p <- ncol(x); n <- nrow(x); # posterior distribution (alpha) x <- x + 1; # store generate data y <- matrix(0, n, p); # store covariance/correlation matrix cov.w <- cor.w <- matrix(0, p, p); indLow <- lower.tri(cov.w, diag = T); # store covariance/correlation for several posterior samples covs <- cors <- matrix(0, p * (p + 1) / 2, imax); for(i in 1:imax) { # generate fractions from posterior distribution y <- t(apply(x, 1, function(x) gtools::rdirichlet(n = 1, alpha = x))); # estimate covariance/correlation cov_cor <- SparCC.frac(x = y, kmax = kmax, alpha = alpha, Vmin = Vmin); # store variance/correlation only low triangle covs[, i] <- cov_cor$cov.w[indLow]; cors[, i] <- cov_cor$cor.w[indLow]; } # calculate median for several posterior samples cov.w[indLow] <- apply(covs, 1, median); cor.w[indLow] <- apply(cors, 1, median); # cov.w <- cov.w + t(cov.w); diag(cov.w) <- diag(cov.w) / 2; cor.w <- cor.w + t(cor.w); diag(cor.w) <- 1; # return(list(cov.w = cov.w, cor.w = cor.w)); } #------------------------------------------------------------------------------- # SparCC for fractions known SparCC.frac <- function(x, kmax = 10, alpha = 0.1, Vmin = 1e-4) { # Log transformation x <- log(x); p <- ncol(x); # T0 = var(log(xi/xj)) variation matrix TT <- stats::var(x); T0 <- diag(TT) + rep(diag(TT), each = p) - 2 * TT; # Variance and correlation coefficients for Basic SparCC rowT0 <- rowSums(T0); var.w <- (rowT0 - sum(rowT0) / (2 * p - 2))/(p - 2); var.w[var.w < Vmin] <- Vmin; #cor.w <- (outer(var.w, var.w, "+") - T0 ) / # sqrt(outer(var.w, var.w, "*")) / 2; Is <- sqrt(1/var.w); cor.w <- (var.w + rep(var.w, each = p) - T0) * Is * rep(Is, each = p) * 0.5; # Truncated correlation in [-1, 1] cor.w[cor.w <= - 1] <- - 1; cor.w[cor.w >= 1] <- 1; # Left matrix of estimation equation Lmat <- diag(rep(p - 2, p)) + 1; # Remove pairs rp <- NULL; # Left components cp <- rep(TRUE, p); # Do loops until max iteration or only 3 components left k <- 0; while(k < kmax && sum(cp) > 3) { # Left T0 = var(log(xi/xj)) after removing pairs T02 <- T0; # Store current correlation to find the strongest pair curr_cor.w <- cor.w; # Remove diagonal diag(curr_cor.w) <- 0; # Remove removed pairs if(!is.null(rp)) { curr_cor.w[rp] <- 0; } # Find the strongest pair in vector form n_rp <- which.max(abs(curr_cor.w)); # Remove the pair if geater than alpha if(abs(curr_cor.w[n_rp]) >= alpha) { # Which pair in matrix form t_id <- c(arrayInd(n_rp, .dim = c(p, p))); Lmat[t_id, t_id] <- Lmat[t_id, t_id] - 1; # Update remove pairs n_rp <- c(n_rp, (p + 1) * sum(t_id) - 2 * p - n_rp); rp <- c(rp, n_rp); # Update T02 T02[rp] <- 0; # Which component left cp <- (diag(Lmat) > 0); # Update variance and truncated lower by Vmin var.w[cp] <- solve(Lmat[cp, cp], rowSums(T02[cp, cp])); var.w[var.w <= Vmin] <- Vmin; # Update correlation matrix and truncated by [-1, 1] #cor.w <- (outer(var.w, var.w, "+") - T0 ) / # sqrt(outer(var.w, var.w, "*")) / 2; Is <- sqrt(1/var.w); cor.w <- (var.w + rep(var.w, each = p) - T0) * Is * rep(Is, each = p) * 0.5; # Truncated correlation in [-1, 1] cor.w[cor.w <= - 1] <- - 1; cor.w[cor.w >= 1] <- 1; } else { break; } # k <- k + 1; } # Covariance Is <- sqrt(var.w); cov.w <- cor.w * Is * rep(Is, each = p); # return(list(cov.w = cov.w, cor.w = cor.w)); } #------------------------------------------------------------------------------- #-------------------------------------------------------------------------------