library(lattice) library(MASS) sleep <- read.csv('/Users/dlambert/CN/Data/sleep1.csv') names(sleep)[names(sleep) == 'sleepExposure'] <- 'exposure' sleep[seq(1, nrow(sleep), 5), ] # Count NAs nNAs <- sapply(is.na(sleep), sum) nNAs <- apply(is.na(sleep), 2, sum) keepNames <- setdiff(names(sleep), c('dreamSleep', 'slowWaveSleep')) naRows <- (1:nrow(sleep))[apply(is.na(sleep[, keepNames]), 1, any)] newX <- c('brain', 'life', 'gestation', 'predation', 'exposure') allX <- c(newX[1:3], 'body', newX[4:5], 'danger') round(sapply(sleep[, newX], quantile, na.rm = TRUE), 1) sleepR2 <- rep(NA, 7) names(sleepR2) <- allX for (i in allX[1:4]) { z <- lm(sleep$sleep ~ log(sleep[[i]])) sleepR2[i] <- summary(z)$r.squared } for (i in allX[5:7]) { z <- lm(sleep$sleep ~ sleep[[i]]) sleepR2[i] <- summary(z)$r.squared } sleepLog <- sleep for (i in c('brain', 'gestation', 'body')) { sleepLog[[i]] <- log(sleep[[i]]) } newX <- c('brain', 'gestation', 'body', 'exposure', 'danger') plotList <- list() for (i in newX) { plotList[[i]] <- xyplot(sleepLog$sleep ~ sleepLog[[i]], xlab = i, ylab = 'sleep', pch = 16, cex = 1.25) } png('/Users/dlambert/CN/Figures/onePredictor.png', height = 250, width = 1000) for (i in 1:4) { print(plotList[[i]], position = c(.25 * (i-1), 0, .25*i, 1), more = TRUE) } print(plotList[[i]], position = c(.75, 0, 1, 1)) graphics.off() z <- lm(sleep ~ gestation , data = sleepLog) z <- lm(sleep ~ gestation + body + danger, data = sleepLog) summary(z) # Get fitted values for species with NA for sleep$sleep indx <- which(is.na(sleepLog$sleep)) predNA <- predict(z, newdata = sleepLog[indx, ]) predAll <- predict(z, newdata = sleepLog) # Plot fitted values against each of the predictors plotList <- list() for (i in c('gestation', 'body', 'danger')) { plotList[[i]] <- xyplot(predAll ~ sleepLog[[i]], ylab = 'sleep', xlab = i, pch = 16, cex = 1.25, panel = function(x, y, ...) { panel.grid(h=-1, v=-1) panel.xyplot(x, y, ...) panel.points(sleepLog[[i]][indx], predNA, col = 'red', pch = 16, cex = 1.25) }) } png('/Users/dlambert/CN/Figures/fittedNA.png', height = 250, width = 800) print(plotList[[1]], position = c(0, 0, 1/3, 1), more = T) print(plotList[[2]], position = c(1/3, 0, 2/3, 1), more=T) print(plotList[[3]], position = c(2/3, 0, 1, 1)) graphics.off() # Plot observed values against each of the predictors, but # use the fitted values for the outcomes with NA. plotList <- list() for (i in c('gestation', 'body', 'danger')) { plotList[[i]] <- xyplot(sleep$sleep ~ sleepLog[[i]], ylab = 'sleep', xlab = i, pch = 16, cex = 1.25, panel = function(x, y, ...) { panel.grid(h=-1, v=-1) panel.xyplot(x, y, ...) panel.points(sleepLog[[i]][indx], predNA, col = 'red', pch = 16, cex = 1.25) }) } png('/Users/dlambert/CN/Figures/observedNA.png', height = 250, width = 800) print(plotList[[1]], position = c(0, 0, 1/3, 1), more = T) print(plotList[[2]], position = c(1/3, 0, 2/3, 1), more=T) print(plotList[[3]], position = c(2/3, 0, 1, 1)) graphics.off() # Simulating uncertainty in the model. z <- lm(sleep ~ gestation, data = sleepLog) zSumm <- summary(z) zCorr <- zSumm$sigma^2 * zSumm$cov.unscaled simEstimates <- mvrnorm(30, z$coefficients,zCorr) png(file = '/Users/dlambert/CN/Figures/gestationUnc.png') xyplot(sleep ~ gestation, data = sleepLog, pch = 16, cex = 1.25, panel = function(x,y, ...) { panel.grid(h = -1, v = -1) panel.xyplot(x, y, ...) for (i in 1:30) { panel.abline(simEstimates[i, ], col = gray(.7)) } panel.abline(z$coefficients, col = 'red', lwd = 1.25) }) dev.off() # Repeat for log(body) z <- lm(sleep ~ body, data = sleepLog) zSumm <- summary(z) zCorr <- zSumm$sigma^2 * zSumm$cov.unscaled simEstimates <- mvrnorm(30, z$coefficients,zCorr) png(file = '/Users/dlambert/CN/Figures/bodyUnc.png') xyplot(sleep ~ body, data = sleepLog, pch = 16, cex = 1.25, panel = function(x,y, ...) { panel.grid(h = -1, v = -1) panel.xyplot(x, y, ...) for (i in 1:30) { panel.abline(simEstimates[i, ], col = gray(.7)) } panel.abline(z$coefficients, col = 'red', lwd = 1.25) }) dev.off() # To compare to a model with two predictors, plot against # both. z <- lm(sleep ~ gestation, data = sleepLog) zSumm <- summary(z) zCorr <- zSumm$sigma^2 * zSumm$cov.unscaled simEstimates <- mvrnorm(30, z$coefficients,zCorr) sleepLog$dangerFact <- factor(sleepLog$danger) png(file = '/Users/dlambert/CN/Figures/gestationUncDanger.png', width = 850, height = 250) xyplot(sleep ~ gestation | dangerFact, data = sleepLog, pch = 16, cex = 1.25, xlab = 'LOG(Gestation)', ylab = 'SLEEP', main = 'Regression of Sleep on Log Gestation', layout = c(5, 1), panel = function(x, y, ...) { panel.grid(h = -1, v = -1) panel.xyplot(x, y, ...) for (i in 1:30) { panel.abline(simEstimates[i, ], col = gray(.7)) } panel.abline(z$coef, col='red', lwd = 1.25) }) graphics.off() # Add a discrete variable to the model with danger sleepGestR2 <- rep(NA, 3) names(sleepGestR2) <- c('predation', 'danger', 'exposure') for (i in names(sleepGestR2)) { z <- lm(sleep$sleep ~ sleepLog$gestation + sleepLog[[i]]) print(summary(z)) sleepGestR2[i] <- summary(z)$r.squared } # danger linearly and additively zL <- lm(sleep ~ gestation + danger, data = sleepLog) zA <- lm(sleep ~ gestation + dangerFact, data = sleepLog) aIntercept <- zA$coef[1] + c(0, zA$coef[3:6]) LIntercept <- zL$coef[1] + seq(5) * zL$coef[2] png(file = '/Users/dlambert/CN/Figures/gestDangerLA.png', width = 800, height = 300) xyplot(sleep ~ gestation | dangerFact, data = sleepLog, cex = 1.25, pch = 16, layout = c(5, 1), panel = function(x, y, ...) { panel.grid(h = -1, v = -1) pno <- panel.number() panel.xyplot(x, y, ...) panel.abline(LIntercept[pno], zL$coef[2], col = 'red', lwd = 1.4) panel.abline(aIntercept[pno], zA$coef[2], lwd = 1.4, col = 'forest green') }) graphics.off() zI <- lm(sleep ~ gestation * dangerFact, data = sleepLog) # Interaction of numeric variables. z <- lm(sleep ~ gestation * danger, data = sleepLog) z <- lm(sleep ~ gestation + danger + I(gestation * danger), data = sleepLog) # Uncertainty of the model with an interaction of danger # as a factor and log(gestation). zISumm <- summary(zI) zICorr <- zISumm$sigma^2 * zISumm$cov.unscaled simEstimates <- mvrnorm(500, zI$coefficients,zICorr) newX <- sapply(split(sleepLog$gestation, sleep$danger), sample, 2) newX <- data.frame(gestation = c(newX), danger = rep(1:5, each = 2)) # Get intercepts and slopes under each simulated model intercepts <- array(NA, c(500, 10)) slopes <- array(NA, c(500, 10)) for (i in 1:10) { iDang <- newX$danger[i] intercepts[, i] <- simEstimates[, 1] slopes[, i] <- simEstimates[, 2] if (iDang > 1) { intercepts[, i] <- intercepts[,i] + simEstimates[, 1 + iDang] slopes[,i] <- slopes[,i] + simEstimates[, 5 + iDang] } } # Get new means, newMeans <- array(NA, c(500, 10)) for (i in 1:10) { newMeans[, i] <- intercepts[, i] + slopes[, i] * newX$gestation[i] } # Get confidence intervals. names(newX) <- c('gestation', 'dangerFact') newX$dangerFact <- factor(newX$dangerFact) newPredInt <- predict(zI, newdata = newX, interval = 'confidence', level = .9) round(cbind(newX[,1], as.numeric(newX[,2]), round(newPredInt,1))) # Find how many simulated means are in, below and above # the confidence intervals. nIn <- nBelow <- nAbove <- rep(NA, 10) for (i in 1:10) { nBelow[i] <- sum(newMeans[,i] < newPredInt[i, 'lwr']) nAbove[i] <- sum(newMeans[,i] > newPredInt[i,'upr']) } nIn <- 500 - nBelow - nAbove simInt <- apply(newMeans, 2, quantile, c(.05, .95)) x <- round(cbind(simInt[,1], newPredInt[, 'lwr'], simInt[,2], newPredInt[,'upr']), 2) colnames(x) <- c('simLow', 'ciLow', 'simHi', 'ciHi')