seed_value <- 19710822 set.seed(seed_value) options(width = 72) opts_chunk$set(fig.width=4, fig.height=4, fig.align='center', fig.show='hold', out.width='0.3\\linewidth') x <- rnorm(n = 300, mean = 7, sd = 1) y <- rnorm(n = 300, mean = 21, sd = 2) plot(y ~ x) samples <- cbind(x, y) head(samples) cm <- cov(samples) # compute the covariance matrix round(cm, digits = 3) # print it so it is readable x <- rnorm(n = 300, mean = 7, sd = 1) y <- rnorm(n = 300, mean = 7, sd = 2) + 2 * x plot(y ~ x) samples <- cbind(x, y) cm <- cov(samples) # compute the covariance matrix round(cm, digits = 3) plot(y ~ x, xlim = c(0,30), ylim=c(0,30), cex = 0.5) results <- prcomp(samples) pc1 <- results$rotation[,"PC1"] samples.xfromed <- results$x slope <- pc1['y']/pc1['x'] point <- c(x = mean(x), y = mean(y)) intercept <- (point['y'] - slope * point['x']) plot(y ~ x, xlim = c(0,30), ylim=c(0,30), cex = 0.5) abline(b = slope, a = intercept, col = 'blue', lwd=2) plot(PC2 ~ PC1, data = samples.xfromed, xlim = c(-10, 10), ylim=c(-10, 10), cex = 0.5) abline(h = 0, col = "blue", lty = 2) abline(v = 0, col = "red", lty = 2) round(cov(samples.xfromed), digits = 3) results$sdev explained_variance <- round((results$sdev^2) / sum(results$sdev^2), digits = 2) explained_variance #source("http://bioconductor.org/biocLite.R") #biocLite("bladderbatch") library(bladderbatch) data(bladderdata) # retrieving sample information sample.info <- pData(bladderEset) # retrieving expression data exprs <- exprs(bladderEset) bladder.pca <- prcomp(t(exprs)) library(ggplot2) # combining sample info with pca result pca.plot <- cbind(sample.info, bladder.pca$x[rownames(sample.info),]) # color labeling points using the cancer status ggplot(pca.plot, aes(x = PC1, y = PC2, col = cancer)) + geom_point() + theme_bw() var.percent <- bladder.pca$sdev^2 / sum(bladder.pca$sdev^2) * 100 barplot(var.percent, main = "Percent variance explained", xlab = "PC index", ylab = "Percent variance")