# Data wrangling library(tidyverse) # using tidyr set.seed(1) mouse_weights_sim <- data.frame( time = seq(as.Date("2017/1/1"), by = "month", length.out = 12), mickey = rnorm(12, 20, 1), minnie = rnorm(12, 20, 2), mighty = rnorm(12, 20, 4) ) mouse_weights <- gather(data = mouse_weights_sim, key = mouse, value = weight, mickey, minnie, mighty) mouse_weights <- gather(data = mouse_weights_sim, key = mouse, value = weight, -time) mouse_weights <- gather(data = mouse_weights_sim, key = mouse, value = weight, mickey:mighty) ggplot(mouse_weights, aes(x = mouse, y = weight)) + geom_boxplot(aes(fill = mouse)) ggplot(mouse_weights, aes(x = time, y = weight)) + geom_boxplot(aes(group = time)) ggplot(mouse_weights, aes(x = time, y = weight)) + geom_boxplot(aes(group = time)) + geom_point(aes(color = mouse)) ggplot(mouse_weights, aes(x = time, y = weight)) + geom_point(aes(color = mouse)) + geom_line(aes(group = mouse, color = mouse)) #rm(USPersonalExpenditure) uspe_df <- as.data.frame(USPersonalExpenditure) uspe_df$Category <- rownames(USPersonalExpenditure) uspe <- gather(uspe_df, Year, Amount, -Category) ggplot(uspe, aes(x = Year, y = Amount)) + geom_bar(stat = "identity", aes(fill = Category)) ggplot(uspe, aes(x = Year, y = Amount)) + geom_bar(stat = "identity", aes(fill = Category)) + theme(legend.justification = c(0,1), legend.position = "inside", legend.position.inside = c(0,1)) ggplot(uspe, aes(x = Year, y = Amount)) + geom_bar(stat = "identity", position = "dodge", aes(fill = Category)) + theme(legend.justification = c(0,1), legend.position = "inside", legend.position.inside = c(0,1)) spread(data = mouse_weights, key = mouse, value = weight) spread(ablation, key = CellType, value = Score) abl_united <- unite(ablation, ExptCell, Experiment, CellType, sep = ".") spread(abl_united, ExptCell, Score) separate(abl_united, ExptCell, c("Expt", "Cell"), sep = "\\.") library(dplyr) experiment_log <- data.frame(Experiment = c("E1909", "E1915", "E1921"), Tech = c("Goneril", "Regan", "Cordelia"), stringsAsFactors = TRUE) str(experiment_log) experiment_log inner_join(ablation, experiment_log) save(ablation, file = "ablation.Rdata") load("ablation.Rdata") head(select(msleep, name, sleep_total)) head(msleep[ , c("name", "sleep_total")]) class(msleep) msleep %>% select(name, sleep_total) %>% head msleep %>% select(name, sleep_total) %>% head head(msleep[ , -1]) head(select(msleep, -name)) head(select(msleep, -c(name, sleep_total))) msleep %>% select(-c(name, sleep_total)) %>% head msleep %>% select(starts_with("sl")) %>% head head(msleep[ , startsWith(names(msleep), "sl")]) msleep[msleep$sleep_total >= 16, ] msleep %>% filter(sleep_total >= 16) msleep %>% filter(order %in% c("Perissodactyla", "Primates")) msleep[msleep$order %in% c("Perissodactyla", "Primates"), ] msleep %>% filter(sleep_total >= 16, bodywt >= 1) msleep %>% filter(sleep_total >= 16 & bodywt >= 1) msleep[msleep$sleep_total >= 16 & msleep$bodywt >=1, ] msleep %>% arrange(order) %>% head msleep %>% arrange(desc(order)) %>% head msleep %>% select(name, order, sleep_total) %>% arrange(order, sleep_total) %>% head msleep %>% arrange(order, sleep_total) %>% select(name, order) %>% head ToothGrowth %>% summarize(meanLen = mean(len)) ToothGrowth %>% group_by(supp) %>% summarize(meanLen = mean(len)) ToothGrowth %>% group_by(supp, dose) %>% summarize(meanLen = mean(len), n = n()) ToothGrowth %>% group_by(supp, dose) %>% mutate(norm.len = (len - mean(len))/sd(len), max = max(len)) %>% print(n = 60) ablation %>% select(Time, Measurement, CellType, Score) %>% group_by(Time, Measurement, CellType) %>% summarize(mean_score = mean(Score)) %>% spread(CellType, mean_score) ablation %>% select(Time, Measurement, CellType, Score) %>% group_by(Time, Measurement, CellType) %>% summarize(min = min(Score), max = max(Score)) ablation_mean_sd <- ablation %>% select(Time, Measurement, CellType, Score) %>% group_by(Time, Measurement, CellType) %>% summarize(mean = mean(Score), sd = sd(Score)) ggplot(ablation_mean_sd, aes(x = Time, y = mean)) + geom_point(size = 4) + geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = 0.4) + facet_grid(Measurement ~ CellType) + geom_line() + geom_point(data = ablation, aes(y = Score), color = "pink", shape = 1) + labs(title = "+/- 1 SD") ablation_mean_ci <- ablation %>% select(Time, Measurement, CellType, Score) %>% group_by(Time, Measurement, CellType) %>% summarize(mean = mean(Score), lower_limit = t.test(Score)$conf.int[1], upper_limit = t.test(Score)$conf.int[2]) ablation %>% mutate(rate = ifelse(Time > 0, Score / Time, NA)) ggplot(ablation_mean_sd, aes(x = Time, y = mean)) + geom_point(size = 2) + geom_errorbar(aes(ymin = mean - sd, ymax = mean + sd), width = 0.4) + facet_grid(Measurement ~ CellType) + geom_line() + geom_point(data = ablation %>% group_by(Measurement, CellType, Time) %>% mutate(outlier = abs((Score - mean(Score)) / sd(Score)) > 1), aes(y = Score, color = outlier), size = 4, shape = 1) + labs(title = "+/- 1 SD", y = "Mean") + scale_colour_discrete(name = "Outlier Status", labels = c("Within 1 SD", "Outside 1 SD")) # Complete example library(tidyverse) # using ggplot2, dplyr, tidyr packages analyze.all <- function(save_plots = TRUE) { # Load data ablation <- read.csv(file = "Ablation.csv", header = TRUE, stringsAsFactors = TRUE) names(ablation)[names(ablation) == "SCORE"] <- "Score" print(ablation) ablation_means <- ablation %>% group_by(CellType, Measurement, Time) %>% summarize(mean = mean(Score), n = n()) print(ablation_means) # Set up plotting if (save_plots) { pdf(file = "plot.pdf") } # Plot all data g <- ggplot(ablation, aes(x = Time, y = Score)) + geom_point() + geom_line(aes(color = Experiment)) + facet_grid(Measurement ~ CellType) + theme_bw() print(g) # Plot averages over experiments g <- ggplot(ablation_means, aes(x = Time, y = mean)) + geom_point() + geom_line(aes(color = CellType)) + facet_wrap(~ Measurement) + theme_bw() print(g) # Separate plots of averages over experiments for (measurement in levels(ablation.means$Measurement)) { g <- ggplot(ablation_means %>% filter(Measurement == measurement), aes(x = Time, y = mean)) + geom_point() + geom_line(aes(color = CellType)) + labs(title = measurement) + theme_bw() print(g) } # Close plotting device if (save_plots) { dev.off() } } analyze.all(FALSE)