# install.packages("tidyverse")
library(tidyverse)

# Data wrangling ----

## Using tidyr ----

# Simulate twelve monthly weight measurements for three mice in wide format
# (one column per mouse). Fixing the seed makes the random draws reproducible.
set.seed(1)
mouse_weights_sim <- data.frame(
  time = seq(as.Date("2017/1/1"), by = "month", length.out = 12),
  mickey = rnorm(12, 20, 1),
  minnie = rnorm(12, 20, 2),
  mighty = rnorm(12, 20, 4)
)

# Wide -> long. The three calls below differ only in how the measurement
# columns are selected; each one overwrites `mouse_weights`.
mouse_weights <- gather(
  data = mouse_weights_sim, # data frame to be manipulated
  key = mouse,              # name of the future column storing the mouse names
  value = weight,           # name of the future column storing the weights
  mickey, minnie, mighty    # all the columns that contain the values
)

# Same selection, expressed as "everything except time".
mouse_weights <- gather(
  data = mouse_weights_sim,
  key = mouse,
  value = weight,
  -time
)

# Same selection, expressed as a column range.
mouse_weights <- gather(
  data = mouse_weights_sim,
  key = mouse,
  value = weight,
  mickey:mighty
)

# One box per mouse.
ggplot(mouse_weights, aes(x = mouse, y = weight)) +
  geom_boxplot(aes(fill = mouse))

# One box per time point (group = time is needed because x is a date).
ggplot(mouse_weights, aes(x = time, y = weight)) +
  geom_boxplot(aes(group = time))

# Boxes per time point with the individual measurements overlaid.
ggplot(mouse_weights, aes(x = time, y = weight)) +
  geom_boxplot(aes(group = time)) +
  geom_point(aes(color = mouse))

# One line per mouse over time.
ggplot(mouse_weights, aes(x = time, y = weight)) +
  geom_point(aes(color = mouse)) +
  geom_line(aes(group = mouse, color = mouse))

# rm(USPersonalExpenditure)

# USPersonalExpenditure is converted to a data frame; its row names are
# copied into a Category column so they survive the reshape.
uspe.df <- as.data.frame(USPersonalExpenditure)
uspe.df$Category <- rownames(USPersonalExpenditure)
uspe <- gather(uspe.df, Year, Amount, -Category)

# Stacked bars: one bar per year, filled by category.
ggplot(uspe, aes(x = Year, y = Amount)) +
  geom_bar(stat = "identity", aes(fill = Category))

# Same plot with the legend anchored at the top-left of the panel.
ggplot(uspe, aes(x = Year, y = Amount)) +
  geom_bar(stat = "identity", aes(fill = Category)) +
  theme(legend.justification = c(0, 1), legend.position = c(0, 1))

# Side-by-side bars instead of stacked.
ggplot(uspe, aes(x = Year, y = Amount)) +
  geom_bar(stat = "identity", position = "dodge", aes(fill = Category)) +
  theme(legend.justification = c(0, 1), legend.position = c(0, 1))

# Long -> wide: the inverse of gather().
spread(data = mouse_weights, key = mouse, value = weight)

# NOTE(review): `ablation` is not created anywhere in this script; it must
# already exist in the workspace before the lines below are run.
spread(ablation, key = CellType, value = Score)

# Combine Experiment and CellType into a single key column so that each
# experiment/cell-type pair becomes its own column when spread.
abl.united <- unite(ablation, expt_cell, Experiment, CellType, sep = ".")
spread(abl.united, expt_cell, Score)
# Split the united key back into its two parts. The separator is a regular
# expression, so the literal dot has to be escaped.
separate(abl.united, expt_cell, c("Expt", "Cell"), sep = "\\.")

## Using dplyr ----

library(dplyr)

# Lookup table recording which technician ran each experiment.
experiment.log <- data.frame(
  Experiment = c("E1909", "E1915", "E1921"),
  Tech = c("Goneril", "Regan", "Cordelia"),
  stringsAsFactors = TRUE
)
str(experiment.log)
experiment.log

# Join on the shared Experiment column; naming the key explicitly avoids
# relying on an implicit natural join.
inner_join(ablation, experiment.log, by = "Experiment")

# Round-trip the ablation data through an .Rdata file.
save(ablation, file = "ablation.Rdata")
load("ablation.Rdata")

### select(): choosing columns ----

# dplyr form followed by the base R equivalent.
head(select(msleep, name, sleep_total))
head(msleep[, c("name", "sleep_total")])
class(msleep)

# The same selection written as a pipeline.
msleep %>%
  select(name, sleep_total) %>%
  head()
msleep %>%
  select(name, sleep_total) %>%
  head()

# Dropping columns.
head(msleep[, -1])
head(select(msleep, -name))
head(select(msleep, -c(name, sleep_total)))
msleep %>%
  select(-c(name, sleep_total)) %>%
  head()

# Selecting columns by name prefix.
msleep %>%
  select(starts_with("sl")) %>%
  head()
head(msleep[, startsWith(names(msleep), "sl")])

### filter(): choosing rows ----

msleep[msleep$sleep_total >= 16, ]
msleep %>%
  filter(sleep_total >= 16)

msleep %>%
  filter(order %in% c("Perissodactyla", "Primates"))
msleep[msleep$order %in% c("Perissodactyla", "Primates"), ]

# Comma-separated conditions in filter() are combined with AND, so the next
# two pipelines are equivalent.
msleep %>%
  filter(sleep_total >= 16, bodywt >= 1)
msleep %>%
  filter(sleep_total >= 16 & bodywt >= 1)
msleep[msleep$sleep_total >= 16 & msleep$bodywt >= 1, ]

### arrange(): sorting rows ----

msleep %>%
  arrange(order) %>%
  head()
msleep %>%
  arrange(desc(order)) %>%
  head()
msleep %>%
  select(name, order, sleep_total) %>%
  arrange(order, sleep_total) %>%
  head()

# Sorting first means sleep_total can still be used even though it is
# dropped by the later select().
msleep %>%
  arrange(order, sleep_total) %>%
  select(name, order) %>%
  head()

### summarize() and group_by() ----

ToothGrowth %>%
  summarize(meanLen = mean(len))
ToothGrowth %>%
  group_by(supp) %>%
  summarize(meanLen = mean(len))
ToothGrowth %>%
  group_by(supp, dose) %>%
  summarize(meanLen = mean(len), n = n())

# mutate() on grouped data keeps every row; the statistics are computed
# within each supp/dose group.
ToothGrowth %>%
  group_by(supp, dose) %>%
  mutate(norm.len = (len - mean(len)) / sd(len), max = max(len)) %>%
  print(n = 60)

### Summarizing the ablation data ----

# Mean score per Time/Measurement/CellType, with one column per cell type.
ablation %>%
  select(Time, Measurement, CellType, Score) %>%
  group_by(Time, Measurement, CellType) %>%
  summarize(mean.score = mean(Score)) %>%
  spread(CellType, mean.score)

# Range of scores per group.
ablation %>%
  select(Time, Measurement, CellType, Score) %>%
  group_by(Time, Measurement, CellType) %>%
  summarize(the.min = min(Score), the.max = max(Score))

# Mean and standard deviation per group, kept for plotting.
ablation.mean.sd <- ablation %>%
  select(Time, Measurement, CellType, Score) %>%
  group_by(Time, Measurement, CellType) %>%
  summarize(the.mean = mean(Score), the.sd = sd(Score))

# Group means with +/- 1 SD error bars; the raw scores are overlaid as open
# blue circles.
ggplot(ablation.mean.sd, aes(x = Time, y = the.mean)) +
  geom_point(size = 4) +
  geom_errorbar(
    aes(ymin = the.mean - the.sd, ymax = the.mean + the.sd),
    width = 0.4
  ) +
  facet_grid(Measurement ~ CellType) +
  geom_line() +
  geom_point(data = ablation, aes(y = Score), color = "blue", shape = 1) +
  labs(title = "+/- 1 SD")

# Mean with the confidence limits reported by t.test() for each group.
# NOTE(review): ablation.mean.ci is not used by any plot in this script; the
# plot below is still drawn from ablation.mean.sd.
ablation.mean.ci <- ablation %>%
  select(Time, Measurement, CellType, Score) %>%
  group_by(Time, Measurement, CellType) %>%
  summarize(
    the.mean = mean(Score),
    lower.limit = t.test(Score)$conf.int[1],
    upper.limit = t.test(Score)$conf.int[2]
  )

# Same +/- 1 SD plot, with each raw score coloured by whether it lies more
# than one SD from its group mean. The labels follow the FALSE, TRUE order
# of the logical `outlier` column.
ggplot(ablation.mean.sd, aes(x = Time, y = the.mean)) +
  geom_point(size = 2) +
  geom_errorbar(
    aes(ymin = the.mean - the.sd, ymax = the.mean + the.sd),
    width = 0.4
  ) +
  facet_grid(Measurement ~ CellType) +
  geom_line() +
  geom_point(
    data = ablation %>%
      group_by(Measurement, CellType, Time) %>%
      mutate(outlier = abs((Score - mean(Score)) / sd(Score)) > 1),
    aes(y = Score, color = outlier),
    size = 4,
    shape = 1
  ) +
  labs(title = "+/- 1 SD", y = "Mean") +
  scale_colour_discrete(
    name = "Outlier Status",
    labels = c("Within 1 SD", "Outside 1 SD")
  )