# Install and attach the following R packages.
# The attach order is kept from the original script: a package attached later
# masks same-named functions of packages attached earlier.
library(ggplot2)
library(viridis)
library(hrbrthemes)
library(patchwork)
library(dplyr)
library(reshape2)
library(data.table)

# Shared layers ----

# Dashed red y = x reference line drawn on every paired scatter plot.
identity_line <- geom_abline(
  intercept = 0, slope = 1, linetype = "dashed", colour = "red"
)

# Figure 2 script ----
# Import data frame "data" from a table containing the fields:
# "total_reads_no_clean" = total raw read counts for each sample from
#   libraries prepared without RNA clean-up
# "total_reads_clean" = total raw read counts for each sample from libraries
#   prepared with RNA clean-up
# "perc_mapped_primtrim_reads_raw_no_clean" = percentage of raw reads mapped
#   to the SARS-CoV-2 genome after alignment and primer-trimming from
#   libraries prepared without RNA clean-up
# "perc_mapped_primtrim_reads_raw_clean" = percentage of raw reads mapped to
#   the SARS-CoV-2 genome after alignment and primer-trimming from libraries
#   prepared with RNA clean-up
# "perc_cov_no_clean" = percentage coverage breadth of SARS-CoV-2 genome from
#   libraries prepared without RNA clean-up
# "perc_cov_clean" = percentage coverage breadth of SARS-CoV-2 genome from
#   libraries prepared with RNA clean-up
# "mean_depth_no_clean" = mean coverage depth across the SARS-CoV-2 genome
#   from libraries prepared without RNA clean-up
# "mean_depth_clean" = mean coverage depth across the SARS-CoV-2 genome from
#   libraries prepared with RNA clean-up

# Layers common to all figure 2 panels. The point colour is a fixed aesthetic
# set outside aes(); inside aes() the string "darkblue" would be treated as a
# one-level variable, giving default-coloured points and a spurious legend.
fig2_layers <- list(
  geom_point(alpha = 0.5, colour = "darkblue", size = 3),
  theme_ipsum(),
  theme(
    axis.title.x = element_text(size = 10),
    axis.title.y = element_text(size = 10),
    plot.title = element_text(hjust = 0.5)
  ),
  identity_line
)

read_count_breaks <- seq(0, 3e7, by = 1e7)
mean_depth_breaks <- seq(0, 20000, by = 5000)

figure2a <- ggplot(
  data,
  aes(x = total_reads_no_clean, y = total_reads_clean)
) +
  fig2_layers +
  labs(
    x = "Number of sequences (Without RNA clean-up)",
    y = "Number of sequences (With RNA clean-up)",
    title = "Total raw reads"
  ) +
  scale_y_continuous(limits = c(0, 3e7), breaks = read_count_breaks) +
  scale_x_continuous(limits = c(0, 3e7), breaks = read_count_breaks)

figure2b <- ggplot(
  data,
  aes(
    x = perc_mapped_primtrim_reads_raw_no_clean,
    y = perc_mapped_primtrim_reads_raw_clean
  )
) +
  fig2_layers +
  labs(
    x = "Percentage of sequences (Without RNA clean-up)",
    y = "Percentage of sequences (With RNA clean-up)",
    title = "% reads mapped after primer-trimming"
  ) +
  ylim(0, 100) +
  xlim(0, 100)

figure2c <- ggplot(data, aes(x = perc_cov_no_clean, y = perc_cov_clean)) +
  fig2_layers +
  labs(
    x = "% genome covered (Without RNA clean-up)",
    y = "% genome covered (With RNA clean-up)",
    title = "% genome coverage"
  ) +
  ylim(0, 100) +
  xlim(0, 100)

figure2d <- ggplot(data, aes(x = mean_depth_no_clean, y = mean_depth_clean)) +
  fig2_layers +
  labs(
    x = "Mean coverage depth (Without RNA clean-up)",
    y = "Mean coverage depth (With RNA clean-up)",
    title = "Mean coverage depth"
  ) +
  scale_y_continuous(limits = c(0, 20000), breaks = mean_depth_breaks) +
  scale_x_continuous(limits = c(0, 20000), breaks = mean_depth_breaks)

# Figure 4 script ----
# Import data frame "data" from a table containing the fields:
# "total_reads_ampli" = total raw read counts for each sample from libraries
#   prepared using AmpliClean beads
# "total_reads_omega" = total raw read counts for each sample from libraries
#   prepared using Omega Mag-Bind® beads
# "perc_mapped_primtrim_reads_raw_ampli" = percentage of raw reads mapped to
#   the SARS-CoV-2 genome after alignment and primer-trimming from libraries
#   prepared using AmpliClean beads
# "perc_mapped_primtrim_reads_raw_omega" = percentage of raw reads mapped to
#   the SARS-CoV-2 genome after alignment and primer-trimming from libraries
#   prepared using Omega Mag-Bind® beads
# "perc_cov_ampli" = percentage coverage breadth of SARS-CoV-2 genome from
#   libraries prepared using AmpliClean beads
# "perc_cov_omega" = percentage coverage breadth of SARS-CoV-2 genome from
#   libraries prepared using Omega Mag-Bind® beads
# "mean_depth_ampli" = mean coverage depth across the SARS-CoV-2 genome from
#   libraries prepared using AmpliClean beads
# "mean_depth_omega" = mean coverage depth across the SARS-CoV-2 genome from
#   libraries prepared using Omega Mag-Bind® beads
# "ct_mean" = column used to colour the points; it is required by the plots
#   below although it was not in the original field list (presumably the mean
#   Ct value described for figure 8 - TODO confirm)

# Layers common to all figure 4 panels; they are added after geom_point() so
# the reference line is drawn on top of the points, as in the original.
fig4_layers <- list(
  scale_color_viridis(direction = -1),
  theme(
    axis.title.x = element_text(size = 10),
    axis.title.y = element_text(size = 10),
    plot.title = element_text(hjust = 0.5),
    panel.background = element_rect(fill = "white"),
    axis.line = element_line(colour = "black"),
    panel.grid = element_line(colour = "lightgrey")
  ),
  identity_line
)

figure4b <- ggplot(data, aes(x = total_reads_ampli, y = total_reads_omega)) +
  geom_point(aes(colour = ct_mean), alpha = 0.5, size = 3, show.legend = FALSE) +
  fig4_layers +
  labs(x = "AmpliClean", y = "Omega MagBind", title = "Total raw reads") +
  scale_y_log10(limits = c(1e2, 1e7)) +
  scale_x_log10(limits = c(1e2, 1e7))

figure4c <- ggplot(
  data,
  aes(
    x = perc_mapped_primtrim_reads_raw_ampli,
    y = perc_mapped_primtrim_reads_raw_omega
  )
) +
  geom_point(aes(colour = ct_mean), alpha = 0.5, size = 3, show.legend = FALSE) +
  fig4_layers +
  labs(
    x = "AmpliClean",
    y = "Omega MagBind",
    title = "% reads mapped after primer-trimming"
  ) +
  ylim(0, 100) +
  xlim(0, 100)

figure4d <- ggplot(data, aes(x = perc_cov_ampli, y = perc_cov_omega)) +
  geom_point(aes(colour = ct_mean), alpha = 0.5, size = 3, show.legend = FALSE) +
  fig4_layers +
  labs(x = "AmpliClean", y = "Omega MagBind", title = "% genome coverage") +
  ylim(0, 100) +
  xlim(0, 100)

# Only this panel keeps its colour legend (show.legend is left at its default).
figure4e <- ggplot(data, aes(x = mean_depth_ampli, y = mean_depth_omega)) +
  geom_point(aes(colour = ct_mean), alpha = 0.5, size = 3) +
  fig4_layers +
  labs(x = "AmpliClean", y = "Omega MagBind", title = "Mean coverage depth") +
  scale_y_log10(limits = c(1, 1e5)) +
  scale_x_log10(limits = c(1, 1e5))

# Figure 6 script ----
# Import data frame "data1" from a table containing the fields:
# "sample" = sample id of synthetic SARS-CoV-2 RNA sample
# "perc_cov" = percentage coverage breadth of SARS-CoV-2 genome from
#   sequencing of "sample"
# Import data frame "data2" from a table containing the fields:
# "sample" = sample id of synthetic SARS-CoV-2 RNA sample
# "variant" = SARS-CoV-2 variant in sample
# "expected_abundance" = expected abundance of "variant" in "sample"
# Import data frame "data3" from a table containing the fields:
# "sample" = sample id of synthetic SARS-CoV-2 RNA sample
# "variant" = SARS-CoV-2 variant in sample
# "abundance" = measured abundance of "variant" in "sample" using freyja

# Display order of the samples on the x axis: covmix_1..46, alpha_sd_1..7,
# delta_sd_1..7, alpha_delta_sd_1..14.
sample_order <- c(
  paste0("covmix_", seq_len(46)),
  paste0("alpha_sd_", seq_len(7)),
  paste0("delta_sd_", seq_len(7)),
  paste0("alpha_delta_sd_", seq_len(14))
)

data1$sample <- factor(data1$sample, levels = sample_order)
figure6a <- ggplot(data1, aes(y = perc_cov, x = sample)) +
  geom_col(fill = "#440154") +
  theme(
    axis.text.x = element_blank(),
    axis.title.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.y = element_text(size = 10)
  ) +
  ylab("Genome coverage (%)") +
  scale_y_continuous(limits = c(0, 100), expand = c(0, 0))

data2$sample <- factor(data2$sample, levels = sample_order)
figure6b <- ggplot(
  data2,
  aes(fill = variant, y = expected_abundance, x = sample)
) +
  geom_col(position = "fill") +
  scale_fill_manual(values = c("#440154", "#3b528b", "#21918c")) +
  theme(
    legend.title = element_text(face = "bold"),
    plot.title = element_text(size = 20, hjust = 0.5)
  ) +
  theme(
    axis.text.x = element_blank(),
    axis.title.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.y = element_text(size = 10)
  ) +
  ylab("Expected Abundance") +
  scale_y_continuous(limits = c(0, 1), expand = c(0, 0))

data3$sample <- factor(data3$sample, levels = sample_order)
figure6c <- ggplot(data3, aes(fill = variant, y = abundance, x = sample)) +
  geom_col(position = "fill") +
  scale_fill_manual(
    values = c("#440154", "#3b528b", "#21918c", "#5ec962", "#fde725")
  ) +
  theme(legend.title = element_text(face = "bold")) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10)
  ) +
  ylab("Measured Abundance") +
  scale_y_continuous(limits = c(0, 1), expand = c(0, 0))

# `&` binds more loosely than `+`, so the tag theme is applied to every panel
# of the assembled patchwork.
figure6 <- (figure6a / figure6b / figure6c) +
  plot_layout(heights = c(1, 2, 2)) +
  plot_annotation(tag_levels = "a") &
  theme(plot.tag = element_text(size = 16))

# Figure 8 script ----
# NOTE: "data1" and "data2" are re-imported here and replace the figure 6
# data frames of the same name.
# Import data frame "data1" from a table containing the fields:
# "sample" = sample id
# "insert1"-"insert154" = columns containing the median coverage depth for
#   each amplicon insert region output from mosdepth
# Import data frame "data2" from a table containing the fields:
# "sample" = sample id
# "ct_mean" = mean Ct value from repeats of SARS-CoV-2 N1 region RT-qPCR

# Strip the "insert" prefix so the amplicon columns are labelled by number.
colnames(data1) <- sub("insert", "", colnames(data1))

# Every column except the first ("sample") is treated as an amplicon region.
regions <- colnames(data1)[-1]

# log10(x + 1) transform of the depths, then a per-sample total of the
# transformed values that is used to order the heatmap rows.
data1_log <- data1 %>%
  mutate(across(all_of(regions), ~ log10(.x + 1))) %>%
  mutate(sums = rowSums(across(all_of(regions))))
data1_log <- data1_log[order(data1_log$sums), ]
data1_log$sample <- factor(
  data1_log$sample,
  levels = data1_log$sample,
  ordered = TRUE
)

# Long format for geom_tile(); column 1 ("sample") is the id variable.
data1_log.melt <- data.table::melt(
  as.data.table(data1_log),
  id.vars = 1,
  measure.vars = regions
)

# Attach the row totals to the Ct table so both panels share one sample order.
# NOTE(review): merge() keeps only samples present in both tables; a sample
# missing from data2 would leave the two panels with different rows.
data2 <- merge(data2, data1_log[, c("sample", "sums")], by = "sample")
data2 <- data2[order(data2$sums), ]
# data2 is already sorted by sums, so its current row order is the level order.
data2$sample <- factor(data2$sample, levels = data2$sample, ordered = TRUE)

figure8a <- ggplot(data1_log.melt, aes(variable, sample, fill = value)) +
  geom_tile() +
  # The legend title is passed by name so it cannot be captured by another
  # positional parameter of the underlying scale constructor.
  scale_fill_viridis(
    name = "log10(median coverage depth +1)",
    discrete = FALSE
  ) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 7),
    axis.title.x = element_text(size = 14),
    axis.text.y = element_text(size = 8),
    axis.title.y = element_text(size = 14),
    legend.direction = "horizontal",
    legend.position = "bottom",
    legend.key.size = unit(0.6, "cm"),
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 12)
  ) +
  xlab("Amplicon") +
  ylab("Sample")

figure8b <- ggplot(data2, aes(y = ct_mean, x = sample, fill = ct_mean)) +
  geom_col() +
  scale_fill_viridis(discrete = FALSE, direction = -1) +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(size = 11.2),
    axis.title.x = element_text(size = 14),
    legend.direction = "horizontal",
    legend.position = "bottom",
    legend.key.size = unit(0.6, "cm"),
    legend.title = element_blank(),
    legend.text = element_text(size = 12)
  ) +
  ylab("Mean Ct") +
  coord_flip()

figure8 <- (figure8a | figure8b) +
  plot_layout(widths = c(16, 0.8))