#install and attach the following R packages
library(ggplot2)
library(viridis)
library(hrbrthemes)
library(patchwork)
library("dplyr")
library(reshape2)
library(data.table)

#figure 2 script
	#import data frame "data" from table containing the fields:
		# "total_reads_no_clean" = total raw read counts for each sample from libraries prepared without RNA clean-up
		# "total_reads_clean" = total raw read counts for each sample from libraries prepared with RNA clean-up
		# "perc_mapped_primtrim_reads_raw_no_clean" = percentage of raw reads mapped to the SARS-CoV-2 genome after alignment and primer-trimming from libraries prepared without RNA clean-up
		# "perc_mapped_primtrim_reads_raw_clean" = percentage of raw reads mapped to the SARS-CoV-2 genome after alignment and primer-trimming from libraries prepared with RNA clean-up
		# "perc_cov_no_clean" = percentage coverage breadth of SARS-CoV-2 genome from libraries prepared without RNA clean-up
		# "perc_cov_clean" = percentage coverage breadth of SARS-CoV-2 genome from libraries prepared with RNA clean-up
		# "mean_depth_no_clean" = mean coverage depth across the SARS-CoV-2 genome from libraries prepared without RNA clean-up
		# "mean_depth_clean" = mean coverage depth across the SARS-CoV-2 genome from libraries prepared with RNA clean-up

# Figure 2: paired comparison of libraries prepared without (x-axis) and with
# (y-axis) RNA clean-up. Each panel is a scatter plot of one metric taken from
# the data frame `data`, with a dashed red y = x reference line.

# Layers shared by the four Figure 2 panels, added in this order: points,
# base theme, theme overrides, then the reference line (drawn over the points).
figure2_layers <- list(
  # `colour` is set as a fixed value outside aes(). Writing
  # aes(colour = "darkblue") instead maps the constant string through the
  # default discrete colour scale, so the points are not drawn dark blue and
  # any panel that does not suppress the legend gains a one-entry legend.
  geom_point(colour = "darkblue", alpha = 0.5, size = 3),
  theme_ipsum(),
  theme(
    axis.title.x = element_text(size = 10),
    axis.title.y = element_text(size = 10),
    plot.title = element_text(hjust = 0.5)
  ),
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", colour = "red")
)

# Panel a: total raw read counts.
figure2a <- ggplot(data, aes(x = total_reads_no_clean, y = total_reads_clean)) +
  figure2_layers +
  labs(
    title = "Total raw reads",
    x = "Number of sequences (Without RNA clean-up)",
    y = "Number of sequences (With RNA clean-up)"
  ) +
  scale_x_continuous(limits = c(0, 3e7), breaks = c(0, 1e7, 2e7, 3e7)) +
  scale_y_continuous(limits = c(0, 3e7), breaks = c(0, 1e7, 2e7, 3e7))

# Panel b: percentage of raw reads mapped after primer-trimming.
figure2b <- ggplot(
  data,
  aes(
    x = perc_mapped_primtrim_reads_raw_no_clean,
    y = perc_mapped_primtrim_reads_raw_clean
  )
) +
  figure2_layers +
  labs(
    title = "% reads mapped after primer-trimming",
    x = "Percentage of sequences (Without RNA clean-up)",
    y = "Percentage of sequences (With RNA clean-up)"
  ) +
  xlim(0, 100) +
  ylim(0, 100)

# Panel c: percentage of the genome covered.
figure2c <- ggplot(data, aes(x = perc_cov_no_clean, y = perc_cov_clean)) +
  figure2_layers +
  labs(
    title = "% genome coverage",
    x = "% genome covered (Without RNA clean-up)",
    y = "% genome covered (With RNA clean-up)"
  ) +
  xlim(0, 100) +
  ylim(0, 100)

# Panel d: mean coverage depth.
figure2d <- ggplot(data, aes(x = mean_depth_no_clean, y = mean_depth_clean)) +
  figure2_layers +
  labs(
    title = "Mean coverage depth",
    x = "Mean coverage depth (Without RNA clean-up)",
    y = "Mean coverage depth (With RNA clean-up)"
  ) +
  scale_x_continuous(
    limits = c(0, 20000),
    breaks = c(0, 5000, 10000, 15000, 20000)
  ) +
  scale_y_continuous(
    limits = c(0, 20000),
    breaks = c(0, 5000, 10000, 15000, 20000)
  )

#figure 4 script
	#import data frame "data" from table containing the fields:
		# "total_reads_ampli" = total raw read counts for each sample from libraries prepared using AmpliClean beads
		# "total_reads_omega" = total raw read counts for each sample from libraries prepared using Omega Mag-Bind® beads
		# "perc_mapped_primtrim_reads_raw_ampli" = percentage of raw reads mapped to the SARS-CoV-2 genome after alignment and primer-trimming from libraries prepared using AmpliClean beads
		# "perc_mapped_primtrim_reads_raw_omega" = percentage of raw reads mapped to the SARS-CoV-2 genome after alignment and primer-trimming from libraries prepared using Omega Mag-Bind® beads
		# "perc_cov_ampli" = percentage coverage breadth of SARS-CoV-2 genome from libraries prepared using AmpliClean beads
		# "perc_cov_omega" = percentage coverage breadth of SARS-CoV-2 genome from libraries prepared using Omega Mag-Bind® beads
		# "mean_depth_ampli" = mean coverage depth across the SARS-CoV-2 genome from libraries prepared using AmpliClean beads
		# "mean_depth_omega" = mean coverage depth across the SARS-CoV-2 genome from libraries prepared using Omega Mag-Bind® beads
		# "ct_mean" = mean Ct value for each sample (mapped to point colour in every figure 4 panel)

# Figure 4 (panels b-e): paired comparison of libraries prepared with
# AmpliClean beads (x-axis) and Omega Mag-Bind beads (y-axis). Each point is
# coloured by `ct_mean`, and a dashed red y = x line is drawn over the points.

# Components that are identical in all four panels. The point layer is kept
# out of this list because only panel e shows the colour legend.
figure4_layers <- list(
  # direction = -1 reverses the viridis palette.
  scale_color_viridis(direction = -1),
  theme(
    axis.title.x = element_text(size = 10),
    axis.title.y = element_text(size = 10),
    plot.title = element_text(hjust = 0.5),
    panel.background = element_rect(fill = "white"),
    axis.line = element_line(colour = "black"),
    panel.grid = element_line(colour = "lightgrey")
  ),
  labs(x = "AmpliClean", y = "Omega MagBind"),
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", colour = "red")
)

# Panel b: total raw reads, log10 axes.
figure4b <- ggplot(data, aes(x = total_reads_ampli, y = total_reads_omega)) +
  geom_point(aes(colour = ct_mean), alpha = 0.5, size = 3, show.legend = FALSE) +
  figure4_layers +
  ggtitle("Total raw reads") +
  scale_x_continuous(trans = "log10", limits = c(1e2, 1e7)) +
  scale_y_continuous(trans = "log10", limits = c(1e2, 1e7))

# Panel c: percentage of raw reads mapped after primer-trimming.
figure4c <- ggplot(
  data,
  aes(
    x = perc_mapped_primtrim_reads_raw_ampli,
    y = perc_mapped_primtrim_reads_raw_omega
  )
) +
  geom_point(aes(colour = ct_mean), alpha = 0.5, size = 3, show.legend = FALSE) +
  figure4_layers +
  ggtitle("% reads mapped after primer-trimming") +
  xlim(0, 100) +
  ylim(0, 100)

# Panel d: percentage of the genome covered.
figure4d <- ggplot(data, aes(x = perc_cov_ampli, y = perc_cov_omega)) +
  geom_point(aes(colour = ct_mean), alpha = 0.5, size = 3, show.legend = FALSE) +
  figure4_layers +
  ggtitle("% genome coverage") +
  xlim(0, 100) +
  ylim(0, 100)

# Panel e: mean coverage depth, log10 axes. This is the only panel that keeps
# the `ct_mean` colour legend (show.legend is left at its default).
figure4e <- ggplot(data, aes(x = mean_depth_ampli, y = mean_depth_omega)) +
  geom_point(aes(colour = ct_mean), alpha = 0.5, size = 3) +
  figure4_layers +
  ggtitle("Mean coverage depth") +
  scale_x_continuous(trans = "log10", limits = c(1, 1e5)) +
  scale_y_continuous(trans = "log10", limits = c(1, 1e5))

#figure 6 script
	#import data frame "data1" from table containing the fields:
		#"sample" = sample id of synthetic SARS-CoV-2 RNA sample 
		#"perc_cov" = percentage coverage breadth of SARS-CoV-2 genome from sequencing of "sample"
	#import data frame "data2" from table containing the fields:
		#"sample" = sample id of synthetic SARS-CoV-2 RNA sample 
		#"variant" = SARS-CoV-2 variant in sample
		#"expected_abundance" = expected abundance of "variant" in "sample"
	#import data frame "data3" from table containing the fields:
		#"sample" = sample id of synthetic SARS-CoV-2 RNA sample 
		#"variant" = SARS-CoV-2 variant in sample
		#"abundance" = measured abundance of "variant" in "sample" using freyja

# Display order of the synthetic samples along the x-axis of Figure 6:
# covmix_1..46, alpha_sd_1..7, delta_sd_1..7, then alpha_delta_sd_1..14
# (74 sample ids in total). Built from the numbered runs instead of a
# hand-typed literal so the sequence cannot contain a typo or a gap.
sample_order <- c(
  paste0("covmix_", seq_len(46)),
  paste0("alpha_sd_", seq_len(7)),
  paste0("delta_sd_", seq_len(7)),
  paste0("alpha_delta_sd_", seq_len(14))
)

# Figure 6a: genome coverage breadth (%) for each synthetic sample.
# Converting `sample` to a factor fixes the bar order to `sample_order`.
# Note that factor() turns any sample id absent from `sample_order` into NA.
data1$sample <- factor(data1$sample, levels = sample_order)

figure6a <- ggplot(data1, aes(x = sample, y = perc_cov)) +
  geom_bar(stat = "identity", fill = "#440154") +
  # x-axis text, title and ticks are blanked in this panel. The stray
  # trailing comma (an empty argument) that ended this theme() call has
  # been removed.
  theme(
    axis.text.x = element_blank(),
    axis.title.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.y = element_text(size = 10)
  ) +
  ylab("Genome coverage (%)") +
  # expand = c(0, 0) removes the padding so the bars start on the axis line.
  scale_y_continuous(limits = c(0, 100), expand = c(0, 0))

# Figure 6b: expected variant composition of each synthetic sample.
# The factor levels pin the x-axis to the order given by `sample_order`.
data2$sample <- factor(data2$sample, levels = sample_order)

figure6b <- ggplot(
  data2,
  aes(x = sample, y = expected_abundance, fill = variant)
) +
  # position = "fill" rescales every stacked bar to a total height of 1.
  geom_bar(stat = "identity", position = "fill") +
  scale_fill_manual(values = c("#440154", "#3b528b", "#21918c")) +
  labs(y = "Expected Abundance") +
  scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
  theme(
    legend.title = element_text(face = "bold"),
    plot.title = element_text(size = 20, hjust = 0.5),
    axis.text.x = element_blank(),
    axis.title.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.y = element_text(size = 10)
  )

# Figure 6c: measured variant composition of each synthetic sample.
# The factor levels pin the x-axis to the order given by `sample_order`.
data3$sample <- factor(data3$sample, levels = sample_order)

figure6c <- ggplot(data3, aes(x = sample, y = abundance, fill = variant)) +
  # position = "fill" rescales every stacked bar to a total height of 1.
  geom_bar(stat = "identity", position = "fill") +
  scale_fill_manual(
    values = c("#440154", "#3b528b", "#21918c", "#5ec962", "#fde725")
  ) +
  labs(y = "Measured Abundance") +
  scale_y_continuous(limits = c(0, 1), expand = c(0, 0)) +
  theme(
    legend.title = element_text(face = "bold"),
    # Sample ids are drawn vertically on this panel's x-axis.
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    axis.title.x = element_blank(),
    axis.title.y = element_text(size = 10)
  )
	
# Figure 6: stack panels a, b and c vertically with relative heights 1:2:2
# and tag them "a", "b", "c". `&` binds more loosely than `+`, so the outer
# parentheses only make the existing evaluation order explicit: the tag
# theme is applied to the assembled patchwork, i.e. to every panel.
figure6 <- (
  (figure6a / figure6b / figure6c) +
    plot_layout(heights = c(1, 2, 2)) +
    plot_annotation(tag_levels = "a")
) &
  theme(plot.tag = element_text(size = 16))

#figure 8 script
#import data frame "data1" from table containing the fields:
	#"sample" = sample id 
	#"insert1"-"insert154" = columns containing the median coverage depth for each amplicon insert region output from mosdepth
#import data frame "data2" from table containing the fields:
	#"sample" = sample id
	#"ct_mean" = mean Ct value from repeats of SARS-CoV-2 N1 region RT-qPCR
	
# Figure 8 data preparation.

# Strip the "insert" prefix so amplicon columns are labelled by number only
# ("insert12" -> "12"). sub() is vectorised over the whole name vector, so no
# per-column loop is needed.
colnames(data1) <- sub("insert", "", colnames(data1))

# Every column after the first ("sample") is an amplicon insert region.
regions <- colnames(data1)[-1]

# log10(x + 1) keeps zero-depth amplicons finite. all_of() marks `regions`
# as an external character vector of column names for tidyselect.
data1_log <- data1 %>% mutate(across(all_of(regions), ~ log10(.x + 1)))

# Per-sample total of the log depths, used to order the samples. Summing
# over `regions` rather than fixed column positions 2:155 means the script
# no longer assumes exactly 154 amplicon columns.
data1_log$sums <- rowSums(as.data.frame(data1_log)[regions])

# Order samples by ascending total and freeze that order in the factor
# levels so ggplot keeps it on the discrete axis.
data1_log <- data1_log[order(data1_log$sums), ]
data1_log$sample <- factor(
  data1_log$sample,
  levels = data1_log$sample,
  ordered = TRUE
)

# Long format for geom_tile(): one row per sample x amplicon. melt() is
# namespaced because reshape2 and data.table both export a function of
# that name.
data1_log.melt <- data.table::melt(
  as.data.table(data1_log),
  id.vars = "sample",
  measure.vars = regions
)

# Attach the ordering totals to the Ct table. all.x = TRUE keeps every
# sample that appears in the heat map even when it has no Ct record
# (ct_mean is then NA), so both panels keep the same set of rows.
data2 <- merge(
  data1_log[, c("sample", "sums")],
  data2,
  by = "sample",
  all.x = TRUE
)

data2 <- data2[order(data2$sums), ]

# Reuse the heat-map levels instead of re-deriving them from data2, so
# samples with tied `sums` cannot end up in a different order.
data2$sample <- factor(
  as.character(data2$sample),
  levels = levels(data1_log$sample),
  ordered = TRUE
)


# Figure 8a: heat map with one tile per amplicon (x) and sample (y), filled
# by the log10-transformed median coverage depth held in `value`.
figure8a <- ggplot(
  data1_log.melt,
  aes(x = variable, y = sample, fill = value)
) +
  geom_tile() +
  scale_fill_viridis(
    name = "log10(median coverage depth +1)",
    discrete = FALSE
  ) +
  labs(x = "Amplicon", y = "Sample") +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 7),
    axis.title.x = element_text(size = 14),
    axis.text.y = element_text(size = 8),
    axis.title.y = element_text(size = 14),
    legend.direction = "horizontal",
    legend.position = "bottom",
    legend.key.size = unit(0.6, "cm"),
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 12)
  )

# Figure 8b: mean Ct per sample as horizontal bars. coord_flip() turns the
# bars sideways, so the `sample` axis is the vertical one; its text, title
# and ticks are blanked here.
figure8b <- ggplot(data2, aes(x = sample, y = ct_mean, fill = ct_mean)) +
  geom_bar(stat = "identity") +
  # direction = -1 reverses the viridis palette.
  scale_fill_viridis(discrete = FALSE, direction = -1) +
  labs(y = "Mean Ct") +
  coord_flip() +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.x = element_text(size = 11.2),
    axis.title.x = element_text(size = 14),
    legend.direction = "horizontal",
    legend.position = "bottom",
    legend.key.size = unit(0.6, "cm"),
    legend.title = element_blank(),
    legend.text = element_text(size = 12)
  )

# Figure 8: heat map and Ct bars side by side, with relative widths 16 : 0.8.
figure8 <- (figure8a | figure8b) +
  plot_layout(widths = c(16, 0.8))