################################################################################
# Correlation analysis: R script for the paper "Machine learning approaches
# identify male body size as the most accurate predictor of species richness"
# Species richness vs. minimum male body size (both variables are numeric)
################################################################################

library(ggpubr)

# Set your working directory to the folder containing the exported file before
# running this script.
# Export the RF Excel sheet from Additional file 2 as delimited text. The file
# is read with sep = ";" below, so the export must use a semicolon as the
# field separator.
corrALL <- read.csv(
  "Additional file 2 - RF.csv",
  sep = ";",
  stringsAsFactors = TRUE
)

# Use the first column as row names and drop it from the data columns.
corrALL <- data.frame(corrALL[, -1], row.names = corrALL[, 1])

# Keep only the two variables under study. They are selected by name rather
# than by position (previously columns 9 and 27) because every statement below
# refers to them by name; this also fails loudly if the sheet layout changes.
correlation.data <- corrALL[, c("Species.richness.numeric", "Body.Male.MIN")]

# Shapiro-Wilk normality test for each variable.
shapiro.test(correlation.data$Species.richness.numeric)
shapiro.test(correlation.data$Body.Male.MIN)

# Q-Q plots for a visual check of each variable against a normal distribution.
ggqqplot(
  correlation.data$Species.richness.numeric,
  ylab = "Number of species in a genus"
)
ggqqplot(
  correlation.data$Body.Male.MIN,
  ylab = "Minimal male body size [mm]"
)

# Spearman rank correlation between species richness and minimum male body
# size.
cor.test(
  correlation.data$Species.richness.numeric,
  correlation.data$Body.Male.MIN,
  method = "spearman"
)

# Scatter plot with a fitted regression line, its confidence band, and the
# Spearman coefficient printed at the data coordinates (x = 280, y = 55).
p <- ggscatter(
  correlation.data,
  x = "Species.richness.numeric",
  y = "Body.Male.MIN",
  add = "reg.line",
  conf.int = TRUE,
  cor.coef = TRUE,
  cor.method = "spearman",
  cor.coef.coord = c(280, 55),
  xlab = "Number of species within a spider genus",
  ylab = "Minimal male body size in a spider genus",
  ylim = c(0, 58)
)

# Remove the default padding on the y axis so the panel starts exactly at the
# lower y limit.
p + scale_y_continuous(expand = c(0, 0))