# Listing 2. Multivariable analysis code.
# Multivariable analysis script: min-max normalization of the data set,
# agglomerative hierarchical clustering, gap statistic, dendrogram and
# cluster plot. Expects a data frame `datapla2` to exist in the workspace.

# Libraries [71,72,73,74].
library(factoextra)
library(cluster)
library(ggplot2)
library(stats)

# Keep an untouched copy of the input data.
Z <- datapla2

# Remove the `Tr` factor column from the working data set.
# NOTE(review): the original note mentions the factor columns sterile,
# infill and Treatment, but only `Tr` is dropped here; the other factor
# columns are presumably excluded by the 1:12 column selection below.
# Confirm against the layout of `datapla2`.
datapla2$Tr <- NULL

# Shuffle the rows reproducibly: draw one standard-normal value per row with
# a fixed seed and reorder the rows by those values.
set.seed(250)
data.norm <- rnorm(nrow(datapla2))
datapla2 <- datapla2[order(data.norm), ]

#' Min-max normalization to the range [0, 1] for dimensionless
#' comparison [70].
#'
#' @param x A numeric vector.
#' @return A numeric vector of the same length as `x`. A constant `x`
#'   yields `NaN` (0 / 0), and any `NA` in `x` makes the whole result `NA`,
#'   because `min()` and `max()` are called without `na.rm`.
normalize <- function(x) {
  (x - min(x)) / (max(x) - min(x))
}

# Apply the normalization to columns 1 to 12.
# The result is named `data.N` because every later step reads `data.N`
# (R is case-sensitive, so `Data.N` would not be found).
data.N <- as.data.frame(lapply(datapla2[, 1:12], normalize))

# Clustering data with the hierarchical method [64,75,76] ----

# Linkage methods to compare; named so the results are labelled by method.
m <- c("average", "single", "complete", "ward")
names(m) <- m

# Agglomerative coefficient of `data.N` for linkage method `x`.
# Reads `data.N` from the enclosing (global) environment.
ac <- function(x) {
  agnes(data.N, method = x)$ac
}

# Agglomerative coefficient per linkage method; values near 1 are preferred.
vapply(m, ac, numeric(1))

# Number of clusters k vs. gap statistic, with B >= 500 iterations.
# NOTE(review): `nstart` is forwarded to `hcut` through `...`; confirm that
# it has any effect for hierarchical clustering.
gap_stat <- clusGap(data.N, FUN = hcut, nstart = 25, K.max = 10, B = 500)

# Results depend on the biggest jump in within-cluster distance after
# uniformity.
fviz_gap_stat(gap_stat)

# Euclidean distance matrix.
res.dist <- dist(x = data.N, method = "euclidean")

# Hierarchical clustering.
res.hc <- hclust(d = res.dist, method = "ward.D")

# Cluster dendrogram.
fviz_dend(x = res.hc, cex = 0.7, lwd = 0.7)

# Cluster membership of every sample when the tree is cut into 5 clusters.
g <- cutree(res.hc, k = 5)

# Principal component analysis (PCA) plot of the clusters.
fviz_cluster(object = list(data = data.N, cluster = g))

# Cluster sizes.
table(g)

# Normalized data with the cluster assignment attached as column `g`.
# NOTE(review): the first normalized column is left out here (`[, -1]`);
# confirm that this is intended.
g_pla <- cbind(data.N[, -1], g)
print(g_pla)