############################################################
#Green and Losada—Applications in Plant Sciences 2023 11(5)—Data Supplement S2
#DOI 10.1002/aps3.11551
#Appendix S2.
# The following functions were tested under R version 4.2.2
# and depend on the EBImage package version 4.40.0.
# output of sessionInfo() follows
#R version 4.2.2 (2022-10-31)
#Platform: x86_64-pc-linux-gnu (64-bit)
#Running under: Debian GNU/Linux 10 (buster)
#Matrix products: default
#BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.8.0
#LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.8.0
#locale:
# [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
# [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
# [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
# [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
# [9] LC_ADDRESS=C LC_TELEPHONE=C
#[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#attached base packages:
#[1] stats graphics grDevices utils datasets methods base
#other attached packages:
#[1] EBImage_4.40.0
#loaded via a namespace (and not attached):
# [1] locfit_1.5-9.7 lattice_0.20-45 png_0.1-8
# [4] fftwtools_0.9-11 digest_0.6.31 bitops_1.0-7
# [7] tiff_0.1-11 grid_4.2.2 rlang_1.0.6
#[10] htmlwidgets_1.6.1 RCurl_1.98-1.9 jpeg_0.1-10
#[13] abind_1.4-5 fastmap_1.1.0 compiler_4.2.2
#[16] BiocGenerics_0.44.0 htmltools_0.5.4

############################################################
# header

# library() stops with an error if EBImage is missing; require() would only
# return FALSE and let the script fail later with a less helpful message.
library(EBImage)
# functions.R is expected to provide clean() and st(), which are used below.
source('/path/to/functions.R')

############################################################
# recreation of plots

#FIGURE 2
raw <- readImage('/path/to/image_name.tiff')
greyscale <- normalize(channel(raw, 'grey'))
binary <- clean(greyscale, thresh_win = 30, thresh_sense = 0.01, schmutz = 81)
areole_sizes <- computeFeatures.shape(bwlabel(binary))[, 's.area']
hist(areole_sizes,
     main = 'Areole size distribution',
     xlab = 'Areole area in pixels',
     freq = FALSE)
lines(density(areole_sizes))

#FIGURE 3
sizingt <- st(binary, max_mask = floor(sqrt(mean(areole_sizes))))
# the first entry of the size table is dropped before plotting
plot(as.table(table(sizingt$sizes)[-1]),
     type = 'h',
     ylab = 'Frequency',
     xlab = 'Diameter in Pixels',
     main = 'Sizing transform')

#FIGURE 4
distm <- distmap(binary)
hist(as.numeric(distm),
     main = 'Areole distance map',
     freq = FALSE,
     xlab = 'Euclidean distance from nearest vein in pixels',
     ylab = 'Frequency')
hist(log(as.numeric(distm)),
     main = 'Areole distance map',
     freq = FALSE,
     border = 'grey',
     xlab = 'Euclidean distance from nearest vein in log pixels')

############################################################
# batch processing
#
# The images processed here were published with
# Green, W. A., S. A. Little, C. A. Price, S. L. Wing, S. Y. Smith,
# B. Kotrc, and G. Doria (2014) Reading the Leaves: A comparison of leaf
# rank and automated areole measurement for quantifying aspects of leaf
# venation. Applications in Plant Sciences 2(8):1400006.
# and are available as a digital archive via DataDryad at:
# https://datadryad.org/stash/dataset/doi:10.5061/dryad.8h022

setwd('/path/to/directory/with/images/to/process')

# a trial set of images, to run before committing to the full script
#proc <- c('0004.jpg', '0005.jpg', '0078.jpg', '0079.jpg')

# process all .jpg images in the working directory; the pattern is escaped
# and anchored so only names that END in '.jpg' are selected
proc <- dir(pattern = '\\.jpg$')

###########
#create masks

# Note: images 0089.jpg, 0093.jpg, and 0094.jpg crash clean()!
# It is likely that this could be fixed either by tuning the
# default arguments to clean() or pre-processing these images.
# In addition, several other images do not crash any of the
# scripts, but produce output that is biologically meaningless
# because, for instance: in the case of 0081.jpg and 0082.jpg,
# the measurements relate to the pattern of trichomes, not
# veins visible on the leaf.
#
# Every image is attempted (the loop no longer starts at a hard-coded
# restart index). An image for which readImage(), clean() or writeImage()
# signals an R error is reported with a warning and skipped, so no mask file
# is written for it. tryCatch() cannot recover from a hard crash of the R
# process itself; if that happens, remove the offending image and rerun.
cat('Creating mask for file:')
for (i in seq_along(proc)) {
  cat(paste(proc[i], '. \n'))
  tryCatch(
    {
      raw <- readImage(proc[i]) # read in the raw image
      mask <- clean(raw) # normalize, convert to mask and clean image
      # 'NNNN.jpg' -> 'NNNN.msk.tiff'
      writeImage(mask, sub('\\.jpg$', '.msk.tiff', proc[i]))
    },
    error = function(e) {
      warning('No mask written for ', proc[i], ': ', conditionMessage(e),
              call. = FALSE, immediate. = TRUE)
    }
  )
}

##########
# calculate summary stats for areole sizes and distance maps

# this updates the processing list to include
# only files for which a mask was written above
proc <- dir(pattern = '\\.msk\\.tiff$')

#not needed in this version when st() is called in a separate for() loop
#max_balls <- rep(0, times = length(proc))

#calculate areole size and distance map summary stats
# tab is a dataframe with 4 columns, holding the image name, mean areole size
# (in pixels), modal sizing transform diameter (in pixels), and
# log-transformed mode of the distance map density. Columns are created with
# their final types so numeric results are not coerced to character.
tab <- data.frame(
  Image = proc,
  MEAN_AS = rep(NA_real_, length(proc)),
  Mod_ST = rep(NA_character_, length(proc)), # filled in by the st() loop below
  Mod_Log_DM = rep(NA_real_, length(proc)),
  stringsAsFactors = FALSE
)

cat('Calculating areole sizes and distance transform for mask:')
for (i in seq_along(proc)) {
  cat(paste(proc[i], '. '))

  #not needed in this version when clean() is called in a separate for() loop
  # raw <- readImage(proc[i]) #read in the raw image
  # mask <- clean(raw) #normalize, convert to mask and clean image
  mask <- readImage(proc[i])

  # calculate stats
  as <- computeFeatures.shape(bwlabel(mask))[, 's.area']

  #not needed in this version when st() is called in a separate for() loop
  # max_balls[i] <- floor(sqrt(mean(as)))

  dm <- distmap(mask)

  # determine the mode of the smoothed density of the log-transformed
  # distance map; which.max() always yields exactly one value, even when the
  # maximum density is attained at more than one grid point
  dm_den <- density(log(as.numeric(dm)), adjust = 5)
  log_mod_dm <- dm_den$x[which.max(dm_den$y)]

  ### A diagnostic plot, commented out for the purposes of the batch run.
  # In a full analysis, this should be examined for each image to confirm
  # that the default smoothing of the density function produces reasonable
  # results
  #hist(log(as.numeric(dm)), freq = FALSE)
  #lines(density(log(as.numeric(dm)), adjust = 5), lwd = 3)
  #abline(v = log_mod_dm, col = 'red')
  ###

  tab$MEAN_AS[i] <- mean(as)
  tab$Mod_Log_DM[i] <- log_mod_dm

  # written every iteration so partial results survive a crash
  write.table(tab, 'part.table.csv')
}

##########
# calculate summary stats for sizing transform

# The sizing transform function, st(), has led to inconsistent but troubling
# out-of-memory issues. The solution for this batch script involved breaking
# it up into separate for() loops for the different calculations, removing all
# the intermediate storage objects, which could be used for plotting
# individual file outputs, and persistent explicit removal of objects followed
# by calls of gc() to clear memory. Despite these modifications, the final run
# (of about 230 images) still seems to show a memory leak, as percent memory
# usage inched up from <10% to >50% over the course of the loop.

# Clean out unnecessary objects; only names that currently exist are removed,
# so this step emits no warnings when some of them were never created
rm(list = intersect(
  c('as', 'clean', 'dm', 'dm_den', 'i', 'log_mod_dm', 'mask', 'skeleton',
    'thin'),
  ls()
))
gc()

# If necessary, recreate the variables tab and max_balls
tab <- read.table('part.table.csv')
# rows of tab are matched to proc by position, so the two must agree
stopifnot(nrow(tab) == length(proc))

# this is the maximum ball size for the sizing transform to try
# based on the mean areole size of EACH image (one value per row of tab);
# allowing st() to try larger balls will not change the modal ball size,
# but merely slow down the script.
max_balls <- floor(sqrt(tab$MEAN_AS))

cat('Calculating sizing transform for mask #:')
for (i in seq_along(proc)) {
  cat(paste('\n', proc[i], '\n'))

  # pick up the table written by the previous iteration
  if (i > 1) tab <- read.table('table.csv')

  # proc normally already holds mask file names, in which case sub() leaves
  # them unchanged; a '.jpg' name is mapped to its mask file
  mask <- readImage(sub('\\.jpg$', '.msk.tiff', proc[i]))
  sz <- st(mask, max_mask = max_balls[i])
  rm(mask)
  gc()

  # determine which of the sizing transform kernels is the modal one
  # (first table entry dropped, as in Figure 3); which.max() returns a single
  # kernel even if several share the maximum count
  sz_tab <- as.table(table(sz$sizes)[-1])
  sz_mod_kern <- names(sz_tab)[which.max(sz_tab)]
  tab[i, 3] <- sz_mod_kern
  rm(sz_tab)
  rm(sz_mod_kern)
  gc()

  write.table(tab, 'table.csv')
}
# Note that the output table is written to disk each loop; the final version
# ran without crashing, but this feature is left in place so that the loop can
# be restarted from the point at which it crashed if it should crash on a
# different platform or with a larger number of images.

############################################################
# extraction of information from metadata.csv in Green et al. 2014 digital
# archive
#
md <- read.table('metadata.csv', header = TRUE, sep = '\t')
str(md)
table(md$WOODY)
length(unique(na.omit(md$REV.FAM)))
length(unique(na.omit(md$REV.GEN)))
# count distinct genus + species combinations using only rows where BOTH
# fields are present, so genus and species always come from the same row
has_binomial <- !is.na(md$REV.GEN) & !is.na(md$REV.SP)
length(unique(paste(md$REV.GEN[has_binomial], md$REV.SP[has_binomial])))