############################################################
#Green and Losada—Applications in Plant Sciences 2023 11(5)—Data Supplement S2
#DOI 10.1002/aps3.11551

#Appendix S2.

# The following functions were tested under R version 4.2.2
#  and depend on the EBImage package version 4.40.0.

# output of sessionInfo() follows
#R version 4.2.2 (2022-10-31)
#Platform: x86_64-pc-linux-gnu (64-bit)
#Running under: Debian GNU/Linux 10 (buster)

#Matrix products: default
#BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.8.0
#LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.8.0

#locale:
# [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
# [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
# [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
# [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
# [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

#attached base packages:
#[1] stats     graphics  grDevices utils     datasets  methods   base     

#other attached packages:
#[1] EBImage_4.40.0

#loaded via a namespace (and not attached):
# [1] locfit_1.5-9.7      lattice_0.20-45     png_0.1-8          
# [4] fftwtools_0.9-11    digest_0.6.31       bitops_1.0-7       
# [7] tiff_0.1-11         grid_4.2.2          rlang_1.0.6        
#[10] htmlwidgets_1.6.1   RCurl_1.98-1.9      jpeg_0.1-10        
#[13] abind_1.4-5         fastmap_1.1.0       compiler_4.2.2     
#[16] BiocGenerics_0.44.0 htmltools_0.5.4  

############################################################
# header

# library() stops with an error if EBImage is not installed, whereas require()
#  would only warn and let the script fail later at the first EBImage call.
library(EBImage)
# functions.R is not shown here; presumably it defines clean() and st(),
#  which are called below -- confirm against that file.
source('/path/to/functions.R')

############################################################
# recreation of plots

#FIGURE 2
# Areole size distribution for a single image: read the image, reduce it to a
#  normalized grey layer, turn that into a cleaned binary mask with clean(),
#  and take the 's.area' shape feature of every labelled region of the mask.

raw <- readImage('/path/to/image_name.tiff')
greyscale <- normalize(channel(raw, 'grey'))
binary <- clean(greyscale, schmutz = 81, thresh_sense = 0.01, thresh_win = 30)
areole_sizes <- computeFeatures.shape(bwlabel(binary))[, 's.area']
# density-scaled histogram (freq = FALSE) so the kernel density can be overlaid
hist(areole_sizes, freq = FALSE, xlab = 'Areole area in pixels',
     main = 'Areole size distribution')
lines(density(areole_sizes))

#FIGURE 3
# Sizing transform of the same mask. The largest mask size st() may try is the
#  square root of the mean areole area, rounded down.
largest_ball <- floor(sqrt(mean(areole_sizes)))
sizingt <- st(binary, max_mask = largest_ball)
# counts per size returned by st(); the first table entry is dropped
#  (presumably the background class -- confirm against st())
size_counts <- table(sizingt$sizes)[-1]
plot(as.table(size_counts), type = 'h', main = 'Sizing transform',
     xlab = 'Diameter in Pixels', ylab = 'Frequency')

#FIGURE 4
# Distance map of the mask, drawn once on the raw scale and once on the log scale.
distm <- distmap(binary)
hist(as.numeric(distm), freq = FALSE, main = 'Areole distance map',
     ylab = 'Frequency',
     xlab = 'Euclidean distance from nearest vein in pixels')
hist(log(as.numeric(distm)), freq = FALSE, border = 'grey',
     main = 'Areole distance map',
     xlab = 'Euclidean distance from nearest vein in log pixels')

############################################################
# batch processing
#

# The images processed here were published with
#  Green, W. A., S. A. Little, C. A. Price, S. L. Wing, S. Y. Smith, B. Kotrc, and G. Doria
#  (2014) Reading the Leaves: A comparison of leaf rank and automated areole measurement
#  for quantifying aspects of leaf venation. Applications in Plant Sciences 2(8): 1400006,
#  and are available as a digital archive via DataDryad at:
# https://datadryad.org/stash/dataset/doi:10.5061/dryad.8h022

setwd('/path/to/directory/with/images/to/process')

# a trial set of images, to run before committing to the full script
#proc <- c('0004.jpg', '0005.jpg', '0078.jpg', '0079.jpg')

# Process every file in the working directory whose name ends in '.jpg'.
#  The dot is escaped and the pattern is anchored at the end of the name, so
#  that names merely containing 'jpg' somewhere (e.g. 'xjpg.txt' or
#  'a.jpg.bak') are not picked up, as they were with the pattern '.jpg'.
proc <- dir(pattern = '\\.jpg$')

###########
#create masks

# Note: images 0089.jpg, 0093.jpg, and 0094.jpg crash clean()!
#  It is likely that this could be fixed either by tuning the
#  default arguments to clean() or pre-processing these images.
# In addition, several other images do not crash any of the
#  scripts, but produce output that is biologically meaningless
#  because, for instance: in the case of 0081.jpg and 0082.jpg,
#  the measurements relate to the pattern of trichomes, not
#  veins visible on the leaf.

# Write a '<name>.msk.tiff' mask next to every image listed in proc.
#  The loop covers the whole of proc (it used to start at a hard-coded
#  index of 94, which silently skipped the first 93 images).
#  An image for which clean() raises an R error is reported and skipped
#  instead of aborting the run; no mask file is written for it, so it drops
#  out of the later processing steps, which only list existing masks.
cat('Creating mask for file:')
for (i in seq_along(proc)) {
  cat(paste(proc[i], '. '))
  raw <- readImage(proc[i])	#read in the raw image
  # normalize, convert to mask and clean image; NULL marks a failed image
  mask <- tryCatch(
    clean(raw),
    error = function(e) {
      cat(paste0('[clean() failed: ', conditionMessage(e), '] '))
      NULL
    }
  )
  if (is.null(mask)) next
  writeImage(mask, gsub('jpg', 'msk.tiff', proc[i]))
}

##########
# calculate summary stats for areole sizes and distance maps

# Update the processing list to include only files for which clean() produced
#  a mask. The pattern is escaped and anchored so that only names ending in
#  '.msk.tiff' are listed.
proc <- dir(pattern = '\\.msk\\.tiff$')

# max_balls is not filled in here: st() is called in a separate for() loop,
#  and the maximum ball sizes are derived from the MEAN_AS column at that point.

# tab is a data frame with one row per mask and 4 columns, holding the image
#  name, mean areole size (in pixels), modal sizing transform diameter (in
#  pixels; filled in by the sizing transform loop), and the log-transformed
#  mode of the distance map density.
#  The columns are created with their final types, so the numeric results are
#  stored as numbers rather than being coerced to character strings.
tab <- data.frame(Image = character(length(proc)),
                  MEAN_AS = rep(NA_real_, length(proc)),
                  Mod_ST = rep(NA_real_, length(proc)),
                  Mod_Log_DM = rep(NA_real_, length(proc)),
                  stringsAsFactors = FALSE)

cat('Calculating areole sizes and distance transform for mask:')
# seq_along() makes the loop a no-op when no masks were found
for (i in seq_along(proc)) {
  cat(paste(proc[i], '. '))

  # the masks were written by the separate clean() loop, so they are read
  #  back here rather than being recomputed from the raw images
  mask <- readImage(proc[i])

  # calculate stats
  as <- computeFeatures.shape(bwlabel(mask))[, 's.area']

  dm <- distmap(mask)

  # determine the mode of the smoothed density of the log-transformed distance
  #  map; which.max() always returns a single position, even if the maximum
  #  density is attained at more than one grid point
  dm_den <- density(log(as.numeric(dm)), adjust = 5)
  log_mod_dm <- dm_den$x[which.max(dm_den$y)]

### A diagnostic plot, commented out for the purposes of the batch run.
#   In a full analysis, this should be examined for each image to confirm
#    that the default smoothing of the density function produces reasonable results
#hist(log(as.numeric(dm)), freq = FALSE)
#lines(density(log(as.numeric(dm)), adjust = 5), lwd = 3)
#abline(v = log_mod_dm, col = 'red')
###

  # Mod_ST (column 3) is left as NA here
  tab$Image[i] <- proc[i]
  tab$MEAN_AS[i] <- mean(as)
  tab$Mod_Log_DM[i] <- log_mod_dm
  # the partial table is rewritten after every image so that the results
  #  obtained so far survive a crash
  write.table(tab, 'part.table.csv')
}

##########
# calculate summary stats for sizing transform

# The sizing transform function, st(), has led to inconsistent but troubling out-of-memory issues.
#  The solution for this batch script involved breaking it up into separate for() loops for the
#  different calculations, removing all the intermediate storage objects, which could be used for
#  plotting individual file outputs, and persistent explicit removal of objects followed by calls
#  of gc() to clear memory. Despite these modifications, the final run (of about 230 images) still
#  seems to show a memory leak, as percent memory usage inched up from <10% to >50% over the course
#  of the loop.

# Drop everything the sizing transform step does not need, then trigger a
#  garbage collection. clean, skeleton and thin are not assigned anywhere in
#  this script; presumably they are functions defined by functions.R.
rm(list = c('as', 'clean', 'dm', 'dm_den', 'i', 'log_mod_dm', 'mask',
            'skeleton', 'thin'))
gc()

# If necessary, recreate the variables tab and max_balls
tab <- read.table('part.table.csv')
# max_balls holds, for each row of tab (i.e. for each mask), the maximum ball
#  size for the sizing transform to try, based on that image's mean areole
#  size; allowing st() to try larger balls will not change the modal ball size,
#  but merely slow down the script.
#  It must be a vector with one entry per image because the loop below indexes
#  it as max_balls[i]; taking mean() over the whole MEAN_AS column would leave
#  a single value and make max_balls[i] NA for every i > 1.
max_balls <- floor(sqrt(tab$MEAN_AS))

# For every mask, run the sizing transform and store the modal size in
#  column 3 (Mod_ST) of tab. The table is re-read from and re-written to
#  'table.csv' on every pass, and large objects are removed and garbage
#  collected as soon as they are no longer needed.
cat('Calculating sizing transform for mask #:')
# seq_along() makes the loop a no-op when proc is empty
for (i in seq_along(proc)) {
  cat(paste('\n', proc[i], '\n'))
  if (i > 1) tab <- read.table('table.csv')
  # gsub() only changes names that still contain 'jpg'; mask file names
  #  pass through unchanged
  mask <- readImage(gsub('jpg', 'msk.tiff', proc[i]))
  sz <- st(mask, max_mask = max_balls[i])
  rm(mask)
  gc()

  # tabulate the sizes returned by st(), dropping the first table entry
  sz_tab <- as.table(table(sz$sizes)[-1])
  # sz is not used again, so release it like the other intermediate objects
  rm(sz)

  # determine which of the sizing transform kernels is the modal one;
  #  which.max() picks the first of any tied kernels, so exactly one value is
  #  stored, and an empty table yields NA instead of an error
  if (length(sz_tab) > 0) {
    sz_mod_kern <- names(sz_tab)[which.max(sz_tab)]
  } else {
    sz_mod_kern <- NA
  }

  tab[i, 3] <- sz_mod_kern
  rm(sz_tab)
  rm(sz_mod_kern)
  gc()
  write.table(tab, 'table.csv')
}

# Note that the output table is written to disk each loop; the final version ran without crashing,
#  but this feature is left in place so that the loop can be restarted from the point at which it
#  crashed if it should crash on a different platform or with a larger number of images.

############################################################
# extraction of information from metadata.csv in Green et al. 2014 digital archive
#

# Read the tab-separated metadata table and print a few summaries of it.
#  (read.table() is the base R reader; there is no function called readtable().)
md <- read.table('metadata.csv', header = TRUE, sep = '\t')
str(md)
table(md$WOODY)
# number of distinct non-missing values in REV.FAM and in REV.GEN
length(unique(na.omit(md$REV.FAM)))
length(unique(na.omit(md$REV.GEN)))
# number of distinct REV.GEN/REV.SP combinations. Only rows in which both
#  columns are present are used, so that each genus is pasted to the species
#  entry of the same row; applying na.omit() to each column separately would
#  shift or recycle the values whenever the two columns have different NAs.
has_gen_and_sp <- !is.na(md$REV.GEN) & !is.na(md$REV.SP)
length(unique(paste(md$REV.GEN[has_gen_and_sp], md$REV.SP[has_gen_and_sp])))