# The following script was written by Daniel R. S. Middleton and Chris J. Milne
# as supporting information relating to the manuscript published in Environmental Health:

# "Assessing urinary flow rate, creatinine, osmolality and other hydration adjustment methods 
# for urinary biomonitoring using NHANES arsenic, iodine, lead and cadmium data"

# by Daniel R. S. Middleton, Michael J. Watts, R. Murray Lark, Chris J. Milne and David A. Polya

# It is aimed to be used for the hydration adjustment of spot urinary analyte concentrations by measurements
# of urinary concentration, including creatinine, osmolality and urinary flow rate (UFR). 

# The script was tested using R version 3.2.5. and may not work on earlier versions of the software.

# CAVEATS: The data with which this script was originally used were from the US National Health and Nutrition Examination Survey 
# (NHANES) (2009-10 and 2011-12). This script was published to enable other researchers to perform similar analyses on NHANES or their own data.
# Please ensure that the manuscript has been properly read prior to using this script, for an understanding of the necessary data requirements 
# (e.g. required input variables and exclusion criteria), limitations of the methodology, statistical considerations and the relevant theoretical background.
# A basic understanding of how to use R is required to run the script. Please be sure to take time when running the script and ensure that all of 
# the appropriate variable fields have been updated.
# We ask that you kindly cite the manuscript if making use of this script in any published work. Furthermore, please be sure to cite R and
# any additional packages used. 

# Packages you will need to install: 
# caret
# psych

# CONTENTS 
# PART 1 contains the code used to derive Araki's b values using the methodology described in the manuscript.
# PART 2 contains the code used to perform urinary hydration adjustments.
# PART 3 contains the code used to comparatively assess the different adjustment methods.

######################################################### PART 1 ###############################################################

# Load required packages:
library(caret) # For partitioning of data into training:testing sets

# Import data using the code below or your preferred import method.
# Note: setting your working directory will also determine where the file
# exports generated later in the script (e.g. "bOutputs.csv") will go.
setwd("mydrive/myfolder/mysubfolder") # Set working directory to the folder containing the data

fname <- "mydata.csv" # Name of the csv file to import

# fname <- file.choose() # can also be used to manually choose a file

# Read the csv into a data frame. `as.is = TRUE` is spelled out in full because
# `T` is an ordinary variable that can be reassigned, whereas `TRUE` cannot.
full_data <- read.csv(file = fname, as.is = TRUE)

# Partition data into training and testing sets.
# Training rows are selected at random, controlled by the proportion 'p' and
# the random seed.
set.seed(1) # Setting a seed allows the partition to be replicated when re-running the script

# 80 % of rows are selected for training here. Any variable present for all
# observations can be used as 'y'.
inTrain <- createDataPartition(y = full_data$myvariable, p = 0.80, list = FALSE)
training <- full_data[inTrain, ]  # training data
testing <- full_data[-inTrain, ]  # testing data (all rows not selected above)

# At this point, the distribution of some relevant variables should be examined
# to ensure that the two sets are comparable.
# For example: side-by-side histograms of age distribution with equal breaks.
# The previous graphics layout is saved and restored afterwards so that later
# plots are not left in the 1 x 2 layout.
old_par <- par(mfrow = c(1, 2))
hist(training$myvariable, breaks = 20)
hist(testing$myvariable, breaks = 20)
par(old_par)

# Derive optimum Araki's b values for an assessment criterion for a range of elements

# UFR adjustment function - see Equation 5 in the manuscript.
#
# Arguments:
#   var1  Unadjusted analyte concentrations
#   var2  Urinary flow rates in mL/min
#   var3  Araki's b value (exponent applied to the flow rate)
#
# Returns the adjusted analyte concentrations (called 'var4' in the
# manuscript notation), computed element-wise as var1 * var2^var3.
UFR.fun <- function(var1, var2, var3) {
  flow_factor <- var2^var3 # flow rate raised to the power b
  var1 * flow_factor
}


# Prepare csv for output of results: write the header row first; one result
# row per (analyte, b) combination is appended below it inside the loops.
header <- "Analyte,b,Criterion_Corr,lowerCI,upperCI"
write.table(file = "bOutputs.csv", header, sep = ",",
            row.names = FALSE, col.names = FALSE, quote = FALSE)

# Perform a nested loop to generate multiple criterion correlations for UFR
# adjustments using a range of Araki's b values, in order to derive optimum b
# values. Criterion A (correlation between analyte and UFR) is used as an
# example here.
#
# The training data are referenced explicitly (training$...) rather than via
# attach(), so a global variable that happens to be called 'myUFR' can never
# be picked up in place of the training column.

erange <- c("myanalyte1", "myanalyte2", "myanalyte3", "myanalyte4") # Define range of analytes/chemical elements

# Range of Araki's b values: from b = 0.01 to b = 1.5 in increments of 0.01.
# Defined once, outside the loops, because it does not depend on the analyte.
brange <- seq(0.01, 1.5, by = 0.01)

# log(UFR) is the same for every analyte and every b, so compute it once.
log_ufr <- log(training$myUFR)

# Iterating over the values directly is safe for empty ranges, unlike
# 1:length(x), which would iterate over c(1, 0).
for (e in erange) {
  for (b in brange) {
    # Calculate UeUFR (UFR-adjusted analyte) using the previously defined function
    UeUFR <- UFR.fun(training[, e], training$myUFR, b)

    # Calculate Criterion A Pearson correlation and lower/upper 95 % CIs
    result <- cor.test(
      x = log_ufr,
      y = log(UeUFR),
      method = "pearson")
    result.UFR <- result$estimate
    loUFRconf <- result$conf.int[1]
    upUFRconf <- result$conf.int[2]
    print(paste("x = UF; b =", b))
    print(result.UFR)

    # Append this result as one comma-separated row of the csv file
    write.table(file = "bOutputs.csv",
                paste(e, b, result.UFR,
                      loUFRconf, upUFRconf, sep = ","),
                row.names = FALSE, col.names = FALSE, quote = FALSE, append = TRUE)
  } # End of b loop
} # End of analyte loop

# NOTE: after the loops finish, 'b' still holds the last value of brange (1.5).
# It is NOT an optimum b value; the optimum must be extracted from
# "bOutputs.csv" and assigned explicitly before 'b' is used for any adjustment.

# The resultant csv file can then be analysed to extract optimum b values including the generation of the types of plots presented in 
# Figure 2 in the manuscript.

# If using the correlation between adjusted analyte concentration and UFR as the assessment criterion, the correlation closest
# to zero (i.e. with the smallest absolute value) corresponds to the optimum value of b. If using the correlation between adjusted analyte concentration and, for example, blood analyte concentration,
# the strongest correlation (closest to 1) corresponds to the optimum value of b. 

######################################################### PART 2 ###############################################################

# The optimum Araki's b values derived in PART 1 can be implemented in the UFR adjustment of analyte concentrations
# in the testing dataset (or another biomonitoring dataset) and compared with the other adjustment methods presented below:

# Creatinine-adjusted (µg/g creatinine)
testing$myanalyteCRE <- testing$myanalyte / testing$mycreatinine
# Where - 'myanalyte' is in µg/L and
#       - 'mycreatinine' is in g/L

# ER, excretion rate (ng/hr)
testing$myanalyteER <- (testing$myanalyte * testing$myVolume_mL) / testing$myt_hours
# Where - 'myanalyte' is in µg/L (numerically equal to ng/mL, so that
#         concentration x volume in mL gives ng)
#       - 'myVolume_mL' is the urine sample total volume in mL
#       - 'myt_hours' is the time since the previous void in hours

# ERBW, body-weight-adjusted excretion rate (ng/kg-hr)
testing$myanalyteERBW <- (testing$myanalyte * testing$myVolume_mL) / (testing$myt_hours * testing$myBW)
# As above and where - 'myBW' is in kg

# Osmolality-adjusted (µg/L)
# 1. Derive osmolality reference value - in this case the median osmolality of the training dataset
# (values vary. When comparing with other studies, be sure to check which value was chosen e.g. mean, median or arbitrary)
# na.rm = TRUE: without it a single missing osmolality value would make the
# reference NA and therefore every osmolality-adjusted concentration NA.
OSref <- median(training$myosmolality, na.rm = TRUE)

# 2. Perform adjustment
testing$myanalyteOS <- testing$myanalyte * (OSref / testing$myosmolality)
# Where - 'myanalyte' is in µg/L and
#       - 'myosmolality' is in mOsm/kg

# UFR-adjusted (µg/L, UFR 1 mL/min)
# IMPORTANT: assign the analyte-specific optimum Araki's b value to 'b' before
# running the next line. If PART 1 has just been run, 'b' still holds the last
# value of the search grid (1.5), not an optimum.
testing$myanalyteUFR <- testing$myanalyte * testing$myUFR^b
# Where - 'myanalyte' is in µg/L and
#       - 'myUFR' is in mL/min
#       - 'b' is the analyte-specific optimum Araki's b value derived in PART 1

######################################################### PART 3 ###############################################################

# Calculate Pearson correlation coefficients for selected performance criteria,
# e.g. adjusted analyte concentration versus UFR.
# Here, creatinine adjustment is compared to osmolality adjustment as an example.
# Each result is printed to the console; note the estimates for use below.

# Correlation 1: creatinine-adjusted concentration versus UFR
cor.test(log(testing$myanalyteCRE), log(testing$myUFR), method = "pearson")
# Correlation 2: osmolality-adjusted concentration versus UFR
cor.test(log(testing$myanalyteOS), log(testing$myUFR), method = "pearson")

# To test the significance of the difference between these two adjustment methods, Williams' test can be employed.
# Correlation 3: the Pearson correlation between both of the adjusted concentrations.
# This correlation must be specified in the Williams' test because the two
# metrics are not independent (they are derived from the same samples).
cor.test(log(testing$myanalyteOS), log(testing$myanalyteCRE), method = "pearson")

library(psych) # Required package for Williams' test

# Replace the placeholders with the sample size and the three correlation
# estimates obtained above:
#   r12 = Correlation 1, r13 = Correlation 2, r23 = Correlation 3.
# 'twotailed' is written as TRUE rather than T because T is an ordinary
# variable that can be reassigned.
r.test(n = mysamplesize, r12 = Correlation1, r13 = Correlation2, r23 = Correlation3, twotailed = TRUE)

# For help with using any of the functions used in the script simply type: ? followed by the name of the function
# E.g.

?r.test