# The following script was written by Daniel R. S. Middleton and Chris J. Milne
# as supporting information relating to the manuscript published in Environmental Health:
# "Assessing urinary flow rate, creatinine, osmolality and other hydration adjustment methods
# for urinary biomonitoring using NHANES arsenic, iodine, lead and cadmium data"
# by Daniel R. S. Middleton, Michael J. Watts, R. Murray Lark, Chris J. Milne and David A. Polya
#
# It is intended for the hydration adjustment of spot urinary analyte concentrations by measurements
# of urinary concentration, including creatinine, osmolality and urinary flow rate (UFR).
# The script was tested using R version 3.2.5 and may not work on earlier versions of the software.
#
# CAVEATS: The data with which this script was originally used came from the US National Health and Nutrition Examination Survey
# (NHANES) (2009-10 and 2011-12). This script was published to enable other researchers to perform similar analyses on NHANES or their own data.
# Please ensure that the manuscript has been properly read prior to using this script, for an understanding of the necessary data requirements
# (e.g. required input variables and exclusion criteria), limitations of the methodology, statistical considerations and the relevant theoretical background.
# A basic understanding of how to use R is required to run the script. Please be sure to take time when running the script and ensure that all of
# the appropriate variable fields have been updated.
#
# We ask that you kindly cite the manuscript if making use of this script in any published work. Furthermore, please be sure to cite R and
# any additional packages used.
#
# Packages you will need to install:
# caret
# psych
#
# CONTENTS
# PART 1 contains the code used to derive Araki's b values using the methodology described in the manuscript.
# PART 2 contains the code used to perform urinary hydration adjustments.
# PART 3 contains the code used to comparatively assess the different adjustment methods.

######################################################### PART 1 ###############################################################

# Load required packages:
library(caret) # For partitioning of data into training:testing sets

# Import data using the code below or your preferred import method:
# Note: setting your working directory will also determine where the file exports generated later
# in the script will go.
setwd("mydrive/myfolder/mysubfolder") # Set working directory to the folder containing the data
fname <- "mydata.csv" # Assign a name to the relevant csv file
# fname <- file.choose() # can also be used to manually choose a file

# as.is = TRUE keeps character columns as character instead of converting them to factors.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
full_data <- read.csv(file = fname, as.is = TRUE) # Assign csv to a dataframe

# Partition data into training and testing sets:
# Random selection of training samples based on percent partition 'p' and seed 'x'
set.seed(1) # Setting a seed allows the partition to be replicated when re-running the script

# 80 % has been selected for training here. Any variable present for all observations can be selected.
# list = FALSE returns the selected row indices as a matrix rather than a list.
inTrain <- createDataPartition(y = full_data$myvariable, p = 0.80, list = FALSE)
training <- full_data[inTrain, ]  # training data
testing <- full_data[-inTrain, ]  # testing data

# At this point, the distribution of some relevant variables should be examined to ensure that the two sets are comparable
# For example: side-by-side histograms of age distribution with equal breaks
old_par <- par(mfrow = c(1, 2)) # par() returns the previous settings so they can be restored
hist(training$myvariable, breaks = 20)
hist(testing$myvariable, breaks = 20)
par(old_par) # Restore the plotting layout so later plots are not forced into two panels

# Derive optimum Araki's b values for an assessment criterion for a range of elements

# UFR adjustment function - see Equation 5 in manuscript
# Arguments:
#   var1 = Unadjusted analyte concentrations
#   var2 = Urinary flow rates in mL/min
#   var3 = Araki's b value
# Returns:
#   Adjusted analyte concentrations, computed as var1 * var2^var3
UFR.fun <- function(var1, var2, var3) {
  var1 * var2^var3
}

# Prepare csv for output of results (this overwrites any existing bOutputs.csv in the working directory)
header <- "Analyte,b,Criterion_Corr,lowerCI,upperCI"
write.table(file = "bOutputs.csv", header, sep = ",", row.names = FALSE, col.names = FALSE, quote = FALSE)

# Perform nested loop to generate multiple Criterion correlations for UFR adjustments using a range of
# Araki's b values to derive optimum b values.
# Criterion A (correlation between analyte and UFR) is used as an example here.
#
# The training columns are referenced explicitly (training$..., training[, e]) rather than via attach():
# with attach(), a global variable sharing a column's name would silently mask the data, and an error
# inside the loop would leave the dataset attached.
erange <- c("myanalyte1", "myanalyte2", "myanalyte3", "myanalyte4") # Define range of analytes/chemical elements

# Set range of Araki's b values (identical for every analyte, so defined once outside the loops)
brange <- seq(0.01, 1.5, by = 0.01) # from b=0.01 to b=1.5 by increments of 0.01

# log(UFR) does not depend on the analyte or on b, so it is computed once
logUFR <- log(training$myUFR)

for (j in seq_along(erange)) {
  e <- erange[j]

  for (i in seq_along(brange)) {
    b <- brange[i]

    # Calculate UeUFR (UFR-adjusted analytes) using the previously defined function
    UeUFR <- UFR.fun(training[, e], training$myUFR, b)

    # Calculate Criterion A Pearson correlations and upper/lower 95 % CIs
    result <- cor.test(x = logUFR, y = log(UeUFR), method = "pearson")
    result.UFR <- result$estimate
    loUFRconf <- result$conf.int[1]
    upUFRconf <- result$conf.int[2]

    print(paste("x = UF; b =", b))
    print(result.UFR)

    # Output results to csv file (one row appended per analyte/b combination)
    write.table(file = "bOutputs.csv",
                paste(e, b, result.UFR, loUFRconf, upUFRconf, sep = ","),
                row.names = FALSE, col.names = FALSE, quote = FALSE, append = TRUE)
  } # End of i loop
} # End of j loop

# The resultant csv file can then be analysed to extract optimum b values including the generation of the types of plots presented in
# Figure 2 in the manuscript.
# If using the correlation between adjusted analyte concentration and UFR as the assessment criterion, the correlation closest
# to absolute zero corresponds to the optimum value of b. If using the correlation between adjusted analyte concentration and, for example, blood analyte concentration,
# the strongest correlation (closest to 1) corresponds to the optimum value of b.
######################################################### PART 2 ###############################################################

# The optimum Araki's b values derived in PART 1 can be implemented in the UFR-adjustment
# of testing dataset, or other biomonitoring dataset, analyte concentrations and compared with the other adjustment methods presented below:

# Creatinine-adjusted (µg/g creatinine)
testing$myanalyteCRE <- testing$myanalyte / testing$mycreatinine
# Where -'myanalyte' is in µg/L and
#       -'mycreatinine' is in g/L

# ER (ng/hr)
testing$myanalyteER <- (testing$myanalyte * testing$myVolume_mL) / testing$myt_hours
# Where - 'myanalyte' is in µg/L
#       - 'myVolume_mL' is the urine sample total volume in mL
#       - 'myt_hours' is the time since the previous void in hours

# ERBW (ng/kg-hr)
testing$myanalyteERBW <- (testing$myanalyte * testing$myVolume_mL) / (testing$myt_hours * testing$myBW)
# As above and where - 'myBW' is in kg

# Osmolality-adjusted (µg/L)
# 1. Derive osmolality reference value - in this case the median osmolality of the training dataset
# (values vary. When comparing with other studies, be sure to check which value was chosen e.g. mean, median or arbitrary)
OSref <- median(training$myosmolality)
# 2. Perform adjustment
testing$myanalyteOS <- testing$myanalyte * (OSref / testing$myosmolality)
# Where -'myanalyte' is in µg/L and
#       -'myosmolality' is in mOsm/kg

# UFR-adjusted (µg/L, UFR 1 mL/min)
# 'b' must be set explicitly: after the PART 1 loop has run, 'b' still holds the last value of the
# search range (1.5), not the optimum, so reusing it unchanged would silently apply the wrong exponent.
b <- myoptimumb # Replace 'myoptimumb' with the analyte-specific optimum Araki's b value derived in PART 1
testing$myanalyteUFR <- testing$myanalyte * testing$myUFR^b
# Where -'myanalyte' is in µg/L and
#       -'myUFR' is in mL/min
#       -'b' is the analyte-specific optimum Araki's b value derived in PART 1

######################################################### PART 3 ###############################################################

# Calculate Pearson correlation coefficient for selected performance criteria e.g. adjusted analyte concentration versus UFR
# Here, creatinine adjustment is compared to osmolality adjustment as an example.
# Each test is stored so that its estimate can be passed on to the Williams' test below, and printed
# explicitly so the output is also shown when the script is run via source().

# Correlation 1
test1 <- cor.test(log(testing$myanalyteCRE), log(testing$myUFR), method = "pearson")
print(test1)
Correlation1 <- unname(test1$estimate)

# Correlation 2
test2 <- cor.test(log(testing$myanalyteOS), log(testing$myUFR), method = "pearson")
print(test2)
Correlation2 <- unname(test2$estimate)

# To test the significance of the difference between these two adjustment methods, Williams' test can be employed.
# Correlation 3: calculate the Pearson correlation between both of the adjusted concentrations.
# This correlation requires specification in the Williams' test as metrics are not independent.
test3 <- cor.test(log(testing$myanalyteOS), log(testing$myanalyteCRE), method = "pearson")
print(test3)
Correlation3 <- unname(test3$estimate)

library(psych) # Required package for Williams' test

# 'mysamplesize' is to be replaced with the sample size on which the correlations are based.
# r12 and r13 are the two correlations being compared; r23 is the correlation between the two adjusted concentrations.
r.test(n = mysamplesize, r12 = Correlation1, r13 = Correlation2, r23 = Correlation3, twotailed = TRUE)

# For help with using any of the functions used in the script simply type: ? followed by the name of the function
# E.g. ?r.test