/*================================================================================
        Created:  30 NOV 2022
       Modified:  30 NOV 2022
       Modified:  08 DEC 2022
       Modified:  14 DEC 2022
       Modified:  16 DEC 2022
       Modified:  17 MAR 2023 
       Modified:  23 MAR 2023 
  Last Modified:  27 MAR 2023 
 
  Prepared by Reid D. Landes
              Department of Biostatistics
              Univ of Arkansas for Medical Sciences
              Little Rock, Arkansas, USA
              rdlandes@uams.edu 
  ================================================================================*/
/* Global macro variable LOCATION: the folder from which data files are read and
   to which data files created by this program are written. Replace the ????
   placeholder with a real path before running. File paths below are built as
   LOCATION followed by a backslash and the file name. */
%let LOCATION = ????;

/* Listing layout options. NONOTES switches off NOTE lines in the log; notes are
   switched back on with OPTIONS NOTES before the planned statistical analyses. */
options ls=100 ps=55 nodate nonumber formdlim=" " nonotes;
/*================================================================================
  ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
                                 POWER ANALYSIS
  ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
  ================================================================================*/

/*================================================================================
  010 - Generating fake data. The data have the expected values under the alter-
        native hypotheses (H1). The dataset is H1_DATA.  
  ================================================================================*/

/* H1_DATA: one row per subject (ID 1-46) and time point (TIME 0, 1).
   Subjects with ID <= HIIT_N (38) are flagged HIIT = 1, the rest HIIT = 0.
   Y is the mean assumed under H1: 11.5 everywhere except 9.2 for HIIT at TIME 1. */
data H1_DATA;
 HIIT_N = 38;
 do ID = 1 to 46;
  HIIT = (ID <= HIIT_N);
  do TIME = 0, 1;
   Y = 11.5 - 2.3*HIIT*TIME;
   output;
  end;
 end;
run;

/* Cell counts of the assumed design. */
title "Power Analysis: Assumed sample sizes at each time point";
proc freq data=H1_DATA;
 tables HIIT*TIME / nopercent nocol norow;
run;

/*================================================================================
  020 - Statistical analysis of H1_DATA data.
  ================================================================================*/

/* Fits the planned repeated-measures model to the H1 means in H1_DATA with the
   covariance parameters held at the PARMS values (NOITER), and stores the
   ESTIMATE rows in EST0 for the power computation in Chunk 030.

   CLASS levels sort as HIIT = 0, 1 and TIME = 0, 1, so the HIIT*TIME cells are
   ordered (0,0) (0,1) (1,0) (1,1). Each contrast is coded as TIME 1 minus
   TIME 0 so that its sign agrees with its "Week4 - Baseline" label and with the
   ESTIMATE statements in Chunks 400 and 410. Chunk 030 uses abs(tVALUE), so the
   computed power does not depend on the sign.

   NOTE(review): PARMS values are matched, in order, to the rows of the
   Covariance Parameter Estimates table. Confirm in the COVPARMS listing that
   29.16 is applied to Residual and 0.60 to AR(1); if the rows are listed as
   AR(1) then Residual, the two values need to be swapped. */
title 'Power Analysis: Implementing the Statistical Analysis Plan';
title2 'Expected results under the Alternative Hypothesis';
proc mixed data=H1_DATA noprofile;
 by HIIT_N;
 class HIIT ID TIME; 
 model Y = HIIT|TIME;
 repeated / subject = ID type=ar(1);
 parms (29.16) (0.60) / noprofile noiter;
 estimate 'Week4 - Baseline | HIIT' TIME -1 1 HIIT*TIME 0 0 -1 1 / cl;
 estimate 'Week4 - Baseline | Ctrl' TIME -1 1 HIIT*TIME -1 1 0 0 / cl;
 estimate 'HIIT*TIME interaction' HIIT*TIME 1 -1 -1 1 / cl;
 lsmeans HIIT*TIME;
 ods listing select covparms tests3 estimates lsmeans;
 ods output estimates=EST0;
 run;

/*================================================================================
  030 - Power analysis of H1_DATA data.
        Note: This chunk uses the EST0 dataset created in Chunk 020.
  ================================================================================*/
title 'Power Analysis: Results';
/* For each ESTIMATE row in EST0, POWER is the probability that a noncentral t
   variate (DF degrees of freedom, noncentrality abs(tVALUE)) exceeds the upper
   1 - ALPHA/2 critical value of the central t distribution. */
data POWER;
 retain LABEL ESTIMATE POWER ALPHA;   /* puts these columns first */
 set EST0;
 AR1    = 0.60;
 ALPHA  = .05;
 T_CRIT = tinv(1 - ALPHA/2, DF);
 POWER  = 1 - probt(T_CRIT, DF, abs(tVALUE));
 drop T_CRIT;
run;

proc print data=POWER noobs;
 by AR1;
 var LABEL ESTIMATE ALPHA POWER;
run;

/* EST0 is no longer needed. */
proc datasets;
 delete EST0;
run;
quit;

/*================================================================================
  ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
                             RANDOM ALLOCATION PLAN
  ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
  ================================================================================*/

/* 
  We produce a randomization plan for males and for females. 
  Each plan will allow for 36 potential participants.

  STEP 1. Randomly choose one of 
     the following blocks of 6 and/or 12. 
     PLAN 1 :  6, 6, 6, 6, 6, 6
     PLAN 2 :  12, 6, 6, 6, 6
     PLAN 3 :  6, 12, 6, 6, 6
     PLAN 4 :  6, 6, 12, 6, 6
     PLAN 5 :  6, 6, 6, 12, 6
     PLAN 6 :  6, 6, 6, 6, 12
     PLAN 7 :  12, 12, 6, 6
     PLAN 8 :  12, 6, 12, 6
     PLAN 9 :  12, 6, 6, 12
     PLAN 10:  6, 12, 6, 12
     PLAN 11:  6, 6, 12, 12
     PLAN 12:  12, 12, 12
   ##### Uses SEED1 #####

   STEP 2. Assign ID values 101 - 136 for females and 
                            201 - 236 for males.           
   STEP 3. Within each block, generate M random uniform variates, 
           say U, where M is the size of the block. 
   ##### Uses SEED2 for females #####
   ##### Uses SEED3 for males #####

   STEP 4. Sort each block by U.
   STEP 5. If the block size is 6, 
             then the smallest U value is assigned "CTRL";
             otherwise "HIIT"
           If the block size is 12, 
             then the smallest two U values are assigned "CTRL"; 
             otherwise "HIIT".
   STEP 6. We accomplish STEPS 2 and 3 in one data step. 
           We produce 10000 such plans (indexed by REP) so that we can check that 
           any particular participant ID is assigned to the HIIT group 
           approximately 83.3% of the time. We check whether there 
           is any evidence that the IDs are not uniformly
           allocated to HIIT. We do this with a chi-square test. 
   STEP 7. We randomly choose one of the 10000 plans (a REP) to use for the final plan. 
   ##### Uses SEED4 #####

*/

/*================================================================================
  100 - Seeds for random value generation
  ================================================================================*/
/* Each seed is a random uniform variate U(0, 1000000) obtained from https://www.random.org .
   NOTE: SEED1 is assigned a new value in Chunk 504 for the bootstrap. */
%let SEED1 = 352016 ; /* Step 1: choice of block plan (Chunk 110) */
%let SEED2 = 921385 ; /* Step 3: uniform variates for females (Chunk 120) */
%let SEED3 = 148376 ; /* Step 3: uniform variates for males (Chunk 120) */
%let SEED4 = 631019 ; /* Step 7: choice of the replicate plan (Chunk 160) */

/*================================================================================
  110 - Step 1
  ================================================================================*/
/* One row per sex (MALE = 0, 1): a uniform variate scaled to 0-1000000 and
   rounded to an integer, mapped to a block plan number 1-12 by its remainder
   modulo 12. */
data STEP1;
 do MALE = 0 to 1;
  STEP1_U = round(1000000*ranuni(&SEED1), 1);
  PLAN    = 1 + mod(STEP1_U, 12);
  output;
 end;
run;

title 'Random Allocation: The Chosen Block Plan';
proc print data=STEP1 noobs;
run;

/*================================================================================
  120 - STEPS 2 & 3
  ================================================================================*/
/* Females (IDs 101-136): 10000 replicate plans under block plan 7
   (blocks of 12, 12, 6, 6). Each row gets its block number, the block size M,
   and a uniform variate STEP3_U used for ranking within the block. */
data FEMALES0;
 MALE = 0;
 do REP = 1 to 10000;
  do ID = 101 to 136;
   if      ID <= 112 then do; BLOCK = 1; M = 12; end;
   else if ID <= 124 then do; BLOCK = 2; M = 12; end;
   else if ID <= 130 then do; BLOCK = 3; M = 6;  end;
   else                   do; BLOCK = 4; M = 6;  end;
   STEP3_U = ranuni(&SEED2);
   output;
  end;
 end;
run;

/* Males (IDs 201-236): 10000 replicate plans under block plan 2
   (blocks of 12, 6, 6, 6, 6), built the same way with its own seed. */
data MALES0;
 MALE = 1;
 do REP = 1 to 10000;
  do ID = 201 to 236;
   if      ID <= 212 then do; BLOCK = 1; M = 12; end;
   else if ID <= 218 then do; BLOCK = 2; M = 6;  end;
   else if ID <= 224 then do; BLOCK = 3; M = 6;  end;
   else if ID <= 230 then do; BLOCK = 4; M = 6;  end;
   else                   do; BLOCK = 5; M = 6;  end;
   STEP3_U = ranuni(&SEED3);
   output;
  end;
 end;
run;

/*================================================================================
  130 - STEP 4
  ===============================================================================*/
/* Rank the uniform variates within each block of each replicate plan.
   STEP3_R = 1 marks the smallest STEP3_U in its block. */
proc rank data=FEMALES0 out=FEMALES1;
 var STEP3_U;
 ranks STEP3_R;
 by REP BLOCK M;
run;

proc rank data=MALES0 out=MALES1;
 var STEP3_U;
 ranks STEP3_R;
 by REP BLOCK M;
run;
/*===============================================================================
  140 - STEP 5
  ===============================================================================*/
/* Treatment assignment from the within-block rank: in a block of 12 the two
   smallest ranks are controls (HIIT = 0); in any other block only ranks below 2
   are controls. Everyone else is HIIT = 1. */
data FEMALES2;
 set FEMALES1;
 if M = 12 then HIIT = (STEP3_R > 2);
 else           HIIT = (STEP3_R >= 2);
run;

data MALES2;
 set MALES1;
 if M = 12 then HIIT = (STEP3_R > 2);
 else           HIIT = (STEP3_R >= 2);
run;
/*===============================================================================
  150 - STEP 6
  ===============================================================================*/
/* Females: chi-square test of ID by HIIT over all replicate plans. The
   cross-tabulation itself is kept out of the listing and captured in XTAB0;
   the printed table shows, per ID, the row percent assigned to HIIT, in
   ascending order. */
title1 'Random Allocation: Check of randomization for females';
proc freq data=FEMALES2;
 ods output crosstabfreqs=XTAB0;
 ods listing exclude crosstabfreqs;
 tables ID*HIIT / chisq nopercent nocol;
run;
proc sort data=XTAB0;
 by ROWPERCENT;
run;
data FEMALE_XTAB1;
 set XTAB0;
 HIIT_PERCENT = ROWPERCENT;
 /* keep only the HIIT = 1 cells that carry a row percent */
 if HIIT = 1 and HIIT_PERCENT ne . then output;
run;
proc print data=FEMALE_XTAB1;
 var ID HIIT_PERCENT;
run;

/* Males: same check. XTAB0 is overwritten. */
title1 'Random Allocation: Check of randomization for males';
proc freq data=MALES2;
 ods output crosstabfreqs=XTAB0;
 ods listing exclude crosstabfreqs;
 tables ID*HIIT / chisq nopercent nocol;
run;
proc sort data=XTAB0;
 by ROWPERCENT;
run;
data MALE_XTAB1;
 set XTAB0;
 HIIT_PERCENT = ROWPERCENT;
 if HIIT = 1 and HIIT_PERCENT ne . then output;
run;
proc print data=MALE_XTAB1;
 var ID HIIT_PERCENT;
run;
 
/*===============================================================================
  160 - STEP 7
  ===============================================================================*/ 
/* Draw the replicate plan to use. MERGER is a constant key used only to attach
   CHOSEN_REP to every row of BOTH_SEXES below.
   NOTE(review): round(ranuni*10000, 1) ranges over 0-10000, so CHOSEN_REP ranges
   over 1-10001 while REP runs 1-10000. A draw of 10001 would leave
   FINAL_PLAN_ORIG empty. The formula is left as is so that the plan produced
   from SEED4 does not change. */
data STEP7;
  CHOSEN_REP = round(10000*ranuni(&SEED4), 1) + 1;
  MERGER = 999;
run;

title 'Random Allocation: Chosen REP';
proc print data=STEP7 noobs;
 var CHOSEN_REP;
run;

/* Stack the male and female plans and add the constant merge key. */
data BOTH_SEXES;
 set MALES2 FEMALES2;
 MERGER = 999;
run;

/* Attach CHOSEN_REP to every row and keep only the chosen replicate. */
data FINAL_PLAN_ORIG;
 merge BOTH_SEXES STEP7;
 by MERGER;
 if REP = CHOSEN_REP;
run;
proc sort data=FINAL_PLAN_ORIG;
 by MALE;
run;

/* Keep only the variables relevant to the investigator - Emir Tas.
   ENROLLMENT_ORDER restarts at 1 within each sex. */
data ALLOCATION_PLAN;
 retain ENROLLMENT_ORDER sex study_group;
 set FINAL_PLAN_ORIG;
 by MALE;
 if first.MALE then ENROLLMENT_ORDER = 0;
 ENROLLMENT_ORDER + 1;
 sex = MALE;
 study_group = HIIT;
 keep ENROLLMENT_ORDER study_group sex;
run;

/*===============================================================================
  170 - The random allocation plan
        And clearing out datasets no longer needed.
  ===============================================================================*/ 
/* Cross-tabulation of sex by study group in the chosen plan. */
title "Random Allocation: HIIT assignment among 36 potential same-sex participants";
proc freq data=ALLOCATION_PLAN;
 tables sex*study_group / nopercent nocol norow;
run;

/* The plan itself. */
title "Random Allocation Plan";
proc print noobs data=ALLOCATION_PLAN;
run;

/* Remove the intermediate randomization datasets from WORK. */
proc datasets;
 delete STEP1 STEP7 XTAB0 BOTH_SEXES FINAL_PLAN_ORIG
        FEMALES0 - FEMALES2 FEMALE_XTAB1
        MALES0 - MALES2 MALE_XTAB1;
run;
quit;


/*================================================================================
  ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
                                STUDY DATA
  ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
  ================================================================================*/

 /*================================================================================
  200 - Reading in the study data
  ================================================================================*/
/* Read sheet HIIT_Data of the study workbook into WORK1, replacing any
   existing WORK1. */
proc import out = WORK1 replace dbms = xlsx
  datafile = "&LOCATION\HIIT Data (03-23-2023).xlsx";
  sheet = HIIT_Data;
run;

/*================================================================================
  ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
                        UPDATED POWER ANALYSES
  ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
  ================================================================================*/
 
/*================================================================================
  310 - Generating fake data so that the observed sample sizes at both time points
        match what was obtained. The data have the expected values under the alter-
        native hypotheses (H1). The dataset is H1_DATA_updated.  
  ================================================================================*/
/* Rows of WORK1 for outcome ihtg_25, with indicator variables HIIT (ARM is
   "HIIT") and TIME (WEEK is 4). EY is the mean assumed under H1, set to missing
   wherever the observed Y is missing so that the fake data have the observed
   pattern of available measurements. */
data H1_DATA_updated;
 retain OUTCOME ID HIIT TIME EY Y;   /* puts these columns first */
 set WORK1;
 ID   = STUDY_ID;
 HIIT = (ARM = "HIIT");
 TIME = (WEEK = 4);
 if Y = . then EY = .;
 else          EY = 11.5 - 2.3*HIIT*TIME;
 if OUTCOME = "ihtg_25";
 keep OUTCOME ID HIIT TIME WEEK EY Y;
run;

title 'Updated Power Analysis: Obtained sample sizes';
proc freq data = H1_DATA_updated;
 where EY ne .;
 tables HIIT*TIME / nopercent nocol norow;
run;

/*================================================================================
  320 - Statistical analysis of H1_DATA_updated data.
  ================================================================================*/
/* Fits the planned repeated-measures model to the H1 means (EY) in
   H1_DATA_updated with the covariance parameters held at the PARMS values
   (NOITER), and stores the ESTIMATE rows in EST10 for Chunk 330.

   CLASS levels sort as HIIT = 0, 1 and TIME = 0, 1, so the HIIT*TIME cells are
   ordered (0,0) (0,1) (1,0) (1,1). Each contrast is coded as TIME 1 minus
   TIME 0 so that its sign agrees with its "Week4 - Baseline" label and with the
   ESTIMATE statements in Chunks 400 and 410. Chunk 330 uses abs(tVALUE), so the
   computed power does not depend on the sign.

   NOTE(review): PARMS values are matched, in order, to the rows of the
   Covariance Parameter Estimates table. Confirm in the COVPARMS listing that
   29.16 is applied to Residual and 0.60 to AR(1); if the rows are listed as
   AR(1) then Residual, the two values need to be swapped. */
title 'Updated Power Analysis: Implementing the Statistical Analysis Plan';
title2 'Expected results under the Alternative Hypothesis';
proc mixed data=H1_DATA_updated noprofile;
 class HIIT ID TIME; 
 model EY = HIIT|TIME;
 repeated / subject = ID type=ar(1);
 parms (29.16) (0.60) / noprofile noiter;
 estimate 'Week4 - Baseline | HIIT' TIME -1 1 HIIT*TIME 0 0 -1 1 / cl;
 estimate 'Week4 - Baseline | Ctrl' TIME -1 1 HIIT*TIME -1 1 0 0 / cl;
 estimate 'HIIT*TIME interaction' HIIT*TIME 1 -1 -1 1 / cl;
 lsmeans HIIT*TIME;
 ods listing select covparms tests3 estimates lsmeans;
 ods output estimates=EST10;
 run;
/*================================================================================
  330 - Power analysis of H1_DATA_updated data.
        Note: This chunk uses the EST10 dataset created in Chunk 320.
  ================================================================================*/
title 'Updated Power Analysis: Results';
/* For each ESTIMATE row in EST10, POWER is the probability that a noncentral t
   variate (DF degrees of freedom, noncentrality abs(tVALUE)) exceeds the upper
   1 - ALPHA/2 critical value of the central t distribution. */
data UPDATED_POWER;
 retain LABEL ESTIMATE POWER ALPHA;   /* puts these columns first */
 set EST10;
 AR1    = 0.60;
 ALPHA  = .05;
 T_CRIT = tinv(1 - ALPHA/2, DF);
 POWER  = 1 - probt(T_CRIT, DF, abs(tVALUE));
 drop T_CRIT;
run;

proc print data=UPDATED_POWER noobs;
 by AR1;
 var LABEL ESTIMATE ALPHA POWER;
run;

/* EST10 is no longer needed. */
proc datasets;
 delete EST10;
run;
quit;


/*================================================================================
  ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
                        PLANNED STATISTICAL ANALYSES
  ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
  ================================================================================*/

/* Turn log NOTEs back on (they were switched off by OPTIONS NONOTES at the top
   of the program). */
options notes;
/*================================================================================
  400 - Analyses of IHTG_25 and CAP
        Checks of normal assumptions follow the repeated measures ANOVA results.
  ================================================================================*/
/* Order WORK1 for BY-OUTCOME processing. */
proc sort data=WORK1;
 by OUTCOME ARM STUDY_ID WEEK;
run;

/* Repeated-measures model (AR(1) within STUDY_ID) for outcomes ihtg_25 and CAP.
   The ESTIMATE coefficients assume two ARM levels followed by two WEEK levels
   in sorted order; per the labels, the ARM*WEEK cells are Ctrl 0w, Ctrl 4w,
   HIIT 0w, HIIT 4w. Saved for later chunks: residuals and predictions (OUT1),
   least-squares means (LSM1), estimates (EST1), covariance parameters
   (COVPARM1). */
title "Planned Analyses of IHTG_25 and CAP";
proc mixed data = WORK1;
 where OUTCOME in ('ihtg_25','CAP');
 by OUTCOME;
 class ARM STUDY_ID WEEK;
 model Y = ARM|WEEK / outp=OUT1 ddfm=kr2;
 repeated / type=ar(1) subject = STUDY_ID;
 lsmeans ARM*WEEK / e cl;
 estimate '[1]  4w - 0w | HIIT'     WEEK -1 1  ARM*WEEK  0 0  -1 1 / cl;
 estimate '[2]  4w - 0w | Ctrl'     WEEK -1 1  ARM*WEEK -1 1   0 0 / cl;
 estimate '[3] HIIT*WEEK | Wk0 & 4'            ARM*WEEK  1 -1 -1 1 / cl;
 ods listing select covparms tests3 lsmeans estimates;
 ods output lsmeans=LSM1 estimates=EST1 covparms=COVPARM1;
run;

/* Normality tests and plots of the residuals saved in OUT1, per outcome. */
title2 "Check of normal assumptions";
proc univariate data=OUT1 plots normal;
 by OUTCOME;
 var RESID;
 ods listing select plots testsfornormality;
run;

/*================================================================================
  410 - Analyses of all other outcomes
        Checks of normal assumptions follow the repeated measures ANOVA results.
  ================================================================================*/
/* Same repeated-measures model for every outcome other than ihtg_25 and CAP,
   with initial_IHTG added as a covariate. Only the estimates table is listed.
   Saved for later chunks: residuals and predictions (OUT0), least-squares means
   (LSM0), estimates (EST0), fixed-effect solutions (SOLN0), covariance
   parameters (COVPARM0). */
title "Planned Analyses (with covariate modification) of all other outcomes";
proc mixed data = WORK1;
 where OUTCOME not in ('ihtg_25','CAP');
 by OUTCOME;
 class ARM STUDY_ID WEEK;
 model Y = ARM|WEEK initial_IHTG / solution cl outp=OUT0 ddfm=kr2;
 repeated / type=ar(1) subject = STUDY_ID;
 lsmeans ARM*WEEK / e cl;
 estimate '[1]  4w - 0w | HIIT'     WEEK -1 1  ARM*WEEK  0 0  -1 1 / cl;
 estimate '[2]  4w - 0w | Ctrl'     WEEK -1 1  ARM*WEEK -1 1   0 0 / cl;
 estimate '[3] HIIT*WEEK | Wk0 & 4'            ARM*WEEK  1 -1 -1 1 / cl;
 ods listing select estimates;
 ods output lsmeans=LSM0 estimates=EST0 solutionf=SOLN0 covparms=COVPARM0;
run;

/* Normality tests and plots of the residuals saved in OUT0, per outcome. */
proc univariate data=OUT0 plots normal;
 by OUTCOME;
 var RESID;
 ods listing select plots testsfornormality;
run;

/*
Outcomes with evidence that distributions were not normal:
CAP		    IHTG_25		ALT_0hr		AST_0hr			Adipo_0hr
FGF21_0hr	Gluc2hr		HOMA_IR		Insulin_0hr
Insulin_2hr	Lept_0hr	TE		    
Trig_0hr	VO2			Weight
*/

/*================================================================================
  420 - Assembling results into a single datafile.
        Using datasets created in Chunks 400 and 410.
  ================================================================================*/

/* Assembling the initial IHTG slope estimates: one row per outcome from the
   fixed-effect solutions (SOLN0) with the slope, its confidence limits and its
   p-value. The Effect value is compared case-insensitively because character
   comparisons are case-sensitive and the covariate is spelled initial_IHTG in
   the MODEL statement. */
data SOLN1;
 retain OUTCOME BETA BETA_LO BETA_UP BETA_PVAL EFFECT;
 set SOLN0;
 BETA = ESTIMATE;
 BETA_LO = LOWER;
 BETA_UP = UPPER;
 BETA_PVAL = PROBT;
 if upcase(EFFECT) = 'INITIAL_IHTG' then output;
 keep OUTCOME BETA BETA_LO BETA_UP BETA_PVAL ;
 run;
/* Assembling the Covariance Parameters: split the stacked covariance-parameter
   tables into the residual-variance rows and the AR(1) rows. */
data COVPARM2 COVPARM3;
 set COVPARM0 COVPARM1;
 if      COVPARM = "Residual" then output COVPARM2;
 else if COVPARM = "AR(1)"    then output COVPARM3;
run;

/* Root mean squared error per outcome. */
data COVPARM4;
 set COVPARM2;
 rMSE = sqrt(ESTIMATE);
 drop COVPARM SUBJECT ESTIMATE;
run;
proc sort data=COVPARM4;
 by OUTCOME;
run;

/* AR(1) correlation per outcome. */
data COVPARM5;
 set COVPARM3;
 AR1_corr = ESTIMATE;
 drop COVPARM SUBJECT ESTIMATE;
run;
proc sort data=COVPARM5;
 by OUTCOME;
run;

/* One row per outcome: rMSE, AR(1) correlation and the initial-IHTG slope. */
data COVPARM6;
 merge COVPARM4 COVPARM5 SOLN1;
 by OUTCOME;
run;

/* Assembling the ARM*WEEK means: stack the least-squares means from both
   analyses and order them so each OUTCOME-ARM group lists its weeks in order. */
data LSM2;
 set LSM0 LSM1;
 keep OUTCOME ARM WEEK ESTIMATE;
run;
proc sort data=LSM2;
 by OUTCOME ARM WEEK;
run;

/* One row per OUTCOME-ARM; COL1 and COL2 are the first and second week in
   sorted order. */
proc transpose data=LSM2 out=LSM3;
 by OUTCOME ARM;
 var ESTIMATE;
run;

/* Name the two means and attach the ESTIMATE label of the matching within-arm
   contrast, which is the merge key used for EST4. */
data LSM4;
 set LSM3;
 MEAN0wk = COL1;
 MEAN4wk = COL2;
 if      ARM = 'CTRL' then LABEL = "[2]  4w - 0w | Ctrl";
 else if ARM = 'HIIT' then LABEL = "[1]  4w - 0w | HIIT";
 keep OUTCOME MEAN0wk MEAN4wk LABEL;
run;
proc sort data=LSM4;
 by OUTCOME LABEL;
run;

/* Stack the ESTIMATE tables from both analyses and rename the confidence
   limits. */
data EST2;
 set EST0 EST1;
 LOWER95 = LOWER;
 UPPER95 = UPPER;
 drop LOWER UPPER ALPHA;
run;
proc sort data=EST2;
 by OUTCOME LABEL;
run;

/* Add the per-outcome quantities (rMSE, AR(1) correlation, slope). */
data EST3;
 merge EST2 COVPARM6;
 by OUTCOME;
run;

/* Add the week-0 and week-4 means to the within-arm contrasts. The per-outcome
   quantities are shown only on the "[1]" row of each outcome and blanked on the
   other rows. */
data EST4;
 retain OUTCOME LABEL MEAN0wk MEAN4wk DIFF LOWER95 UPPER95 
        PROBT rMSE AR1_corr BETA BETA_LO BETA_UP BETA_PVAL;
 merge EST3 LSM4;
 by OUTCOME LABEL;
 DIFF = ESTIMATE;
 if LABEL ne "[1]  4w - 0w | HIIT" then
    call missing(rMSE, AR1_corr, BETA, BETA_LO, BETA_UP, BETA_PVAL);
 keep OUTCOME LABEL MEAN0wk MEAN4wk DIFF LOWER95 UPPER95 
      PROBT rMSE AR1_corr BETA BETA_LO BETA_UP BETA_PVAL;
run;
/* Rounding EST4 so that it is easier to read, and keeping only the outcomes
   reported in Table 2. Means, differences, limits, rMSE, correlation and slope
   are rounded to 2 decimals, the slope p-value to 3. A contrast p-value below
   .0001 is reported as .0001. A missing p-value is left missing: missing
   values compare as smaller than any number, so without the explicit check a
   missing p-value would be reported as .0001. */
data HIIT_TABLE2; 
 set EST4;
 MEAN0wk = round(MEAN0wk, .01);
 MEAN4wk = round(MEAN4wk, .01);
 DIFF = round(DIFF, .01);
 LOWER95 = round(LOWER95, .01);
 UPPER95 = round(UPPER95, .01);
 if not missing(PROBT) and PROBT < .0001 then PROBT = .0001; 
 rMSE = round(rMSE, .01);
 AR1_corr = round(AR1_corr, .01);
 BETA = round(BETA, .01);
 BETA_LO = round(BETA_LO, .01);
 BETA_UP = round(BETA_UP, .01);
 BETA_PVAL = round(BETA_PVAL, .001);
 if OUTCOME in 
("Weight", "BMI", "SUBTOT_PFAT", "SUBTOT_LEAN", "VFAT_AREA", "ihtg_25", 
"CAP", "TE", "VO2_lean", "GlucFasting", "Gluc2hr", "Insulin_0hr", 
"Insulin_2hr", "HOMA_IR", "Total_Chol_0hr", "HDL_0hr", "LDL_0hr", 
"Trig_0hr", "ALT_0hr", "AST_0hr", "Adipo_0hr", "Lept_0hr", "FGF21_0hr") then output;
 run;
/* Write Table 2 as a CSV file into the LOCATION folder, like the other files
   this program creates. DBMS=CSV states the format explicitly and REPLACE lets
   a re-run overwrite an existing file. */
proc export data=HIIT_TABLE2 
 outfile="&LOCATION\HIIT Table 2 (from SAS).csv"
 dbms=csv replace; 
run; quit;
/* Clearing out datasets no longer needed: COVPARM1-COVPARM6, EST0-EST4,
   LSM2-LSM4 and SOLN1. LSM0, LSM1 and SOLN0 are kept for the bootstrap chunks. */
proc datasets;
 delete SOLN1
        LSM2 - LSM4
        EST0 - EST4
        COVPARM1 - COVPARM6;
run;
quit;

/*================================================================================
  ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
                      SENSITIVITY ANALYSES VIA BOOTSTRAP
  ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
  ================================================================================*/

/*================================================================================
  501 - Getting STUDY_ID <--> SUBJ key
        Uses WORK1 created in Chunk 200
  ================================================================================*/
/* Build ID_KEY: one row per STUDY_ID with its initial_IHTG and a subject
   number SUBJ assigned in random order. Note that WORK1 is left sorted by
   STUDY_ID. */
proc sort data=WORK1;
 by STUDY_ID;
run;

/* First row of each STUDY_ID. */
data TMP1;
 set WORK1;
 by STUDY_ID;
 if first.STUDY_ID;
run;

/* Attach a uniform variate to every row, then discard rows with a blank
   STUDY_ID. */
data ID0;
 set TMP1;
 U0 = ranuni(688080);
 if STUDY_ID ne " ";
 keep STUDY_ID initial_IHTG U0;
run;

/* Number the subjects 1, 2, ... in ascending order of the uniform variate. */
proc sort data=ID0;
 by U0;
run;
data ID_KEY;
 set ID0;
 by U0;
 if first.U0 then SUBJ + 1;
 drop U0;
run;
proc sort data=ID_KEY;
 by STUDY_ID;
run;

proc datasets;
 delete ID0 TMP1;
run;
quit;

/*================================================================================
  502 - Getting TREATMENT*WEEK mean vectors
        Uses LSM1, LSM0, and SOLN0 created in Chunks 400 and 410.
  ================================================================================*/

/* MODEL_PARMS: one row per OUTCOME-ARM with the two least-squares means (MEAN0,
   MEAN1: first and second week in sorted order), the initial-IHTG slope BETA
   (0 for outcomes with no slope in SOLN0) and a running outcome number
   OUTCOME_NO. */

/* Stack the least-squares means from both analyses. */
data MEANS0;
 set LSM0 LSM1;
 MEAN = ESTIMATE;
 keep OUTCOME ARM WEEK MEAN;
 run;
proc sort data=MEANS0;
 by OUTCOME ARM WEEK;
 run;
/* One row per OUTCOME-ARM; COL1 and COL2 follow the sorted WEEK order. */
proc transpose data=MEANS0 out=MEANS1;
 by OUTCOME ARM;
 var MEAN;
 run; 
data MEANS2;
 set MEANS1;
 MEAN0 = COL1;
 MEAN1 = COL2;
 keep OUTCOME ARM MEAN0 MEAN1;
 run;

/* Slope of initial_IHTG per outcome. The Effect value is compared
   case-insensitively because character comparisons are case-sensitive and the
   covariate is spelled initial_IHTG in the MODEL statement. */
data SOLN10;
 set SOLN0;
 BETA = ESTIMATE;
 if upcase(EFFECT) = "INITIAL_IHTG" then output;
 keep OUTCOME BETA;
 run;
proc sort data=SOLN10;
 by OUTCOME;
 run;

/* Combine means and slopes; number the outcomes in sorted order. */
data MODEL_PARMS;
 merge MEANS2 SOLN10;
 by OUTCOME;
 if first.OUTCOME then OUTCOME_NO + 1;
 if BETA = . then BETA=0;
 run;

proc datasets;
 delete MEANS0-MEANS2 SOLN10;
 run;quit;

/*================================================================================
  503 - Getting residual vectors
        Uses OUT0 and OUT1 created in Chunks 400 and 410.
  ================================================================================*/

/* RESID_VECTORS: one row per OUTCOME-SUBJ with the two residuals (E0, E1:
   first and second week in sorted order) and the outcome number OUTCOME_NO
   assigned in sorted OUTCOME order. */

/* Stack the residuals from both analyses. */
data RESID0;
 set OUT0 OUT1;
 keep STUDY_ID OUTCOME WEEK RESID;
run;
proc sort data=RESID0;
 by OUTCOME STUDY_ID WEEK;
run;

/* One row per OUTCOME-STUDY_ID; COL1 and COL2 follow the sorted WEEK order. */
proc transpose data=RESID0 out=RESID1;
 by OUTCOME STUDY_ID;
 var RESID;
run;

data RESID2;
 retain OUTCOME OUTCOME_NO;
 set RESID1;
 by OUTCOME;
 if first.OUTCOME then OUTCOME_NO + 1;
 E0 = COL1;
 E1 = COL2;
 keep OUTCOME OUTCOME_NO STUDY_ID E0 E1;
run;

/* Replace STUDY_ID by the subject number from ID_KEY. */
proc sort data=RESID2;
 by STUDY_ID;
run;
data RESID_VECTORS;
 merge RESID2 ID_KEY;
 by STUDY_ID;
 keep OUTCOME OUTCOME_NO SUBJ E0 E1;
run;
proc sort data=RESID_VECTORS;
 by OUTCOME SUBJ;
run;

proc datasets;
 delete RESID0 - RESID2;
run;
quit;
 
/*================================================================================
  504 - Drawing the bootstrap samples - randomly sampling subjects with replacement 
  ================================================================================*/

/* A randomly generated seed value.
   NOTE: this reassigns SEED1, which Chunk 110 also uses for the block-plan
   draw; re-running Chunk 110 after this point will use the value set here
   unless Chunk 100 is run again first. */
%let SEED1 = 491578; 

/* This data step obtains the subjects in each bootstrapped sample.
   For each of 10000 bootstrap samples (BOOT) and each of 40 slots (BOOT_SUBJ),
   a SUBJ number between 1 and 40 is drawn with replacement; SUBJ corresponds to
   a particular STUDY_ID in the ID_KEY dataset. Slots 1-6 are labelled CTRL and
   slots 7-40 HIIT. The drawn SUBJ is repeated for every OUTCOME_NO, so the same
   subject supplies all outcomes of a slot, which maintains the related nature
   of results from several outcomes.
   The upper limit 26 of OUTCOME_NO depends on how many outcomes there are in
   the WORK1 dataset. */
data BOOT1;
 retain BOOT OUTCOME_NO BOOT_SUBJ ARM SUBJ;
 do BOOT = 1 to 10000;
  do BOOT_SUBJ = 1 to 40;
   SUBJ = round(0.5 + 40*ranuni(&SEED1), 1);
   if BOOT_SUBJ <= 6 then ARM = "CTRL";
   else                   ARM = "HIIT";
   do OUTCOME_NO = 1 to 26;
    output;
   end;
  end;
 end;
run;

proc sort data=BOOT1;
 by OUTCOME_NO SUBJ;
run;
proc sort data=RESID_VECTORS;
 by OUTCOME_NO SUBJ;
run;

/* The errors (estimated with residuals from the models) are assumed to be
   independent among the individuals, and their distribution is assumed not to
   depend on HIIT treatment. Here the SUBJ-specific residual vectors are merged
   into BOOT1; residual rows that match no bootstrap draw are discarded. */
data BOOT2;
 merge BOOT1 RESID_VECTORS;
 by OUTCOME_NO SUBJ;
 if BOOT ne .;
run;
/*================================================================================
  505 - Creating the bootstrapped datasets
  ================================================================================*/
 /* Associate the estimated treatment*time means (from the original analyses)
    with the treatment arm under which each bootstrap slot is placed. */
proc sort data=BOOT2;
 by OUTCOME_NO ARM;
run;
proc sort data=MODEL_PARMS;
 by OUTCOME_NO ARM;
run;
data BOOT3;
 merge BOOT2 MODEL_PARMS;
 by OUTCOME_NO ARM;
run;

 /* Bring in initial_IHTG for the drawn subject and build the bootstrapped
    responses: arm-by-week mean + slope * initial_IHTG + residual. */
proc sort data=BOOT3;
 by SUBJ;
run;
proc sort data=ID_KEY;
 by SUBJ;
run;
data BOOT4;
 retain BOOT OUTCOME ARM SUBJ Y0 Y1;
 merge BOOT3 ID_KEY;
 by SUBJ;
 Y0 = MEAN0 + BETA*initial_IHTG + E0;
 Y1 = MEAN1 + BETA*initial_IHTG + E1;
run;
proc sort data=BOOT4;
 by BOOT OUTCOME BOOT_SUBJ;
run;

 /* Checking whether each SUBJ was randomly selected about 1/40th
    of the time. */
title "A random-selection check";
proc freq data=BOOT4;
 tables SUBJ;
run;

 /* Long format: two rows per BOOT4 row, WEEK = 0 with Y0 and WEEK = 4 with Y1. */
data BOOT5;
 retain BOOT OUTCOME ARM BOOT_SUBJ INITIAL_IHTG WEEK Y;
 set BOOT4;
 do WEEK = 0, 4;
  if WEEK = 0 then Y = Y0;
  else             Y = Y1;
  output;
 end;
 keep BOOT OUTCOME ARM BOOT_SUBJ INITIAL_IHTG WEEK Y;
run;

/*================================================================================

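  !@#$%& !@#$%& !@#$%&  WARNING! SUBSTANTIAL COMPUTING TIME!  !@#$%& !@#$%& !@#$%&

  NOTE: As written, this comment is not closed until the end of the Chunk 570
        header, so Chunks 550, 560 and 565 do not execute. To run them, close
        each of their header comments first.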

  550 - Analyses of IHTG_25 and CAP
  ================================================================================

option nonotes;
proc sort data=BOOT5;
 by OUTCOME BOOT ARM BOOT_SUBJ WEEK;
 run;
title "Planned Analyses of IHTG_25 and CAP";
proc mixed data = BOOT5;
 by OUTCOME BOOT ;
 where OUTCOME in ('ihtg_25','CAP');
 class ARM BOOT_SUBJ WEEK;
 model Y = ARM|WEEK / ddfm=kr2  ;
 repeated / subject = BOOT_SUBJ type=ar(1);
 lsmeans ARM*WEEK / cl e;
 estimate '[1]  4w - 0w | HIIT' 
           WEEK -1 1 
		   ARM*WEEK 0 0    -1 1  / cl;
 estimate '[2]  4w - 0w | Ctrl' 
           WEEK -1 1 
		   ARM*WEEK -1 1   0 0  / cl;
 estimate '[3] HIIT*WEEK | Wk0 & 4' 
		   ARM*WEEK 1 -1   -1 1  / cl;
ods listing select none; 
ods output lsmeans=BOOTLSM1 estimates=BOOTEST1 ;
run;

/*================================================================================
 
  !@#$%& !@#$%& !@#$%&  WARNING! SUBSTANTIAL COMPUTING TIME!  !@#$%& !@#$%& !@#$%&

  560 - Analyses of all other outcomes
  ================================================================================

title "Planned Analyses (with covariate modification) of all other outcomes";
proc mixed data = BOOT5;
 by OUTCOME BOOT ;
 where OUTCOME not in ('ihtg_25','CAP');
 class ARM BOOT_SUBJ WEEK;
 model Y = ARM|WEEK initial_IHTG / ddfm=kr2 s cl ;
 repeated / subject = BOOT_SUBJ type=ar(1);
 lsmeans ARM*WEEK / cl e;
 estimate '[1]  4w - 0w | HIIT' 
           WEEK -1 1 
		   ARM*WEEK 0 0    -1 1  / cl;
 estimate '[2]  4w - 0w | Ctrl' 
           WEEK -1 1 
		   ARM*WEEK -1 1   0 0  / cl;
 estimate '[3] HIIT*WEEK | Wk0 & 4' 
		   ARM*WEEK 1 -1   -1 1  / cl;
ods listing select none;
ods output lsmeans=BOOTLSM0 estimates=BOOTEST0 solutionf=BOOTSOLN0;
run;


/*================================================================================
  565 - Outputting the "HIIT Bootstrapped Estimates"
  ================================================================================

data BOOTEST2;
 set BOOTEST0 BOOTEST1;
 keep OUTCOME BOOT LABEL ESTIMATE;
 run;
proc sort data=BOOTEST2;
 by OUTCOME LABEL BOOT;
 run;

proc export data=BOOTEST2 
 outfile="&LOCATION\HIIT Bootstrapped Estimates (12-16-2022).csv";
 run;quit;

/*================================================================================
  570 - Importing the HIIT Bootstrapped Estimates created in Chunks 501 - 565.
         Computing bootstrap CIs using alpha/2 and 1-alpha/2 quantiles. 
  ================================================================================*/
/* Read back the bootstrapped estimates. GUESSINGROWS=MAX makes PROC IMPORT scan
   the whole file before fixing column types and lengths. The file is written
   sorted by OUTCOME and LABEL, so the first rows all carry the same OUTCOME and
   LABEL; with the default number of scanned rows, longer values further down
   would be truncated to that length and distinct BY groups could collapse into
   one. */
proc import datafile="&LOCATION\HIIT Bootstrapped Estimates (12-16-2022).csv"
 out=BOOTEST2 dbms=csv replace; 
 guessingrows=max;
 run;

/* 2.5th and 97.5th percentiles of the bootstrapped estimates for each
   OUTCOME-LABEL combination (variables P_2_5 and P_97_5). */
 proc univariate data=BOOTEST2;
  by OUTCOME LABEL ;
  var ESTIMATE;
  output out=OUTBOOTEST0 pctlpre=P_ pctlpts= 2.5, 97.5; 
  run;

/* P_lt_05 = 1 when the percentile interval does not strictly contain 0,
   otherwise 0. */
data OUTBOOTEST00;
 retain OUTCOME LABEL P_2_5 P_97_5 ;
 set OUTBOOTEST0;
 P_lt_05 = 1 - (P_2_5 < 0 < P_97_5);
 run;

/*
proc export data=OUTBOOTEST0 
 outfile="&LOCATION\HIIT Bootstrapped quantile 95% CIs (01-08-2023).csv";
 run;quit;
 */
 

/*================================================================================
  ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
                               SECONDARY ANALYSES
  ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
  ================================================================================*/

/*================================================================================
  610 - Data manipulations
  ================================================================================*/
data TMP10;
 set WORK1;
 if OUTCOME = "ihtg_25" then output; 
 run;
proc sort data=TMP10;
 by OUTCOME ARM STUDY_ID WEEK;
 run;
proc transpose data=TMP10 out=TMP11;
 by OUTCOME ARM STUDY_ID ;
 var Y;
 run; 
data TMP1;
 set TMP11;
 IHTG_0 = COL1; IHTG_1 = COL2; IHTG_diff = COL2 - COL1; IHTG_r = 100*IHTG_diff / COL1;
 drop OUTCOME _LABEL_ _NAME_ COL1 COL2;
 run;
proc sort; 
 by STUDY_ID;
 run;

/* VO2_lean: turn the long records of WORK1 (one row per subject per WEEK)
   into one row per subject holding both measurements and their change.
   NOTE(review): COL1/COL2 are assigned purely by sort order within subject,
   so this presumes every subject has exactly two WEEK records - confirm. */

/* Keep only the VO2_lean records. */
data TMP20;
 set WORK1;
 where OUTCOME = "VO2_lean";
 run;

/* Order by WEEK within subject so the transposed columns follow WEEK order. */
proc sort data=TMP20;
 by OUTCOME ARM STUDY_ID WEEK;
 run;

/* One row per subject: COL1 = Y at the first WEEK, COL2 = Y at the second. */
proc transpose data=TMP20 out=TMP21;
 var Y;
 by OUTCOME ARM STUDY_ID;
 run;

/* VO2L_0 / VO2L_1 : first / second measurement
   VO2L_diff       : second minus first
   VO2L_r          : change as a percentage of the first measurement */
data TMP2;
 set TMP21;
 drop OUTCOME _LABEL_ _NAME_ COL1 COL2;
 VO2L_0    = COL1;
 VO2L_1    = COL2;
 VO2L_diff = COL2 - COL1;
 VO2L_r    = 100 * VO2L_diff / COL1;
 run;

/* Order by STUDY_ID, ready for a merge on that key. */
proc sort data=TMP2;
 by STUDY_ID;
 run;

/* HOMA_IR: turn the long records of WORK1 (one row per subject per WEEK)
   into one row per subject holding both measurements and their change.
   NOTE(review): COL1/COL2 are assigned purely by sort order within subject,
   so this presumes every subject has exactly two WEEK records - confirm. */

/* Keep only the HOMA_IR records. */
data TMP30;
 set WORK1;
 where OUTCOME = "HOMA_IR";
 run;

/* Order by WEEK within subject so the transposed columns follow WEEK order. */
proc sort data=TMP30;
 by OUTCOME ARM STUDY_ID WEEK;
 run;

/* One row per subject: COL1 = Y at the first WEEK, COL2 = Y at the second. */
proc transpose data=TMP30 out=TMP31;
 var Y;
 by OUTCOME ARM STUDY_ID;
 run;

/* HIR_0 / HIR_1 : first / second measurement
   HIR_diff      : second minus first
   HIR_r         : change as a percentage of the first measurement */
data TMP3;
 set TMP31;
 drop OUTCOME _LABEL_ _NAME_ COL1 COL2;
 HIR_0    = COL1;
 HIR_1    = COL2;
 HIR_diff = COL2 - COL1;
 HIR_r    = 100 * HIR_diff / COL1;
 run;

/* Order by STUDY_ID, ready for a merge on that key. */
proc sort data=TMP3;
 by STUDY_ID;
 run;

/* Match-merge the per-subject IHTG (TMP1), VO2_lean (TMP2) and HOMA_IR (TMP3)
   variables into one record per STUDY_ID. Each input must already be sorted
   by STUDY_ID. ARM is present in all three inputs, so for a given subject the
   merged value comes from the last data set that contributes a record.
   NOTE(review): presumably ARM agrees across the three inputs - confirm. */
data SECOND_DATA1;
 merge TMP1 TMP2 TMP3;
 by STUDY_ID;
 run;

 /*
 * Exporting SECOND_DATA1 for plotting in R ;
 proc export data=SECOND_DATA1  outfile = "&LOCATION\HIIT Data for secondary analyses (03-27-2023).csv"; 
 run; 
 */

/*================================================================================
  620 - ANCOVA analyses with fitness as the covariate
         ANCOVA model has an intercept and slope for the HIIT group, and
                          an intercept and slope for the CTRL group; 
                          thus 4 parameters in total.
        We use two equivalent parameterizations of the model.
        We also check the normal assumption on the residuals.
  ================================================================================*/
/* Analysis 2.0: IHTG_diff = HIIT + HIIT*VO2L_diff
   Separate intercept and VO2L_diff slope for each ARM, fitted twice. */
title 'IHTG change = HIIT + HIIT*VO2 change';

/* Parameterization 1: no common intercept, so the solution reports an
   intercept and a slope per ARM directly, with confidence limits. */
title2 'Parameterization 1';
proc mixed data=SECOND_DATA1;
 class ARM (ref="HIIT");
 model IHTG_diff = ARM ARM*VO2L_diff
       / noint solution cl outpred=OUT0;
 ods listing select covparms solutionf;
 run;

/* Parameterization 2: intercept and slope for the reference ARM ("HIIT")
   plus ARM differences; the ARM*VO2L_diff term tests equality of slopes.
   This fit overwrites OUT0 written by Parameterization 1. */
title2 'Parameterization 2';
proc mixed data=SECOND_DATA1;
 class ARM (ref="HIIT");
 model IHTG_diff = ARM VO2L_diff ARM*VO2L_diff
       / solution outpred=OUT0;
 ods listing select covparms solutionf;
 run;

/* Normality tests and plots for the residuals held in OUT0. */
title2 'Check of normal assumption';
proc univariate data=OUT0 plots normal;
 var RESID;
 ods listing select plots testsfornormality;
 run;

/*================================================================================
  621 - Updated ANCOVA analyses with fitness as the covariate
        Since there was no evidence that the slopes on fitness differed between
        the two treatment groups (see Parameterization 2), we assume the slope 
        is the same for both groups, and refit the model. 
         ANCOVA model has an intercept for the HIIT group, 
                          an intercept for the CTRL group, and
                          a common slope for both the HIIT and CTRL groups; 
                          thus 3 parameters in total.
        We also check the normal assumption on the residuals.

  ================================================================================*/
/* Analysis 2.1: IHTG_diff = HIIT + VO2L_diff
   ARM-specific intercepts with a single VO2L_diff slope shared by both
   arms. Predicted values and residuals are written to OUT0. */
title 'IHTG change = HIIT + VO2 change';
title2 'Parameterization 1';
proc mixed data=SECOND_DATA1;
 class ARM (ref="HIIT");
 model IHTG_diff = ARM VO2L_diff
       / solution outpred=OUT0;
 ods listing select covparms solutionf;
 run;

/* Normality tests and plots for the residuals held in OUT0. */
title2 'Check of normal assumption';
proc univariate data=OUT0 plots normal;
 var RESID;
 ods listing select plots testsfornormality;
 run;

/*================================================================================
  622 - Updated ANCOVA analyses with fitness as the covariate
        Since there was no evidence that the intercepts differed between the two
        treatment groups, we assume there are no differences in intercepts 
        or slopes, and refit the model. 
        The model is now a simple linear regression with one intercept and slope.
        We also check the normal assumption on the residuals.

  ================================================================================*/
/* Analysis 2.2: IHTG_diff = VO2L_diff
   Simple linear regression: one intercept and one VO2L_diff slope, with no
   ARM term. Predicted values and residuals are written to OUT0. */
title 'IHTG change = VO2 change';
title2 'Parameterization 1';
proc mixed data=SECOND_DATA1;
 model IHTG_diff = VO2L_diff
       / solution outpred=OUT0;
 ods listing select covparms solutionf;
 run;

/* Normality tests and plots for the residuals held in OUT0. */
title2 'Check of normal assumption';
proc univariate data=OUT0 plots normal;
 var RESID;
 ods listing select plots testsfornormality;
 run;

 
/*================================================================================
  630 - ANCOVA analyses with insulin sensitivity as the covariate
         ANCOVA model has an intercept and slope for the HIIT group, and
                          an intercept and slope for the CTRL group; 
                          thus 4 parameters in total.
        We use two equivalent parameterizations of the model.
        We also check the normal assumption on the residuals.
  ================================================================================*/
/* Analysis 3.0: IHTG_diff = HIIT + HIIT*HIR_diff
   Separate intercept and HIR_diff slope for each ARM, fitted twice. */
title 'IHTG change = HIIT + HIIT*HIR change';

/* Parameterization 1: no common intercept, so the solution reports an
   intercept and a slope per ARM directly, with confidence limits. */
title2 'Parameterization 1';
proc mixed data=SECOND_DATA1;
 class ARM (ref="HIIT");
 model IHTG_diff = ARM ARM*HIR_diff
       / noint solution cl outpred=OUT0;
 ods listing select covparms solutionf;
 run;

/* Parameterization 2: intercept and slope for the reference ARM ("HIIT")
   plus ARM differences; the ARM*HIR_diff term tests equality of slopes.
   This fit overwrites OUT0 written by Parameterization 1. */
title2 'Parameterization 2';
proc mixed data=SECOND_DATA1;
 class ARM (ref="HIIT");
 model IHTG_diff = ARM HIR_diff ARM*HIR_diff
       / solution outpred=OUT0;
 ods listing select covparms solutionf;
 run;

/* Normality tests and plots for the residuals held in OUT0. */
title2 'Check of normal assumption';
proc univariate data=OUT0 plots normal;
 var RESID;
 ods listing select plots testsfornormality;
 run;

/*================================================================================
  631 - Updated ANCOVA analyses with insulin sensitivity as the covariate
        Since there was no evidence that the slopes on insulin sensitivity differed 
        between the two treatment groups (see Parameterization 2), we assume the 
        slope is the same for both groups, and refit the model. 
         ANCOVA model has an intercept for the HIIT group, 
                          an intercept for the CTRL group, and
                          a common slope for both the HIIT and CTRL groups; 
                          thus 3 parameters in total.
        We also check the normal assumption on the residuals.
  ================================================================================*/
/* Analysis 3.1: IHTG_diff = HIIT + HIR_diff
   ARM-specific intercepts with a single HIR_diff slope shared by both
   arms. Predicted values and residuals are written to OUT0. */
title 'IHTG change = HIIT + HIR change';
proc mixed data=SECOND_DATA1;
 class ARM (ref="HIIT");
 model IHTG_diff = ARM HIR_diff
       / solution outpred=OUT0;
 ods listing select covparms solutionf;
 run;

/* Normality tests and plots for the residuals held in OUT0. */
title2 'Check of normal assumption';
proc univariate data=OUT0 plots normal;
 var RESID;
 ods listing select plots testsfornormality;
 run;

/*================================================================================
  632 - Updated ANCOVA analyses with insulin sensitivity as the covariate
        Since there was no evidence that the intercepts differed between the two
        treatment groups, we assume there are no differences in intercepts 
        or slopes, and refit the model. 
        The model is now a simple linear regression with one intercept and slope.
        We also check the normal assumption on the residuals.
  ================================================================================*/
/* Analysis 3.2: IHTG_diff = HIR_diff
   Simple linear regression: one intercept and one HIR_diff slope, with no
   ARM term. Predicted values and residuals are written to OUT0. */
title 'IHTG change = HIR change';
proc mixed data=SECOND_DATA1;
 model IHTG_diff = HIR_diff
       / solution outpred=OUT0;
 ods listing select covparms solutionf;
 run;

/* Normality tests and plots for the residuals held in OUT0. */
title2 'Check of normal assumption';
proc univariate data=OUT0 plots normal;
 var RESID;
 ods listing select plots testsfornormality;
 run;
 

/*================================================================================
  640 - Same as Chunk #400, except we exclude those with IHTG < 5. 
      - Analyses of IHTG_25 and CAP
        Checks of normal assumptions follow the repeated measures ANOVA results.
  ================================================================================*/
/* Order WORK1 for the BY-group processing and the REPEATED statement below. */
proc sort data=WORK1;
 by OUTCOME ARM STUDY_ID WEEK;
 run;

/* Repeated-measures model of Y on ARM, WEEK and their interaction, fitted
   separately for the ihtg_25 and CAP outcomes, restricted to records with
   INITIAL_IHTG >= 5. Within-subject covariance is AR(1).
   The second title describes the exclusion exactly as the WHERE clause
   applies it (records with INITIAL_IHTG < 5 are dropped). */
title "Planned Analyses of IHTG_25 and CAP";
title2 "Excluding those with initial IHTG < 5";
proc mixed data = WORK1;
 by OUTCOME;
 where OUTCOME in ('ihtg_25','CAP') and INITIAL_IHTG >= 5;
 class ARM STUDY_ID WEEK;
 model Y = ARM|WEEK / ddfm=kr2  outp=OUT1_cull;
 repeated / subject = STUDY_ID type=ar(1);
 lsmeans ARM*WEEK / cl ;
 /* ARM*WEEK coefficients are listed as (first ARM level: first, second
    WEEK) then (second ARM level: first, second WEEK).
    NOTE(review): the labels assume the control arm sorts before "HIIT"
    among the ARM levels - confirm against the LSMeans table. */
 estimate '[1]  4w - 0w | HIIT' 
           WEEK -1 1 
           ARM*WEEK 0 0    -1 1  / cl;
 estimate '[2]  4w - 0w | Ctrl' 
           WEEK -1 1 
           ARM*WEEK -1 1   0 0  / cl;
 estimate '[3] HIIT*WEEK | Wk0 & 4' 
           ARM*WEEK 1 -1   -1 1  / cl;
ods output lsmeans=LSM1_cull estimates=EST1_cull covparms=COVPARM1_cull;
run;

/* Normality check on the residuals of the model fitted above. It must read
   OUT1_cull, the OUTP= data set written by this PROC MIXED; OUT1 is not
   produced in this chunk. */
title2 "Check of normal assumptions";
proc univariate data=OUT1_cull normal plots;
by OUTCOME;
var RESID;
ods listing select plots testsfornormality;
run;