Machine Learning-based Colorectal Cancer Prediction using Global Dietary Data

Hanif Abdul Rahman

SOCR University of Michigan and Universiti Brunei Darussalam


Mohammad Ashraf Ottom

SOCR University of Michigan and Yarmouk University


Ivo D. Dinov

SOCR University of Michigan

Supplementary 1


# Load the dietary transactions from the comma-separated file. The first line
# of the file is skipped and duplicated items within a transaction are removed.
a.colrec <- read.transactions(
  file = "df_trans_text2.csv",
  sep = ",",
  skip = 1,
  rm.duplicates = TRUE
)

# Mine association rules containing at least two items, keeping rules with
# support >= 0.01 and confidence >= 0.25.
colrec_rule <- apriori(
  a.colrec,
  parameter = list(support = 0.01, confidence = 0.25, minlen = 2)
)
## Apriori
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##        0.25    0.1    1 none FALSE            TRUE       5    0.01      2
##  maxlen target  ext
##      10  rules TRUE
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## Absolute minimum support count: 629 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[3743 item(s), 62911 transaction(s)] done [0.06s].
## sorting and recoding items ... [15 item(s)] done [0.00s].
## creating transaction tree ... done [0.01s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [19 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
# Order the mined rules (sort() is called without `by`, so the arules default
# ordering applies) and extract the quality measures used by the 3D scatter.
sortedRule <- sort(colrec_rule)
x1   <- sortedRule@quality$support
y1   <- sortedRule@quality$confidence
z1   <- sortedRule@quality$lift
col1 <- sortedRule@quality$count
# seq_along() avoids the 1:0 pitfall of 1:length() when no rules are mined.
ruleNames <- paste0("Rule", seq_along(x1))

# 3D scatter of support (x), confidence (y) and lift (z), coloured by lift.
# The pipe chain must end at layout(): a trailing %>% would pipe the plot into
# the assignment that follows and break both statements.
plot_ly(x = ~x1, y = ~y1, z = ~z1, color = ~z1, name = ruleNames) %>%
  add_markers() %>%
  layout(
    title = paste0("Arule Support-Confidence-Lift Plot (for all ", length(x1), " rules)"),
    scene = list(
      xaxis = list(title = "Support"),
      yaxis = list(title = "Confidence"),
      zaxis = list(title = "Lift")
    )
  )

# Keep only the rules in which the `colrec_ca` item appears on either side.
fi_rules <- subset(colrec_rule, items %in% "colrec_ca")
# NOTE(review): the rule table echoed below has the shape of inspect() output,
# so the call is made explicit here -- confirm against the original notebook.
inspect(fi_rules)
##      lhs                                                                                                              rhs                                                             support confidence   coverage     lift count
## [1]  {Salad dressing, oil and vinegar, homemade}                                                                   => {colrec_ca}                                                  0.01071355  1.0000000 0.01071355  1.00000   674
## [2]  {Lettuce, salad with assorted vegetables including tomatoes and/or carrots, no dressing (Lettuce salad, NFS)} => {colrec_ca}                                                  0.01438540  1.0000000 0.01438540  1.00000   905
## [3]  {FAT BLEND FAT 75% FORTIFIED BREGOTT}                                                                         => {colrec_ca}                                                  0.02161784  1.0000000 0.02161784  1.00000  1360
## [4]  {Vegetable oil, olive}                                                                                        => {colrec_ca}                                                  0.01543450  1.0000000 0.01543450  1.00000   971
## [5]  {Egg, whole, fried, with fat (Scrambled egg, no milk added)}                                                  => {colrec_ca}                                                  0.01586368  1.0000000 0.01586368  1.00000   998
## [6]  {Vegetable oil,  canola and soybean}                                                                          => {colrec_ca}                                                  0.01789830  1.0000000 0.01789830  1.00000  1126
## [7]  {Beef, ground, medium, broiled}                                                                               => {colrec_ca}                                                  0.01827979  1.0000000 0.01827979  1.00000  1150
## [8]  {HARD CHEESE FETT 28%}                                                                                        => {colrec_ca}                                                  0.02867543  1.0000000 0.02867543  1.00000  1804
## [9]  {Butter, regular}                                                                                             => {colrec_ca}                                                  0.02824625  1.0000000 0.02824625  1.00000  1777
## [10] {Cheese, cheddar}                                                                                             => {colrec_ca}                                                  0.03024908  1.0000000 0.03024908  1.00000  1903
## [11] {Salad dressing, mayonnaise, commercial, regular}                                                             => {colrec_ca}                                                  0.02894565  1.0000000 0.02894565  1.00000  1821
## [12] {Egg, chicken, whole, fresh or frozen, raw}                                                                   => {colrec_ca}                                                  0.03619399  1.0000000 0.03619399  1.00000  2277
## [13] {Margarine, tub, composite}                                                                                   => {colrec_ca}                                                  0.04479344  1.0000000 0.04479344  1.00000  2818
## [14] {Shortening, household, unspecified vegetable oil}                                                            => {colrec_ca}                                                  0.06151547  1.0000000 0.06151547  1.00000  3870
## [15] {Egg, chicken, whole, fresh or frozen, raw,                                                                                                                                                                                  
##       Egg, whole, fried, with fat (Scrambled egg, no milk added)}                                                  => {colrec_ca}                                                  0.01193750  1.0000000 0.01193750  1.00000   751
## [16] {colrec_ca,                                                                                                                                                                                                                  
##       Egg, whole, fried, with fat (Scrambled egg, no milk added)}                                                  => {Egg, chicken, whole, fresh or frozen, raw}                  0.01193750  0.7525050 0.01586368 20.79088   751
## [17] {colrec_ca,                                                                                                                                                                                                                  
##       Egg, chicken, whole, fresh or frozen, raw}                                                                   => {Egg, whole, fried, with fat (Scrambled egg, no milk added)} 0.01193750  0.3298199 0.03619399 20.79088   751
# Interactive network graph of the `colrec_ca` rules, sorted by lift.
# The title is built from the actual rule count and item name instead of a
# hard-coded label carried over from a different analysis.
# NOTE(review): `main` is not among the control parameters listed in the
# output below for this engine, so the title may be ignored -- confirm.
plot(
  sort(fi_rules, by = "lift"),
  method = "graph",
  engine = "htmlwidget",
  control = list(
    main = list(
      title = paste0("Graph of the ", length(fi_rules), " colrec_ca-associated Rules")
    )
  )
)
## Available control parameters (with default values):
## itemCol   =  #CBD2FC
## nodeCol   =  c("#EE0000", "#EE0303", "#EE0606", "#EE0909", "#EE0C0C", "#EE0F0F", "#EE1212", "#EE1515", "#EE1818", "#EE1B1B", "#EE1E1E", "#EE2222", "#EE2525", "#EE2828", "#EE2B2B", "#EE2E2E", "#EE3131", "#EE3434", "#EE3737", "#EE3A3A", "#EE3D3D", "#EE4040", "#EE4444", "#EE4747", "#EE4A4A", "#EE4D4D", "#EE5050", "#EE5353", "#EE5656", "#EE5959", "#EE5C5C", "#EE5F5F", "#EE6262", "#EE6666", "#EE6969", "#EE6C6C", "#EE6F6F", "#EE7272", "#EE7575", "#EE7878", "#EE7B7B", "#EE7E7E", "#EE8181", "#EE8484", "#EE8888", "#EE8B8B",  "#EE8E8E", "#EE9191", "#EE9494", "#EE9797", "#EE9999", "#EE9B9B", "#EE9D9D", "#EE9F9F", "#EEA0A0", "#EEA2A2", "#EEA4A4", "#EEA5A5", "#EEA7A7", "#EEA9A9", "#EEABAB", "#EEACAC", "#EEAEAE", "#EEB0B0", "#EEB1B1", "#EEB3B3", "#EEB5B5", "#EEB7B7", "#EEB8B8", "#EEBABA", "#EEBCBC", "#EEBDBD", "#EEBFBF", "#EEC1C1", "#EEC3C3", "#EEC4C4", "#EEC6C6", "#EEC8C8", "#EEC9C9", "#EECBCB", "#EECDCD", "#EECFCF", "#EED0D0", "#EED2D2", "#EED4D4", "#EED5D5", "#EED7D7", "#EED9D9", "#EEDBDB", "#EEDCDC", "#EEDEDE", "#EEE0E0",  "#EEE1E1", "#EEE3E3", "#EEE5E5", "#EEE7E7", "#EEE8E8", "#EEEAEA", "#EEECEC", "#EEEEEE")
## precision     =  3
## igraphLayout  =  layout_nicely
## interactive   =  TRUE
## engine    =  visNetwork
## max   =  100
## selection_menu    =  TRUE
## degree_highlight  =  1
## verbose   =  FALSE
# plot(fi_rules, method="graph", measure = "support", engine="htmlwidget", # nodeCol=rainbow(14),
#      shading = "lift", control = list(verbose = TRUE))

Reference to Apriori protocol:

Data Science and Predictive Analytics (DSPA) module from SOCR University of Michigan