## Chatbot answer analysis ----
##
## Reads `data.xlsx`, totals the score of every chatbot run, measures the
## agreement between the three runs of each chatbot with Fleiss' kappa
## (overall, per AREA and per TIPO_ITEM), and fits logistic regressions of the
## best run of each chatbot on AREA, TIPO_ITEM and PERU_RQ.
##
## NOTE(review): the run columns (BING_1M, GPT4_2, ...) are presumably coded
## 0/1 (incorrect/correct); confirm against the spreadsheet.
##
## Results are kept in named lists instead of one global per combination:
##   kappa_overall[["BING"]]
##   kappa_by_area[["GPT4"]][["CIRU"]]
##   kappa_by_item_type[["BARD"]][["I1"]]
##   models[["CLAUDE"]][["AREA"]]$model / $odds_ratios

## Packages ----
## Install once per machine rather than on every run of the script:
# install.packages(c("irr", "dplyr", "readxl"))

library(readxl) # read_excel(); was called without being loaded
library(irr)    # kappam.fleiss()
library(dplyr)

## Import the dataset ----
data <- read_excel("data.xlsx")

## Analysis configuration ----

# The three runs recorded for each chatbot.
chatbot_runs <- list(
  BING   = c("BING_1M", "BING_2", "BING_3"),
  GPT4   = c("GPT4_1M", "GPT4_2", "GPT4_3"),
  GPT3   = c("GPT3_1", "GPT3_2", "GPT3_3"),
  CLAUDE = c("CLAUDE_1", "CLAUDE_2", "CLAUDE_3"),
  BARD   = c("BARD_1", "BARD_2", "BARD_3")
)

# Run marked as best ("Mejor") for each chatbot; used as the model outcome.
best_run <- c(
  BING   = "BING_1M",
  GPT4   = "GPT4_1M",
  GPT3   = "GPT3_2",
  CLAUDE = "CLAUDE_1",
  BARD   = "BARD_2"
)

# Values of AREA analysed separately.
areas <- setNames(nm = c("CIRU", "MED_INT", "PED", "OBGYN", "SALUD_PUB", "EMG"))

# Values of TIPO_ITEM analysed separately. The first label carries literal
# double quotes, exactly as it is matched in the data.
item_types <- c(
  I1 = "\"Memorización\"",
  I2 = "Aplicación de información (Hacer un Diagnostico, tratamiento o pedir un examen)"
)

# Predictors of the single-covariate logistic models.
predictors <- setNames(nm = c("AREA", "TIPO_ITEM", "PERU_RQ"))

## Helpers ----

# Fleiss' kappa across the `runs` columns of `data`, restricted to the rows
# where `keep` is TRUE (rows where `keep` is NA are dropped, as subset() does).
# Only the run columns are passed on: kappam.fleiss() treats every column as a
# rater, so grouping columns such as AREA must not be included.
# Returns NULL with a warning when no row is selected.
fleiss_kappa <- function(data, runs, keep = rep(TRUE, nrow(data))) {
  ratings <- data[which(keep), runs, drop = FALSE]
  if (nrow(ratings) == 0L) {
    warning(
      "No rows selected for ", paste(runs, collapse = ", "),
      "; kappa not computed.",
      call. = FALSE
    )
    return(NULL)
  }
  kappam.fleiss(ratings)
}

# Fits glm(outcome ~ predictor, family = binomial) and returns the model plus
# a table of odds ratios with their confidence limits, i.e. exp() of the
# coefficients and of confint(model). The intercept row is left out.
fit_logistic <- function(outcome, predictor, data) {
  model_formula <- reformulate(predictor, response = outcome)
  model <- glm(model_formula, family = binomial, data = data)
  limits <- confint(model)
  ratios <- exp(cbind(OR = coef(model)[rownames(limits)], limits))
  list(
    model = model,
    odds_ratios = ratios[rownames(ratios) != "(Intercept)", , drop = FALSE]
  )
}

# Prints every element of a named list under a "## title: name" heading.
print_results <- function(results, title) {
  for (label in names(results)) {
    cat("\n## ", title, ": ", label, "\n", sep = "")
    print(results[[label]])
  }
  invisible(results)
}

## Explore the data ----
dimnames(data)
summary(data)
summary(data$Especialidad)
table(data$AREA)
table(data$TIPO_ITEM)

## Score of chatbots ----
# Sum of every run column, named by column.
scores <- vapply(data[unlist(chatbot_runs, use.names = FALSE)], sum, numeric(1))
scores

# Scores of the runs marked as best.
scores[best_run]

## Concordance ----

# Each chatbot's runs next to the item type, printed for visual inspection.
for (bot in names(chatbot_runs)) {
  print(data[, c(chatbot_runs[[bot]], "TIPO_ITEM")])
}

# Agreement between the three runs of each chatbot over all items.
kappa_overall <- lapply(chatbot_runs, function(runs) fleiss_kappa(data, runs))
print_results(kappa_overall, "Fleiss kappa, all items")

# Agreement within each AREA.
kappa_by_area <- lapply(chatbot_runs, function(runs) {
  lapply(areas, function(area) fleiss_kappa(data, runs, data$AREA == area))
})
for (bot in names(kappa_by_area)) {
  print_results(kappa_by_area[[bot]], paste("Fleiss kappa by AREA,", bot))
}

# Agreement within each TIPO_ITEM.
kappa_by_item_type <- lapply(chatbot_runs, function(runs) {
  lapply(item_types, function(type) {
    fleiss_kappa(data, runs, data$TIPO_ITEM == type)
  })
})
for (bot in names(kappa_by_item_type)) {
  print_results(
    kappa_by_item_type[[bot]],
    paste("Fleiss kappa by TIPO_ITEM,", bot)
  )
}

## GPT_* and BING_* columns ----
for (column in c(
  "GPT_CERT", "GPT_UTI", "GPT_CLASE",
  "BING_CERT", "BING_UTI", "BING_CLASE"
)) {
  cat("\n## ", column, "\n", sep = "")
  print(table(data[[column]]))
}

# NOTE(review): biblioshiny() starts an interactive Shiny app and holds the
# script until the app is closed; it does not use `data`. Consider moving it
# to a separate script.
library(bibliometrix)
biblioshiny()

## Regression models ----

dimnames(data)

# Logistic models for a correct answer on the best run of each chatbot.
# Odds ratios and confidence limits are computed from the fitted model rather
# than typed in from printed output, so they cannot drift from the model they
# describe.
models <- lapply(best_run, function(outcome) {
  lapply(predictors, function(predictor) {
    fit_logistic(outcome, predictor, data)
  })
})

for (bot in names(models)) {
  for (predictor in names(models[[bot]])) {
    fit <- models[[bot]][[predictor]]
    cat("\n## ", best_run[[bot]], " ~ ", predictor, "\n", sep = "")
    print(summary(fit$model))
    print(fit$odds_ratios)
  }
}