# Authors: [Anonymized] # Affiliation: [Anonymized] # Description: R script to analyse healthcare complaints # Data format: Healthcare Complaints Analysis Tool (HCAT) # License: Creative Commons Attribution-ShareAlike 4.0 ################################################################################ # LOAD LIBRARIES AND DATA ################################################################################ library(tidyverse) library(psych) library(gdata) library(vcd) library(forcats) library(magrittr) # load the data (csv) file data_raw <- read.csv("5 Data.csv") ################################################################################ # DESCRIPTIVES: TRUSTS ################################################################################ nrow(data_raw) # total complaints # number of trusts, with mean and SD temp <- data_raw %>% group_by(Trust.ID) %>% count() describe(temp$n) # complaints per 1k admission describe(data_raw$Complaint.frequency) # wordcount descriptives sum(data_raw$Wordcount) describe(data_raw$Wordcount) ################################################################################ # DESCRIPTIVES: COMPLAINTS ################################################################################ # function to calculate frequencies and percent for categorical variables freq.and.pct <- function(df, variable) { eval(substitute(df %>% count(variable))) %>% mutate(pct = round(((n / nrow( df )) * 100), digits = 0)) } # calculate frequencies and percentages freq.and.pct(data_raw, Region) freq.and.pct(data_raw, Complainant) freq.and.pct(data_raw, Gender) freq.and.pct(data_raw, Harm) ################################################################################ # CREATE DATA FRAME OF PROBLEMS ################################################################################ # change from 'wide' to 'long' format; with 'Problem sequence' variable data <- reshape( data_raw, varying = 9:32, sep = ".", direction = "long", timevar = "Sequence", idvar = "ID" ) # order factors data <- data %>% mutate( Region = factor( Region, level = c( "South", "Midlands", "North")), Gender = factor( Gender, level = c( "Female", "Male", "NA")), Complainant = factor( Complainant, level = c( "Patient", "Family", "Other")), Harm = ordered( Harm, level = c( "None", "Minimal", "Minor", "Moderate", "Major", "Catastrophic")), Problem = factor( Problem, level = c( "Quality", "Safety", "Communication", "Listening", "Rights", "Environment", "Institution", "Other")), Severity = ordered( Severity, level = c( "Low", "Medium", "High")), Stage = factor( Stage, level = c( "Admission", "Examination", "Ward", "Procedure", "Discharge", "Other", "Multiple")), Staff = factor( Staff, level = c( "Administration", "Nursing", "Medical", "Other", "Multiple")) ) # remove rows that have no problem (problem = NA) data <- data[is.na(data$Problem) == FALSE, ] ## create 'domain' variable data$Domain <- data$Problem %>% fct_recode( "Clinical" = "Quality", "Clinical" = "Safety", "Relationship" = "Communication", "Relationship" = "Listening", "Relationship" = "Rights", "Management" = "Environment", "Management" = "Institution", "Other" = "Other" ) # order domain factor data$Domain <- factor(data$Domain, levels = c("Clinical", "Relationship", "Management", "Other")) ################################################################################ # DESCRIPTIVES: PROBLEMS ################################################################################ count(data, ID) %$% sum(n) # total problems count(data, ID) %$% mean(n) # mean problems count(data, ID) %$% sd(n) # standard deviation # calculating frequencies and percentages freq.and.pct(data, Problem) freq.and.pct(data, Domain) freq.and.pct(data, Severity) freq.and.pct(data, Stage) freq.and.pct(data, Staff) # correlation between harm and clinical severity data %>% filter(Domain == "Clinical") %$% cor.test(as.numeric(Harm), as.numeric(Severity), method = "spearm", exact = FALSE) # correlation between harm and relationship severity data %>% filter(Domain == "Relationship") %$% cor.test(as.numeric(Harm), as.numeric(Severity), method = "spearm", exact = FALSE) # correlation between harm and management severity data %>% filter(Domain == "Management") %$% cor.test(as.numeric(Harm), as.numeric(Severity), method = "spearm", exact = FALSE) ################################################################################ # DESCRIPTIVES: REGIONAL DIFFERENCES ################################################################################ # does complaint frequency vary by region? # (note: admission data missing for three trusts) temp <- data %>% select(Trust.ID, Region, Complaint.frequency) %>% unique() # plot differences ggplot(temp) + aes(x = Region, y = Complaint.frequency) + geom_boxplot() # test differences results_anova <- aov(Complaint.frequency ~ Region, data = temp) summary(results_anova) # order complaint-level variables data_raw$Region <- ordered(data_raw$Region, levels = c("South", "Midlands", "North")) data_raw$Complainant <- ordered(data_raw$Complainant, levels = c("Patient", "Family", "Other")) data_raw$Harm <- ordered( data_raw$Harm, levels = c("None", "Minimal", "Minor", "Moderate", "Major", "Catastrophic") ) # cross tabulation for Region*Gender (complaint-level) crosstab <- table(data_raw$Region, data_raw$Gender) # create crosstab t(crosstab) # print crosstab chisq <- chisq.test(crosstab, correct = FALSE) chisq t(chisq$expected) # expected values t(chisq$residuals) # residuals # cross tabulation for Region*Complainant (complaint-level) crosstab <- table(data_raw$Region, data_raw$Complainant) # create crosstab t(crosstab) # print crosstab chisq <- chisq.test(crosstab, correct = FALSE) chisq t(chisq$expected) # expected values t(chisq$residuals) # residuals # cross tabulation for Region*Problem (problem-level) crosstab <- table(data$Region, drop.levels(data$Problem)) # create crosstab t(crosstab) # print crosstab chisq <- chisq.test(crosstab, correct = FALSE) chisq t(chisq$expected) # expected values t(chisq$residuals) # residuals # cross tabulation for Region*Harm (complaint-level) crosstab <- table(data_raw$Region, data_raw$Harm) # create crosstab t(crosstab) # print crosstab chisq <- chisq.test(crosstab, correct = FALSE) chisq t(chisq$expected) # expected values t(chisq$residuals) # residuals # calculating percent major and catastrophic harm in each region total <- as.data.frame(t(crosstab)) %>% group_by(Var2) %>% tally(Freq) high <- as.data.frame(t(crosstab)) %>% filter(Var1 == "Major" | Var1 == "Catastrophic") %>% group_by(Var2) %>% tally(Freq) round((high$n / total$n) * 100, digits = 3) ################################################################################ # FIGURE 3: MOSAIC PLOT OF STAGE BY HARM ################################################################################ # add spaces into levels (other, multiple) to prevent text in figure overlapping data2 <- data levels(data2$Stage)[levels(data2$Stage) == "Other"] <- "Other " levels(data2$Stage)[levels(data2$Stage) == "Multiple"] <- " Multiple" # prepare plot crosstab <- table(data2$Stage, data2$Harm) # create crosstab table_proportions <- round(prop.table(crosstab) * 100) # calculate pct cell_values <- c(crosstab) # capture values rows <- dimnames(crosstab)[[1]] # capture row names columns <- dimnames(crosstab)[[2]] # capture column names variables <- c("Stage", "Harm") # give variables names dimensions <- dim(crosstab) table_values <- structure( c(cell_values), .Dim = as.integer(dimensions), .Dimnames = structure(list(rows, columns), .Names = c(variables)), class = "table" ) table_text <- structure( c(paste( table_proportions, "%", " (", cell_values, ")", sep = "" )), .Dim = as.integer(dimensions), .Dimnames = structure(list(rows, columns), .Names = c(variables)), class = "table" ) table_text[table_values <= 20] <- "" # removes text if n < 20 table_text[6:7, ] <- "" # removes text in rows 6 & 7 (too narrow) # plot mosaic( table_values, pop = FALSE, gp = shading_Friendly2(h = c(0, 200), c = c(60, 0)), keep_aspect_ratio = FALSE, spacing = spacing_equal(0.4), rot_labels = c(top = 0, left = 0), just_labels = c(top = "center", left = "right"), margin = c( top = 6, left = 8, bottom = 4, right = 1.5 ), offset_varnames = c(top = 1, left = 4.5), direction = "v" ) labeling_cells(text = table_text, clip_cells = FALSE)(table_values) # tables and statistics t(crosstab) # print crosstab t(table_proportions) # print cell percent chisq <- chisq.test(crosstab, correct = FALSE) chisq t(chisq$expected) # expected values t(chisq$residuals) # residuals ################################################################################ # FIGURE 4: MOSAIC PLOT OF STAGE BY PROBLEM (ONLY MAJOR & CATASTROPHIC HARM) ################################################################################ # create subset with only major and catastrophic harm df <- data %>% filter(Harm == "Catastrophic" | Harm == "Major") %>% filter(Stage != "Other" & Stage != "Multiple") %>% filter(Problem != "Other") %>% drop.levels(df, reorder = FALSE) # descriptives for stage in subset freq.and.pct(df, Stage) # prepare plot crosstab <- table(df$Stage, df$Problem) # create crosstab table_proportions <- round(prop.table(crosstab) * 100) # calculate pct cell_values <- c(crosstab) # capture values rows <- dimnames(crosstab)[[1]] # capture row names columns <- dimnames(crosstab)[[2]] # capture column names variables <- c("Stage", "Problem") # give variables names dimensions <- dim(crosstab) table_values <- structure( c(cell_values), .Dim = as.integer(dimensions), .Dimnames = structure(list(rows, columns), .Names = c(variables)), class = "table" ) table_text <- structure( c(paste( table_proportions, "%", " (", cell_values, ")", sep = "" )), .Dim = as.integer(dimensions), .Dimnames = structure(list(rows, columns), .Names = c(variables)), class = "table" ) table_text[table_values <= 2] <- "" # removes text if n < 2 # plot mosaic( table_values, pop = FALSE, gp = shading_Friendly2(h = c(0, 200), c = c(60, 0)), keep_aspect_ratio = FALSE, spacing = spacing_equal(0.4), rot_labels = c(top = 0, left = 0), just_labels = c(top = "center", left = "right"), margin = c( top = 6, left = 8, bottom = 4, right = 1.5 ), offset_varnames = c(top = 1, left = 4.5), direction = "v" ) labeling_cells(text = table_text, clip_cells = FALSE)(table_values) # tables and statistics t(crosstab) # print crosstab t(table_proportions) # print cell percent chisq <- chisq.test(crosstab, correct = FALSE) chisq t(chisq$expected) # expected values t(chisq$residuals) # residuals ################################################################################ # FIGURE 5: MOSAIC PLOT OF STAGE BY NEAR-MISSES ################################################################################ # filter for high severity with only minor, minimal or no harm df <- data %>% filter(Harm != "Catastrophic" & Harm != "Major" & Harm != "Moderate") %>% filter(Stage != "Other" & Stage != "Multiple") %>% filter(Problem != "Other") %>% filter(Severity == "High") %>% drop.levels(df, reorder = FALSE) nrow(df) # descriptives freq.and.pct(df, Stage) # prepare plot crosstab <- table(df$Stage, df$Problem) # create crosstab table_proportions <- round(prop.table(crosstab) * 100) # calculate pct cell_values <- c(crosstab) # capture values rows <- dimnames(crosstab)[[1]] # capture row names columns <- dimnames(crosstab)[[2]] # capture column names variables <- c("Stage", "Problem") # give variables names dimensions <- dim(crosstab) table_values <- structure( c(cell_values), .Dim = as.integer(dimensions), .Dimnames = structure(list(rows, columns), .Names = c(variables)), class = "table" ) table_text <- structure( c(paste( table_proportions, "%", " (", cell_values, ")", sep = "" )), .Dim = as.integer(dimensions), .Dimnames = structure(list(rows, columns), .Names = c(variables)), class = "table" ) table_text[table_values <= 1] <- "" # removes text if n <= 1 # plot mosaic( table_values, pop = FALSE, gp = shading_Friendly2(h = c(0, 200), c = c(60, 0)), keep_aspect_ratio = FALSE, spacing = spacing_equal(0.4), rot_labels = c(top = 0, left = 0), just_labels = c(top = "center", left = "right"), margin = c( top = 6, left = 8, bottom = 4, right = 1.5 ), offset_varnames = c(top = 1, left = 4.5), direction = "v" ) labeling_cells(text = table_text, clip_cells = FALSE)(table_values) # tables and statistics t(crosstab) # print crosstab t(table_proportions) # print cell percent # too many small values for chi square test ################################################################################ # BLINDSPOT: SYSTEMIC PROBLEMS AND HARM ################################################################################ freq.and.pct(data, Sequence) # create data frame df_reg <- data_raw df_reg$Problem.count <- select(data_raw, Problem.1, Problem.2, Problem.3, Problem.4, Problem.5, Problem.6) %>% apply(1, function(x) length(unique(x))) %>% -1 # to discount NA values df_reg$Staff.count <- select(data_raw, Staff.1, Staff.2, Staff.3, Staff.4, Staff.5, Staff.6) %>% apply(1, function(x) length(unique(x))) %>% -1 # to discount NA values df_reg$Stage.count <- select(data_raw, Stage.1, Stage.2, Stage.3, Stage.4, Stage.5, Stage.6) %>% apply(1, function(x) length(unique(x))) %>% -1 # to discount NA values df_reg <- select(df_reg, Harm, Problem.count, Staff.count, Stage.count) df_reg$Harm <- ordered( df_reg$Harm, levels = c("None", "Minimal", "Minor", "Moderate", "Major", "Catastrophic") ) library(MASS, pos = "package:base") model.polr <- polr(Harm ~ Problem.count + Stage.count + Staff.count, data = df_reg, Hess = TRUE) summary(model.polr) # to create p values (model.polr.table <- coef(summary(model.polr))) # store the table p <- pnorm(abs(model.polr.table[, "t value"]), lower.tail = FALSE) * 2 # calc p (model.polr.table <- cbind(model.polr.table, "p value" = p)) # bind p to table exp(cbind(OR = coef(model.polr), # odds ratios confint(model.polr, level = 0.95))) # confidence intervals)) ################################################################################ # FIGURE 6: MOSAIC PLOT OF HARM BY NUMBER OF PROBLEMS ################################################################################ # create new data frame df_comp <- df_reg df_comp$Problem.count <- factor(df_comp$Problem.count) df_comp$Problem.count <- fct_collapse( df_comp$Problem.count, "One" = "1", "Two" = "2", "Three+" = c("3", "4", "5") ) df_comp$Problem.count <- factor(df_comp$Problem.count, levels = c("One", "Two", "Three+")) # descriptives freq.and.pct(df_comp, Problem.count) # prepare plot crosstab <- table(df_comp$Problem.count, df_comp$Harm) # create crosstab table_proportions <- round(prop.table(crosstab) * 100) # calculate pct cell_values <- c(crosstab) # capture values rows <- dimnames(crosstab)[[1]] # capture row names columns <- dimnames(crosstab)[[2]] # capture column names variables <- c("Number of problems", "Harm") # give variables names dimensions <- dim(crosstab) table_values <- structure( c(cell_values), .Dim = as.integer(dimensions), .Dimnames = structure(list(rows, columns), .Names = c(variables)), class = "table" ) table_text <- structure( c(paste( table_proportions, "%", " (", cell_values, ")", sep = "" )), .Dim = as.integer(dimensions), .Dimnames = structure(list(rows, columns), .Names = c(variables)), class = "table" ) table_text[table_values <= 24] <- "" # removes text below 24 # plot mosaic( table_values, pop = FALSE, gp = shading_Friendly2(h = c(0, 200), c = c(60, 0)), # color adjust keep_aspect_ratio = FALSE, spacing = spacing_equal(0.4), rot_labels = c(top = 0, left = 0), just_labels = c(top = "center", left = "right"), margin = c( top = 6, left = 8, bottom = 4, right = 1.5 ), offset_varnames = c(top = 1, left = 4.5), direction = "v" ) labeling_cells(text = table_text, clip_cells = FALSE)(table_values) # tables and statistics t(crosstab) # print crosstab t(table_proportions) # print cell percent chisq <- chisq.test(crosstab, correct = FALSE) chisq t(chisq$expected) # expected values t(chisq$residuals) # residuals ################################################################################ # FIGURE 7: MOSAIC PLOT OF PROBLEM BY ORDER REPORTED ################################################################################ df <- data %>% filter(Problem != "Other") %>% drop.levels(df, reorder = FALSE) df$Sequence <- as.factor(df$Sequence) df$Sequence <- fct_collapse( df$Sequence, "First" = "1", "Second" = "2", "Third+" = c("3", "4", "5") ) df$Sequence <- factor(df$Sequence, levels = c("First", "Second", "Third+")) # descriptives freq.and.pct(df, Sequence) # prepare plot crosstab <- table(df$Sequence, df$Problem) # create crosstab table_proportions <- round(prop.table(crosstab) * 100) # calculate pct cell_values <- c(crosstab) # capture values rows <- dimnames(crosstab)[[1]] # capture row names columns <- dimnames(crosstab)[[2]] # capture column names variables <- c("Order reported", "Problem") # give variables names dimensions <- dim(crosstab) table_values <- structure( c(cell_values), .Dim = as.integer(dimensions), .Dimnames = structure(list(rows, columns), .Names = c(variables)), class = "table" ) table_text <- structure( c(paste( table_proportions, "%", " (", cell_values, ")", sep = "" )), .Dim = as.integer(dimensions), .Dimnames = structure(list(rows, columns), .Names = c(variables)), class = "table" ) # plot mosaic( table_values, pop = FALSE, gp = shading_Friendly2(h = c(0, 200), c = c(60, 0)), # color adjust keep_aspect_ratio = FALSE, spacing = spacing_equal(0.4), rot_labels = c(top = 0, left = 0), just_labels = c(top = "center", left = "right"), margin = c( top = 6, left = 8, bottom = 4, right = 1.5 ), offset_varnames = c(top = 1, left = 4.5), direction = "v" ) labeling_cells(text = table_text, clip_cells = FALSE)(table_values) # tables and statistics t(crosstab) # print crosstab t(table_proportions) # print cell percent chisq <- chisq.test(crosstab, correct = FALSE) chisq t(chisq$expected) # expected values t(chisq$residuals) # residuals