#Data preparation and graph drawings using Python

import pandas as pd
import glob, openpyxl
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

#Load the study dataset from the Excel workbook
df = pd.read_excel('Dataset.xlsx')

#Convert every Ct column to a numeric dtype
column_floats = ('NIID_NP_Ct', 'NIID_NS_Ct','cobas_NS_ORF1ab_Ct', 'cobas_NS_Egene_Ct', 'Liat_NP_Ct','Liat_NS_Ct',)
df = df.assign(**{ct_name: pd.to_numeric(df[ct_name]) for ct_name in column_floats})

#Create a new column to describe cases positive on at least one test with nasopharyngeal samples
##"Positive": Liat_NP or NIID_NP is positive; "Negative": both are negative; otherwise left missing
liat_np_result = df["Liat_NP"]
niid_np_result = df["NIID_NP"]
any_np_positive = (liat_np_result == "Positive") | (niid_np_result == "Positive")
both_np_negative = (liat_np_result == "Negative") & (niid_np_result == "Negative")
df.loc[any_np_positive, "SARS-CoV-2"] = "Positive"
df.loc[both_np_negative, "SARS-CoV-2"] = "Negative"

#Create crosstables
##Each reference result is tabulated against Liat_NS (rows: Liat_NS, columns: reference),
##with row/column totals added by margins=True
variables=["NIID_NS","cobas_NS", "Liat_NP", "SARS-CoV-2"]

Crosstable_Liat_NS = {}
Crosstable_NIID_NP = {}

for reference in variables:
    tables_to_show = [pd.crosstab(df['Liat_NS'], df[reference], margins=True)]
    Crosstable_Liat_NS[reference] = tables_to_show[0]
    if reference == "Liat_NP":
        #Liat_NP is additionally tabulated against NIID_NP (rows: Liat_NP, columns: NIID_NP)
        Crosstable_NIID_NP = pd.crosstab(df[reference], df['NIID_NP'], margins=True)
        tables_to_show.append(Crosstable_NIID_NP)
    for table in tables_to_show:
        print(table)

#Expose each table under its own name
NIID_NS = Crosstable_Liat_NS['NIID_NS']        ##NIID_NS vs Liat_NS
cobas_NS = Crosstable_Liat_NS['cobas_NS']      ##cobas_NS vs Liat_NS
Liat_NP = Crosstable_Liat_NS['Liat_NP']        ##Liat_NP vs Liat_NS
SARS_CoV_2 = Crosstable_Liat_NS['SARS-CoV-2']  ##SARS-CoV-2 vs Liat_NS
NIID_NP = Crosstable_NIID_NP                   ##Liat_NP vs NIID_NP



#Create crosstables for groups stratified by the presence of symptoms
##Select symptomatic participants (Symptom == 1)
df_symptomatic = df[df['Symptom'] == 1]

###cobas_NS vs Liat_NS and NIID_NS vs Liat_NS in symptomatic patients
SymptomaticPt_cobas, SymptomaticPt_NIID = (
    pd.crosstab(df_symptomatic['Liat_NS'], df_symptomatic[reference], margins=True)
    for reference in ('cobas_NS', 'NIID_NS')
)


##Select asymptomatic participants (Symptom == 0)
df_asymptomatic = df[df['Symptom'] == 0]

###cobas_NS vs Liat_NS and NIID_NS vs Liat_NS in asymptomatic patients
AsymptomaticPt_cobas, AsymptomaticPt_NIID = (
    pd.crosstab(df_asymptomatic['Liat_NS'], df_asymptomatic[reference], margins=True)
    for reference in ('cobas_NS', 'NIID_NS')
)

#Select participants who provided both nasal and nasopharyngeal samples for Liat testing
##i.e. rows where neither the Liat_NS nor the Liat_NP result is missing
has_both_liat_results = df['Liat_NS'].notnull() & df['Liat_NP'].notnull()
Both_samples = df[has_both_liat_results]


#Define a function to draw graphs

def drawgraph(data, i, j):
    """Draw a scatterplot of two Ct columns with a regression line.

    A new 6x6 inch figure is created. Every row with a value in at least one
    of the two columns is plotted as a point; a missing value on one side is
    plotted at the placeholder 45 (cases negative for that test). The
    regression line is fitted on the original columns, so rows carrying the
    placeholder do not influence it.

    Parameters
    ----------
    data : pandas.DataFrame
        Table containing the columns named by ``i`` and ``j``. It is copied,
        never modified.
    i : str
        Name of the column plotted on the X axis.
    j : str
        Name of the column plotted on the Y axis.
    """
    # Fixed category -> colour pairing. Without an explicit hue order seaborn
    # assigns the palette in order of first appearance in the data, so the
    # colours would depend on how the rows happen to be sorted.
    # NOTE(review): the pairing follows the order in which the palette and the
    # categories are declared in this function - confirm it matches the
    # intended figure colours.
    hue_levels = ["", "One_positive", "Both_positive"]
    hue_colours = ['white', 'blue', 'dodgerblue']

    x_plot = f"{i}_new"
    y_plot = f"{j}_new"
    status = f"{i}and{j}_posneg"

    _df = data.copy()
    _df[x_plot] = _df[i]
    _df[y_plot] = _df[j]

    x_missing = _df[i].isnull()
    y_missing = _df[j].isnull()

    #Create a new column to differentiate cases positive on one test from cases positive on both tests
    _df[status] = ""
    _df.loc[x_missing ^ y_missing, status] = "One_positive"
    _df.loc[~x_missing & ~y_missing, status] = "Both_positive"

    #Insert the number "45" into cells for cases negative for each corresponding test
    _df.loc[x_missing & ~y_missing, x_plot] = 45
    _df.loc[~x_missing & y_missing, y_plot] = 45

    # Apply the plotting context and the font BEFORE the figure is created;
    # set afterwards they only take effect from the next figure on, which made
    # the first figure of a session look different from the others.
    sns.set_context("talk", font_scale=1, rc={"lines.linewidth": 1})
    plt.rcParams["font.family"] = "Times New Roman"
    fig, ax = plt.subplots(figsize=(6, 6), dpi=120)

    #Set the locator for major axis markers
    ax.xaxis.set_major_locator(ticker.MultipleLocator(5))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(5))

    #Set the display limits for the axes
    ax.set_xlim(10, 47)
    ax.set_ylim(10, 47)

    #Delete the right and upper spines
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    #Change the ratio of the X and Y axes
    ax.set_aspect('equal')

    #Draw graphs
    ## Scatterplot: columns including the placeholder "45"
    sns.scatterplot(x=x_plot, y=y_plot, hue=status, hue_order=hue_levels,
                    data=_df, palette=hue_colours, alpha=0.5, legend=False,
                    ax=ax)
    ## Regression line: original columns, i.e. cases with the result "45" are omitted
    sns.regplot(x=i, y=j, data=_df, color='gray', scatter=False, ax=ax)


# Figures 1a-1d and 2a-2b, drawn in that order
##The number "45" on graphs was replaced by "Neg" using graphic software.
_figure_panels = (
    (df, 'Liat_NS_Ct', 'cobas_NS_Egene_Ct'),       #Figure 1a
    (df, 'Liat_NS_Ct', 'cobas_NS_ORF1ab_Ct'),      #Figure 1b
    (df, 'Liat_NS_Ct', 'NIID_NS_Ct'),              #Figure 1c
    (df, 'Liat_NP_Ct', 'NIID_NP_Ct'),              #Figure 1d
    (Both_samples, 'Liat_NS_Ct', 'Liat_NP_Ct'),    #Figure 2a
    (Both_samples, 'NIID_NS_Ct', 'NIID_NP_Ct'),    #Figure 2b
)

for _panel_data, _panel_x, _panel_y in _figure_panels:
    drawgraph(_panel_data, _panel_x, _panel_y)




#Statistical analyses using R
##The objects created in the Python part above are read through reticulate's "py" object

library(reticulate)
library(coin)
##NOTE(review): wilcox.exact(), used below, is exported by the "exactRankTests" package
##rather than by "coin" - confirm that package is attached in the session

##Import Python data using R studio
##Liat is the full prepared dataset (the Python data frame "df")
Liat <- py$df

#Table 1a
##Each table is the pandas crosstab built with margins (rows: Liat_NS, columns: cobas_NS).
##The indices below assume both tests have exactly the levels Negative (1) and Positive (2),
##so that row/column 3 is the "All" margin - TODO confirm against the printed crosstables.
##binom.test(x, n) reports the proportion x/n with its exact confidence interval.
##Liat_NS vs cobas_NS in all participants
cobas_NS <- py$cobas_NS
binom.test(cobas_NS[1,1]+cobas_NS[2,2], cobas_NS[3,3]) #Total concordance
binom.test(cobas_NS[2,2], cobas_NS[3,2]) #Pos concordance (among cobas_NS positives)
binom.test(cobas_NS[1,1], cobas_NS[3,1]) #Neg concordance (among cobas_NS negatives)


##Liat_NS vs cobas_NS in symptomatic participants
SymptomaticPt_cobas <-py$SymptomaticPt_cobas
binom.test(SymptomaticPt_cobas[1,1]+SymptomaticPt_cobas[2,2], SymptomaticPt_cobas[3,3]) #Total concordance
binom.test(SymptomaticPt_cobas[2,2], SymptomaticPt_cobas[3,2]) #Pos concordance
binom.test(SymptomaticPt_cobas[1,1], SymptomaticPt_cobas[3,1]) #Neg concordance

##Liat_NS vs cobas_NS in asymptomatic participants
AsymptomaticPt_cobas <- py$AsymptomaticPt_cobas
binom.test(AsymptomaticPt_cobas[1,1]+AsymptomaticPt_cobas[2,2], AsymptomaticPt_cobas[3,3]) #Total concordance
binom.test(AsymptomaticPt_cobas[2,2], AsymptomaticPt_cobas[3,2]) #Pos concordance
binom.test(AsymptomaticPt_cobas[1,1], AsymptomaticPt_cobas[3,1]) #Neg concordance


#Table 1b
##Each table is the pandas crosstab built with margins (rows: Liat_NS, columns: NIID_NS).
##The indices below assume both tests have exactly the levels Negative (1) and Positive (2),
##so that row/column 3 is the "All" margin - TODO confirm against the printed crosstables.
#Liat_NS vs NIID_NS in all participants
NIID_NS <- py$NIID_NS
binom.test(NIID_NS[1,1]+NIID_NS[2,2], NIID_NS[3,3]) #Total concordance
binom.test(NIID_NS[2,2], NIID_NS[3,2]) #Pos concordance (among NIID_NS positives)
binom.test(NIID_NS[1,1], NIID_NS[3,1]) #Neg concordance (among NIID_NS negatives)


#Liat_NS vs NIID_NS in symptomatic participants
SymptomaticPt_NIID <- py$SymptomaticPt_NIID
binom.test(SymptomaticPt_NIID[1,1]+SymptomaticPt_NIID[2,2], SymptomaticPt_NIID[3,3]) #Total concordance
binom.test(SymptomaticPt_NIID[2,2], SymptomaticPt_NIID[3,2]) #Pos concordance
binom.test(SymptomaticPt_NIID[1,1], SymptomaticPt_NIID[3,1]) #Neg concordance

#Liat_NS vs NIID_NS in asymptomatic participants
AsymptomaticPt_NIID <- py$AsymptomaticPt_NIID
binom.test(AsymptomaticPt_NIID[1,1]+AsymptomaticPt_NIID[2,2], AsymptomaticPt_NIID[3,3]) #Total concordance
binom.test(AsymptomaticPt_NIID[2,2], AsymptomaticPt_NIID[3,2]) #Pos concordance
binom.test(AsymptomaticPt_NIID[1,1], AsymptomaticPt_NIID[3,1]) #Neg concordance


#Liat_NP vs NIID_NP
##Crosstab with rows: Liat_NP, columns: NIID_NP.
##NOTE(review): the row indices here are shifted by one compared with the tables above
##(rows 2-4 instead of 1-3); presumably Liat_NP has an additional first level that is
##excluded from the denominators of the total concordance - verify against the printed crosstable.
NIID_NP<-py$NIID_NP
binom.test(NIID_NP[2,1]+NIID_NP[3,2], NIID_NP[2,3]+NIID_NP[3,3]) #Total concordance
binom.test(NIID_NP[3,2], NIID_NP[4,2]) #Pos concordance
binom.test(NIID_NP[2,1], NIID_NP[4,1]) #Neg concordance


#Sensitivity, specificity, PPV and NPV of Liat_NS
##Crosstab with rows: Liat_NS, columns: the "SARS-CoV-2" reference column
##(positive on at least one nasopharyngeal test / negative on both).
##Assumes levels Negative (1) and Positive (2) with the "All" margin at index 3 - TODO confirm.
SARS_CoV_2 <- py$SARS_CoV_2
binom.test(SARS_CoV_2[2,2], SARS_CoV_2[3,2]) #Sensitivity: both positive / all reference positive
binom.test(SARS_CoV_2[1,1], SARS_CoV_2[3,1]) #Specificity: both negative / all reference negative
binom.test(SARS_CoV_2[2,2], SARS_CoV_2[2,3]) #PPV: both positive / all Liat_NS positive
binom.test(SARS_CoV_2[1,1], SARS_CoV_2[1,3]) #NPV: both negative / all Liat_NS negative



#Summarize the Ct values for each type of analyzer
##Nasal samples
summary(Liat$Liat_NS_Ct)
summary(Liat$NIID_NS_Ct)
summary(Liat$cobas_NS_ORF1ab_Ct)
summary(Liat$cobas_NS_Egene_Ct)

##Nasopharyngeal samples
summary(Liat$NIID_NP_Ct)
summary(Liat$Liat_NP_Ct)

#Compare the Ct values between analyzers
##Paired tests on the Ct values measured for the same participant by two analyzers
wilcox.exact(Liat$Liat_NS_Ct, Liat$cobas_NS_Egene_Ct, paired=T)
wilcox.exact(Liat$Liat_NS_Ct, Liat$cobas_NS_ORF1ab_Ct, paired=T)
wilcox.exact(Liat$Liat_NS_Ct, Liat$NIID_NS_Ct, paired=T)
wilcox.exact(Liat$Liat_NP_Ct, Liat$NIID_NP_Ct, paired=T)

##Correlation between the same pairs (cor.test uses Pearson's correlation unless "method" is given)
cor.test(Liat$Liat_NS_Ct, Liat$cobas_NS_Egene_Ct)
cor.test(Liat$Liat_NS_Ct, Liat$cobas_NS_ORF1ab_Ct)
cor.test(Liat$Liat_NS_Ct, Liat$NIID_NS_Ct)
cor.test(Liat$Liat_NP_Ct, Liat$NIID_NP_Ct)


#Compare the Ct values of nasal and nasopharyngeal samples
##Both_samples holds the participants with both a Liat_NS and a Liat_NP result
Both_samples <- py$Both_samples
wilcox.exact(Both_samples$Liat_NS_Ct, Both_samples$Liat_NP_Ct, paired=T)
wilcox.exact(Both_samples$NIID_NS_Ct, Both_samples$NIID_NP_Ct, paired=T)

##Correlation between nasal and nasopharyngeal Ct values
##cor.test() has no "paired" argument (it was silently absorbed by "..."), so it is not passed
cor.test(Both_samples$Liat_NS_Ct, Both_samples$Liat_NP_Ct)
cor.test(Both_samples$NIID_NS_Ct, Both_samples$NIID_NP_Ct)


##Linear regression of the nasopharyngeal on the nasal Liat Ct value
##(fitted on the full dataset; with R's default na.action, lm() drops rows with a missing value)
lm(Liat_NP_Ct ~ Liat_NS_Ct, data=Liat)
##use="complete.obs": cor() otherwise returns NA as soon as one Ct value is missing
cor(Both_samples$Liat_NP_Ct, Both_samples$Liat_NS_Ct, use="complete.obs")
cor(Both_samples$NIID_NP_Ct, Both_samples$NIID_NS_Ct, use="complete.obs")