# load SPACE package
# aroma.affymetrix package will also be loaded
library(SPACE)

# Set the working directory used by aroma.affymetrix.
# This directory should contain the sub-directories that aroma.affymetrix
# works with (annotationData, rawData, etc.); the expected layout is
# listed further down in this script.
# Adjust the path below to match your own machine.
setwd("E:/workingDir")

# Download custom cdfs for Ensembl from 
# http://brainarray.mbni.med.umich.edu/Brainarray/Database/CustomCDF/CDF_download.asp

# Go to last version (version 13) of the cdf files
# Click on ENSG, ENST and ENSE versions

# Go to Homo_sapiens and HuEx10stv2 (Affymetrix exon array) row
# Click on the last option CDF/Seq/Map/Desc to download each file
# Extract the cdf files from the compressed zip files

# Brainarray cdf files are in ascii format
# They should be converted into binary format to be used with aroma.affymetrix
# using the convertCdf command (It will take time)
# This conversion should be done only once
#convertCdf("HuEx10stv2_Hs_ENSG.cdf", "HuEx-1_0-st-v2,ENSG,brainarray,v13.cdf")
#convertCdf("HuEx10stv2_Hs_ENST.cdf", "HuEx-1_0-st-v2,ENST,brainarray,v13.cdf")
#convertCdf("HuEx10stv2_Hs_ENSE.cdf", "HuEx-1_0-st-v2,ENSE,brainarray,v13.cdf")


#Download Affymetrix exon array sample dataset for human tissues from Affymetrix
#Only 6 samples will be used corresponding to kidney and liver tissues
#Once all the files have been downloaded, they must be arranged in the
#following directory structure

#E:/workingDir
#      /annotationData
#           /chipTypes
#                /HuEx-1_0-st-v2
#                     HuEx-1_0-st-v2,ENSG,brainarray,v13.cdf
#                     HuEx-1_0-st-v2,ENST,brainarray,v13.cdf
#                     HuEx-1_0-st-v2,ENSE,brainarray,v13.cdf
#      /rawData
#           /humanLiverKidney
#                /HuEx-1_0-st-v2
#                     huex_wta_kidney_A.CEL
#                     huex_wta_kidney_B.CEL
#                     huex_wta_kidney_C.CEL
#                     huex_wta_liver_A.CEL
#                     huex_wta_liver_B.CEL
#                     huex_wta_liver_C.CEL


# Preprocessing with aroma.affymetrix:
# background correction followed by quantile normalization

# Names of the data set, the chip type and the three custom cdfs
# (gene, transcript and exon level).
projectName       <- "humanLiverKidney"
chipType          <- "HuEx-1_0-st-v2"
cdfGeneFile       <- "HuEx-1_0-st-v2,ENSG,brainarray,v13"
cdfTranscriptFile <- "HuEx-1_0-st-v2,ENST,brainarray,v13"
cdfExonFile       <- "HuEx-1_0-st-v2,ENSE,brainarray,v13"

# Verbose object (threshold -8) with timestamps switched on;
# it is handed to the process() calls below.
verbose <- Arguments$getVerbose(-8)
timestampOn(verbose)

# Load the cdf for each annotation level
cdfG <- AffymetrixCdfFile$byChipType(cdfGeneFile)
cdfT <- AffymetrixCdfFile$byChipType(cdfTranscriptFile)
cdfE <- AffymetrixCdfFile$byChipType(cdfExonFile)

# CEL set of the project, set up with the gene-level cdf
cs <- AffymetrixCelSet$byName(projectName, cdf = cdfG)

# Step 1: background correction
bc <- NormExpBackgroundCorrection(cs, method = "mle", tag = c("*", "ensembl"))
csBC <- process(bc, verbose = verbose)

# Step 2: quantile normalization of the background-corrected data
qn <- QuantileNormalization(csBC, typesToUpdate = "pm")
csN <- process(qn, verbose = verbose)

# Analysis with the LiWangWong method

# Build the isoform probe-level model on the normalized data.
# NOTE(review): the cdf names (character strings) are passed as cdfT/cdfE
# here, not the cdfT/cdfE objects loaded earlier -- confirm this is what
# IsoformPlm expects.
isoPlmLWW <- IsoformPlm(
  csN,
  method = "LiWangWong",
  cdfT = cdfTranscriptFile,
  cdfE = cdfExonFile,
  maxIter = 2000,
  fileName = "LWWTEST"
)
print("isoPlm object constructed")

# fit() is the call that actually performs the summarization.
outLWW <- fit(isoPlmLWW, verbose = TRUE)

# Collect the results from the fitted model:
# transcript concentrations and probe affinities (as lists), the
# transcript structure for the "G" and "Gc" matrices, and the transcript
# concentrations as a matrix.
TListLWW   <- getTranscriptConcentrationList(isoPlmLWW)
AListLWW   <- getProbeAffinityList(isoPlmLWW)
GListLWW   <- getTranscriptStructureList(isoPlmLWW, Gmatrix = "G")
GcListLWW  <- getTranscriptStructureList(isoPlmLWW, Gmatrix = "Gc")
TMatrixLWW <- getTranscriptConcentrationMatrix(isoPlmLWW)


# Analysis with the SPACE method,
# letting the method estimate the number of transcripts

isoPlmSPACEest <- IsoformPlm(
  csN,
  method = "SPACE",
  nbrOfTranscripts = "estimate",
  cdfT = cdfTranscriptFile,
  cdfE = cdfExonFile,
  fileName = "SPACETEST"
)
print("isoPlm object constructed")

# fit() is the call that actually performs the summarization.
outSPACEest <- fit(isoPlmSPACEest, verbose = TRUE)

# Collect the results from the fitted model (same set of getters as used
# for the other analyses in this script).
TListSPACEest   <- getTranscriptConcentrationList(isoPlmSPACEest)
AListSPACEest   <- getProbeAffinityList(isoPlmSPACEest)
GListSPACEest   <- getTranscriptStructureList(isoPlmSPACEest, Gmatrix = "G")
GcListSPACEest  <- getTranscriptStructureList(isoPlmSPACEest, Gmatrix = "Gc")
TMatrixSPACEest <- getTranscriptConcentrationMatrix(isoPlmSPACEest)

# Same SPACE analysis, restricted to a few genes:
# the units of interest are given to fit() through the 'units' argument.
isoPlmSPACEestGenes <- IsoformPlm(
  csN,
  method = "SPACE",
  nbrOfTranscripts = "estimate",
  cdfT = cdfTranscriptFile,
  cdfE = cdfExonFile,
  fileName = "SPACETEST_Genes"
)
outSPACEestGenes <- fit(
  isoPlmSPACEestGenes,
  verbose = TRUE,
  units = c(186, 196, 241)
)
TMatrixSPACEestGenes <- getTranscriptConcentrationMatrix(isoPlmSPACEestGenes)

# SPACE analysis for a single gene given by its Ensembl identifier.
# The identifier is first translated into the index of the matching unit
# in the gene-level cdf; that index is then passed to fit() via 'units'.
ensemblId <- "ENSG00000005302_at"
unit <- match(ensemblId, getUnitNames(cdfG))

# match() returns NA when the identifier is not among the unit names.
# Stop here with a clear message instead of handing NA to fit().
if (is.na(unit)) {
  stop("Identifier not found among the unit names of the gene cdf: ",
       ensemblId, call. = FALSE)
}

isoPlmSPACEestEnsembl <- IsoformPlm(
  csN,
  method = "SPACE",
  nbrOfTranscripts = "estimate",
  cdfT = cdfTranscriptFile,
  cdfE = cdfExonFile,
  fileName = "SPACETEST_Ensembl"
)
outSPACEestEnsembl <- fit(isoPlmSPACEestEnsembl, units = unit, verbose = TRUE)
TMatrixSPACEestEnsembl <- getTranscriptConcentrationMatrix(isoPlmSPACEestEnsembl)


# Analysis with the SPACE method,
# taking the number of transcripts of each gene from Ensembl
# (nbrOfTranscripts = "cdf")

# NOTE(review): fileName "SPACETEST" is also used by the other full SPACE
# analyses in this script -- if fileName identifies the stored results,
# these runs may overwrite each other; confirm.
isoPlmSPACEcdf <- IsoformPlm(
  csN,
  method = "SPACE",
  nbrOfTranscripts = "cdf",
  cdfT = cdfTranscriptFile,
  cdfE = cdfExonFile,
  fileName = "SPACETEST"
)
outSPACEcdf <- fit(isoPlmSPACEcdf, verbose = TRUE)

# Collect the results from the fitted model
TListSPACEcdf   <- getTranscriptConcentrationList(isoPlmSPACEcdf)
AListSPACEcdf   <- getProbeAffinityList(isoPlmSPACEcdf)
GListSPACEcdf   <- getTranscriptStructureList(isoPlmSPACEcdf, Gmatrix = "G")
GcListSPACEcdf  <- getTranscriptStructureList(isoPlmSPACEcdf, Gmatrix = "Gc")
TMatrixSPACEcdf <- getTranscriptConcentrationMatrix(isoPlmSPACEcdf)


# Analysis with the SPACE method,
# fixing the number of transcripts of every gene to 2

# NOTE(review): fileName "SPACETEST" is also used by the other full SPACE
# analyses in this script -- if fileName identifies the stored results,
# these runs may overwrite each other; confirm.
isoPlmSPACE2 <- IsoformPlm(
  csN,
  method = "SPACE",
  nbrOfTranscripts = 2,
  cdfT = cdfTranscriptFile,
  cdfE = cdfExonFile,
  fileName = "SPACETEST"
)
outSPACE2 <- fit(isoPlmSPACE2, verbose = TRUE)

# Collect the results from the fitted model
TListSPACE2   <- getTranscriptConcentrationList(isoPlmSPACE2)
AListSPACE2   <- getProbeAffinityList(isoPlmSPACE2)
GListSPACE2   <- getTranscriptStructureList(isoPlmSPACE2, Gmatrix = "G")
GcListSPACE2  <- getTranscriptStructureList(isoPlmSPACE2, Gmatrix = "Gc")
TMatrixSPACE2 <- getTranscriptConcentrationMatrix(isoPlmSPACE2)