# Setup ----------------------------------------------------------------------

# Load the SPACE package (the aroma.affymetrix package is loaded with it).
library(SPACE)

# Set the working directory used by aroma.affymetrix.
# It must contain the sub-directories described below
# (annotationData, rawData, etc.).
setwd("E:/workingDir")

# Annotation files -----------------------------------------------------------
#
# Download the custom Ensembl CDFs from
# http://brainarray.mbni.med.umich.edu/Brainarray/Database/CustomCDF/CDF_download.asp
#   1. Go to the latest version (version 13) of the CDF files.
#   2. Open the ENSG, ENST and ENSE versions.
#   3. Go to the Homo_sapiens / HuEx10stv2 (Affymetrix exon array) row.
#   4. Click on the last option (CDF/Seq/Map/Desc) to download each file.
#   5. Extract the CDF files from the compressed zip files.
#
# Brainarray CDF files are in ASCII format. They must be converted to binary
# format to be used with aroma.affymetrix, using the convertCdf command
# (this takes time). The conversion only needs to be done once:
#
# convertCdf("HuEx10stv2_Hs_ENSG.cdf", "HuEx-1_0-st-v2,ENSG,brainarray,v13.cdf")
# convertCdf("HuEx10stv2_Hs_ENST.cdf", "HuEx-1_0-st-v2,ENST,brainarray,v13.cdf")
# convertCdf("HuEx10stv2_Hs_ENSE.cdf", "HuEx-1_0-st-v2,ENSE,brainarray,v13.cdf")

# Raw data -------------------------------------------------------------------
#
# Download the Affymetrix exon array sample dataset for human tissues from
# Affymetrix. Only 6 samples are used, corresponding to kidney and liver
# tissues. Once all the files have been downloaded, they must be arranged in
# the following directory structure:
#
# E:/workingDir
#   /annotationData
#     /chipTypes
#       /HuEx-1_0-st-v2
#         HuEx-1_0-st-v2,ENSG,brainarray,v13.cdf
#         HuEx-1_0-st-v2,ENST,brainarray,v13.cdf
#         HuEx-1_0-st-v2,ENSE,brainarray,v13.cdf
#   /rawData
#     /humanLiverKidney
#       /HuEx-1_0-st-v2
#         huex_wta_kidney_A.CEL
#         huex_wta_kidney_B.CEL
#         huex_wta_kidney_C.CEL
#         huex_wta_liver_A.CEL
#         huex_wta_liver_B.CEL
#         huex_wta_liver_C.CEL

# Background correction and normalization using aroma.affymetrix -------------

verbose <- Arguments$getVerbose(-8)
timestampOn(verbose)

# Data set name and the names of the gene-, transcript- and exon-level CDFs.
projectName <- "humanLiverKidney"
chipType <- "HuEx-1_0-st-v2"
cdfGeneFile <- "HuEx-1_0-st-v2,ENSG,brainarray,v13"
cdfTranscriptFile <- "HuEx-1_0-st-v2,ENST,brainarray,v13"
cdfExonFile <- "HuEx-1_0-st-v2,ENSE,brainarray,v13"

cdfG <- AffymetrixCdfFile$byChipType(cdfGeneFile)
cdfT <- AffymetrixCdfFile$byChipType(cdfTranscriptFile)
cdfE <- AffymetrixCdfFile$byChipType(cdfExonFile)

# The CEL set is read using the gene-level CDF.
cs <- AffymetrixCelSet$byName(projectName, cdf = cdfG)

bc <- NormExpBackgroundCorrection(
  cs,
  method = "mle",
  tag = c("*", "ensembl")
)
csBC <- process(bc, verbose = verbose)

qn <- QuantileNormalization(csBC, typesToUpdate = "pm")
csN <- process(qn, verbose = verbose)

# Analysis with the LiWangWong method ----------------------------------------

isoPlmLWW <- IsoformPlm(
  csN,
  method = "LiWangWong",
  cdfT = cdfTranscriptFile,
  cdfE = cdfExonFile,
  maxIter = 2000,
  fileName = "LWWTEST"
)
print("isoPlm object constructed")

# fit() is the call that actually performs the summarization.
outLWW <- fit(isoPlmLWW, verbose = TRUE)

# Collect the results of the LiWangWong fit.
TListLWW <- getTranscriptConcentrationList(isoPlmLWW)
AListLWW <- getProbeAffinityList(isoPlmLWW)
GListLWW <- getTranscriptStructureList(isoPlmLWW, Gmatrix = "G")
GcListLWW <- getTranscriptStructureList(isoPlmLWW, Gmatrix = "Gc")
TMatrixLWW <- getTranscriptConcentrationMatrix(isoPlmLWW)

# Analysis with the SPACE method: estimating the number of transcripts -------

isoPlmSPACEest <- IsoformPlm(
  csN,
  method = "SPACE",
  nbrOfTranscripts = "estimate",
  cdfT = cdfTranscriptFile,
  cdfE = cdfExonFile,
  fileName = "SPACETEST"
)
print("isoPlm object constructed")

# fit() is the call that actually performs the summarization.
outSPACEest <- fit(isoPlmSPACEest, verbose = TRUE)
# Collect the results of the SPACE fit with an estimated number of
# transcripts (isoPlmSPACEest is constructed and fitted earlier in the script).
TListSPACEest <- getTranscriptConcentrationList(isoPlmSPACEest)
AListSPACEest <- getProbeAffinityList(isoPlmSPACEest)
GListSPACEest <- getTranscriptStructureList(isoPlmSPACEest, Gmatrix = "G")
GcListSPACEest <- getTranscriptStructureList(isoPlmSPACEest, Gmatrix = "Gc")
TMatrixSPACEest <- getTranscriptConcentrationMatrix(isoPlmSPACEest)

# SPACE method: fit only some genes -------------------------------------------

isoPlmSPACEestGenes <- IsoformPlm(
  csN,
  method = "SPACE",
  nbrOfTranscripts = "estimate",
  cdfT = cdfTranscriptFile,
  cdfE = cdfExonFile,
  fileName = "SPACETEST_Genes"
)
# Restrict the fit to the listed unit indices.
outSPACEestGenes <- fit(
  isoPlmSPACEestGenes,
  verbose = TRUE,
  units = c(186, 196, 241)
)
TMatrixSPACEestGenes <- getTranscriptConcentrationMatrix(isoPlmSPACEestGenes)

# SPACE method: fit the unit that corresponds to an Ensembl identifier --------

isoPlmSPACEestEnsembl <- IsoformPlm(
  csN,
  method = "SPACE",
  nbrOfTranscripts = "estimate",
  cdfT = cdfTranscriptFile,
  cdfE = cdfExonFile,
  fileName = "SPACETEST_Ensembl"
)

# Look up the unit index by name in the gene-level CDF. match() returns NA
# when the name is not present, so stop with a clear message instead of
# passing NA on to fit().
ensemblUnitName <- "ENSG00000005302_at"
unit <- match(ensemblUnitName, getUnitNames(cdfG))
if (is.na(unit)) {
  stop(
    "Unit '", ensemblUnitName, "' was not found in the gene-level CDF.",
    call. = FALSE
  )
}
outSPACEestEnsembl <- fit(isoPlmSPACEestEnsembl, units = unit, verbose = TRUE)
TMatrixSPACEestEnsembl <- getTranscriptConcentrationMatrix(isoPlmSPACEestEnsembl)

# SPACE method: number of transcripts taken from the CDF ----------------------
# The number of transcripts for each gene equals the number of transcripts
# in Ensembl.
#
# NOTE(review): fileName "SPACETEST" is also used by the "estimate" model and
# by the two-transcript model below. If fileName identifies where results are
# stored, these fits may collide - TODO confirm against the SPACE docs and
# give each model its own name if so.
isoPlmSPACEcdf <- IsoformPlm(
  csN,
  method = "SPACE",
  nbrOfTranscripts = "cdf",
  cdfT = cdfTranscriptFile,
  cdfE = cdfExonFile,
  fileName = "SPACETEST"
)
outSPACEcdf <- fit(isoPlmSPACEcdf, verbose = TRUE)

TListSPACEcdf <- getTranscriptConcentrationList(isoPlmSPACEcdf)
AListSPACEcdf <- getProbeAffinityList(isoPlmSPACEcdf)
GListSPACEcdf <- getTranscriptStructureList(isoPlmSPACEcdf, Gmatrix = "G")
GcListSPACEcdf <- getTranscriptStructureList(isoPlmSPACEcdf, Gmatrix = "Gc")
TMatrixSPACEcdf <- getTranscriptConcentrationMatrix(isoPlmSPACEcdf)

# SPACE method: number of transcripts for each gene equal to 2 ----------------

isoPlmSPACE2 <- IsoformPlm(
  csN,
  method = "SPACE",
  nbrOfTranscripts = 2,
  cdfT = cdfTranscriptFile,
  cdfE = cdfExonFile,
  fileName = "SPACETEST"
)
outSPACE2 <- fit(isoPlmSPACE2, verbose = TRUE)

TListSPACE2 <- getTranscriptConcentrationList(isoPlmSPACE2)
AListSPACE2 <- getProbeAffinityList(isoPlmSPACE2)
GListSPACE2 <- getTranscriptStructureList(isoPlmSPACE2, Gmatrix = "G")
GcListSPACE2 <- getTranscriptStructureList(isoPlmSPACE2, Gmatrix = "Gc")
TMatrixSPACE2 <- getTranscriptConcentrationMatrix(isoPlmSPACE2)