######################################################## ### code chunk number 0: Installing ######################################################## #Installing with devtools library(devtools) install_github("mksamur/RTCGAToolbox") ######################################################## ### code chunk number 1: Prep Stage ### Loading library and checking Firehose parameters ######################################################## #Call package library("RTCGAToolbox") #Check dates and datasets getFirehoseDatasets() #Current datasets getFirehoseRunningDates() #stddata run dates getFirehoseAnalyzeDates(last=3) # last 3 analysis date ######################################################## ### code chunk number 2: Data Client ### Downloading selected dataset and data types ######################################################## #Call data client to get data from Firehose #Following funtion will download RNASeq and microarray processed #gene expression, mutation and GISTIC2 processed copy number #data for BRCA dataset from Firehose myData = getFirehoseData(dataset="BRCA", runDate="20140416",gistic2_Date="20140115", RNAseq_Gene=TRUE,mRNA_Array=TRUE,Mutation=TRUE) #Data download and preparation may take longer time depends on #connection bandwidth and computer ######################################################## ### code chunk number 3: Analysis Functions ######################################################## ######################################################## ### code chunk number 3A: Differential Gene Expression ######################################################## #Differential gene expression #Following function will do DGE analysis for gene level expression data #If there are data from multiple platform with tumor and normal samples #Each of those datasets will be used dgeResults = getDiffExpressedGenes(dataObject=myData) #Check results length(dgeResults) dgeResults[[1]]@Dataset dgeResults[[2]]@Dataset head(dgeResults[[1]]@Toptable) ######################################################## ### code chunk number 3B: CN and GE correlation ######################################################## #Correlation analysis #Will do correlation test between copy number data and gene expression data #If there are GE data from multiple platfrom, will process all #For RNASeq data, this function currently can not use raw count data, data should be normalized corResults = getCNGECorrelation(dataObject=myData) corResults[[1]]@Dataset head(corResults[[1]]@Correlations) ######################################################## ### code chunk number 3C: Mutation Frequencies ######################################################## #Mutation frequencies mutFrq = getMutationRate(dataObject=myData) head(mutFrq[order(mutFrq[,2],decreasing=TRUE),]) # Creating survival data frame and running analysis for # PIK3CA which is one of the most frequently mutated gene ######################################################## ### code chunk number 3D: Survival Analysis ######################################################## #Prepare survival data frame from clinical data #1st sample id, 2nd column Time and last column Censor clinicData = myData@Clinical clinicData = clinicData[3:5,2:ncol(clinicData)] clinicData[3,is.na(clinicData[3,])] = clinicData[2,is.na(clinicData[3,])] survData <- data.frame(Samples=colnames(clinicData), Time=as.numeric(clinicData[3,]), Censor=as.numeric(clinicData[1,])) #Do survival test and draw KM plot #This function can only use normalized expression data getSurvival(dataObject=myData,geneSymbols=c("PIK3CA"), sampleTimeCensor=survData) #You will get Current version of survival tool only works with normalized RNASeq data! #Warning and function will only do analysis with microarray data because #we did not wonload the normalized RNA seq data! ######################################################## ### code chunk number 4: Report Figure ######################################################## # Creating dataset analysis summary figure with getReport. # Figure will be saved as PDF file. Plese check your working dir. #Function requires gene coordinates, you may use data call to get hg19 #gene corrdinate table or you may use one from your favorite data(hg19.ucsc.gene.locations) head(hg19.ucsc.gene.locations) getReport(dataObject=myData,DGEResult1=dgeResults[[1]], DGEResult2=dgeResults[[2]], geneLocations=hg19.ucsc.gene.locations)