# Load the phenotype table. The file is read with readRDS() even though it
# carries an .RData extension.
tdata.FPKM.sample.info <- readRDS(
  here("Data", "20190406_RNAseq_B6_4wk_2DG_counts_phenotypes.RData")
)

# Load the numeric table (same serialization format as above).
tdata.FPKM <- readRDS(
  here("Data", "20190406_RNAseq_B6_4wk_2DG_counts_numeric.RData")
)

# Apply a log(x + 1) transform and store the result as a data frame.
log.tdata.FPKM <- as.data.frame(log(tdata.FPKM + 1))

# Append columns 27238-27240 of the phenotype table to the transformed
# values, then remove the row named "A113".
log.tdata.FPKM.sample.info <- cbind(
  log.tdata.FPKM,
  tdata.FPKM.sample.info[, 27238:27240]
) %>%
  rownames_to_column() %>%
  filter(rowname != "A113") %>%
  column_to_rownames()

# Keep only the columns that are non-zero in more than half of the rows.
log.tdata.FPKM.subset <- log.tdata.FPKM[
  ,
  colMeans(log.tdata.FPKM != 0) > 0.5
]

# Same as above for the column-filtered table: append the three phenotype
# columns and remove row "A113".
log.tdata.FPKM.sample.info.subset <- cbind(
  log.tdata.FPKM.subset,
  tdata.FPKM.sample.info[, 27238:27240]
) %>%
  rownames_to_column() %>%
  filter(rowname != "A113") %>%
  column_to_rownames()

# Restrict to rows whose Tissue is skeletal muscle, spleen, or small intestine.
log.tdata.FPKM.sample.info.subset.SM.SI.spleen <-
  log.tdata.FPKM.sample.info.subset %>%
  rownames_to_column() %>%
  filter(Tissue %in% c("Skeletal Muscle", "Spleen", "Small Intestine")) %>%
  column_to_rownames()

Wrangle Data

I will use Mouse IDs, Tissue, Treatment, and Time to keep track of the values in the matrices. All other covariates will be discarded.

# Encode the sample metadata in the row names as
# "<existing row name>:<Time>:<Treatment>:<Tissue>" so every row remains
# identifiable after the covariate columns are removed below.
covariate.columns <- c("Time", "Treatment", "Tissue")

# Fail loudly if a covariate column is absent. `[[` does not partial-match
# column names the way `$` does, and a missing column would otherwise
# produce malformed row names without any error.
stopifnot(
  all(
    covariate.columns %in%
      colnames(log.tdata.FPKM.sample.info.subset.SM.SI.spleen)
  )
)

rownames(log.tdata.FPKM.sample.info.subset.SM.SI.spleen) <- paste(
  rownames(log.tdata.FPKM.sample.info.subset.SM.SI.spleen),
  log.tdata.FPKM.sample.info.subset.SM.SI.spleen[["Time"]],
  log.tdata.FPKM.sample.info.subset.SM.SI.spleen[["Treatment"]],
  log.tdata.FPKM.sample.info.subset.SM.SI.spleen[["Tissue"]],
  sep = ":"
)

# Discard the covariate columns by name instead of by hard-coded position
# (previously columns 17333-17335), so this step stays correct if upstream
# filtering changes how many columns are retained.
log.tdata.FPKM.sample.info.subset.SM.SI.spleen <-
  log.tdata.FPKM.sample.info.subset.SM.SI.spleen[
    ,
    !(colnames(log.tdata.FPKM.sample.info.subset.SM.SI.spleen) %in%
      covariate.columns),
    drop = FALSE
  ]

# Preview the first rows of the first five columns.
head(log.tdata.FPKM.sample.info.subset.SM.SI.spleen[, 1:5])
##                                 ENSMUSG00000000001 ENSMUSG00000000028
## A001:4 wks:None:Spleen                    3.819688          2.1770219
## A005:4 wks:None:Skeletal Muscle           1.921325          0.7371641
## A006:4 wks:None:Small Intestine           3.984158          1.7544037
## A010:96 hrs:2DG:Spleen                    3.756071          1.5602477
## A014:96 hrs:2DG:Skeletal Muscle           1.993339          0.5306283
## A015:96 hrs:2DG:Small Intestine           4.654532          1.6808279
##                                 ENSMUSG00000000031 ENSMUSG00000000037
## A001:4 wks:None:Spleen                  0.34358970        0.076961041
## A005:4 wks:None:Skeletal Muscle         4.21019981        0.009950331
## A006:4 wks:None:Small Intestine         0.16551444        0.148420005
## A010:96 hrs:2DG:Spleen                  0.41871033        0.086177696
## A014:96 hrs:2DG:Skeletal Muscle         4.19026062        0.048790164
## A015:96 hrs:2DG:Small Intestine         0.06765865        0.364643114
##                                 ENSMUSG00000000049
## A001:4 wks:None:Spleen                   0.7701082
## A005:4 wks:None:Skeletal Muscle          0.3074847
## A006:4 wks:None:Small Intestine          0.7839015
## A010:96 hrs:2DG:Spleen                   0.6365768
## A014:96 hrs:2DG:Skeletal Muscle          0.7080358
## A015:96 hrs:2DG:Small Intestine          0.5306283

Check Data for Missing Values

WGCNA will have poor results if the data have too many missing values. I checked if any genes fall into this category.

# Run the project's missing() helper on the expression table.
# NOTE(review): missing() here must be a project-defined function that masks
# base::missing (which only works on function arguments) -- confirm where it
# is sourced from.
log.tdata.FPKM.sample.info.subset.SM.SI.spleen.missing <-
  missing(log.tdata.FPKM.sample.info.subset.SM.SI.spleen)

# Print the overall pass/fail flag (allOK) from goodSamplesGenes().
cat(
  "logFPKM: ",
  goodSamplesGenes(
    log.tdata.FPKM.sample.info.subset.SM.SI.spleen.missing,
    verbose = 0
  )[["allOK"]],
  "\n"
)
## logFPKM:  TRUE

WGCNA reports that all data are good! I now use hierarchical clustering to detect any obvious outliers. I did not see any particularly egregious outliers.

# Run the sampleclustering() helper on the checked table.
# NOTE(review): sampleclustering() is defined outside this view; presumably
# it draws the hierarchical-clustering plot described in the text -- confirm.
sampleclustering(
  log.tdata.FPKM.sample.info.subset.SM.SI.spleen.missing
)

# Persist the checked table for downstream steps. The file is written with
# saveRDS() (read it back with readRDS()) despite the .RData extension.
saveRDS(
  log.tdata.FPKM.sample.info.subset.SM.SI.spleen.missing,
  file = here(
    "Data",
    "SM.SI.spleen",
    "log.tdata.FPKM.sample.info.subset.SM.SI.spleen.missing.WGCNA.RData"
  )
)

Analysis performed by Ann Wells

The Carter Lab The Jackson Laboratory 2023

ann.wells@jax.org