log.tdata.FPKM.sample.info.subset.liver.WGCNA <- readRDS(here("Data","Liver","log.tdata.FPKM.sample.info.subset.liver.missing.WGCNA.RData"))
The WGCNA package requires some strict R session environment set up.
options(stringsAsFactors=FALSE)
# Run the following only if running from terminal
# Does NOT work in RStudio
#enableWGCNAThreads()
WGCNA uses a smooth softmax function to force a scale-free topology on the gene expression network. I cycled through a set of threshold power values to see which one is the best for the specific topology. As recommended by the authors, I choose the smallest power such that the scale-free topology correlation attempts to hit 0.9.
threshold <- softthreshold(soft)
knitr::kable(soft$fitIndices)
Power | SFT.R.sq | slope | truncated.R.sq | mean.k. | median.k. | max.k. |
---|---|---|---|---|---|---|
1 | 0.1676528 | -0.7406579 | 0.4121981 | 4499.49707 | 4241.055886 | 6940.8574 |
2 | 0.8233717 | -1.2662434 | 0.8360069 | 1837.53642 | 1546.551878 | 4095.0359 |
3 | 0.9331752 | -1.3770692 | 0.9232597 | 934.21706 | 679.017891 | 2824.6421 |
4 | 0.9467113 | -1.4014966 | 0.9357120 | 544.14677 | 336.917903 | 2135.8386 |
5 | 0.9309819 | -1.4086437 | 0.9193551 | 347.60495 | 186.808962 | 1697.4075 |
6 | 0.9206839 | -1.4071824 | 0.9114456 | 237.24870 | 108.794320 | 1393.6070 |
7 | 0.9242562 | -1.3970730 | 0.9191278 | 170.12157 | 66.829971 | 1170.6657 |
8 | 0.9233451 | -1.3917799 | 0.9232094 | 126.71113 | 43.437328 | 1000.2308 |
9 | 0.9226327 | -1.3901466 | 0.9287457 | 97.25505 | 29.378261 | 865.9058 |
10 | 0.9192027 | -1.3899702 | 0.9304838 | 76.48167 | 20.087376 | 758.5059 |
11 | 0.9125883 | -1.3908586 | 0.9318177 | 61.36282 | 13.995328 | 670.4846 |
12 | 0.9046831 | -1.3934242 | 0.9313136 | 50.06803 | 10.000650 | 596.9870 |
13 | 0.9044865 | -1.3903093 | 0.9370619 | 41.44257 | 7.318231 | 534.8352 |
14 | 0.9047933 | -1.3955213 | 0.9434850 | 34.73115 | 5.442770 | 481.7141 |
15 | 0.9065988 | -1.3939986 | 0.9491630 | 29.42419 | 4.070167 | 435.8951 |
16 | 0.9021656 | -1.4000051 | 0.9494750 | 25.16853 | 3.091694 | 396.0608 |
17 | 0.8804805 | -1.4193153 | 0.9365922 | 21.71368 | 2.391115 | 361.1888 |
18 | 0.8833739 | -1.4207172 | 0.9413172 | 18.87824 | 1.855614 | 330.4737 |
19 | 0.8867793 | -1.4120067 | 0.9503302 | 16.52846 | 1.460437 | 303.2721 |
20 | 0.8825087 | -1.4071567 | 0.9529080 | 14.56413 | 1.151573 | 279.0638 |
soft$fitIndices %>%
download_this(
output_name = "Soft Threshold Liver",
output_extension = ".csv",
button_label = "Download data as csv",
button_type = "info",
has_icon = TRUE,
icon = "fa fa-save"
)
I generated a weighted adjacency matrix for the gene expression profile network using the softmax threshold I determined in the previous step. Then I generate the Topological Overlap Matrix (TOM) for the adjacency matrix. This reduces noise and spurious associations according to the authors.I use the TOM matrix to cluster the profiles of each metabolite based on their similarity. I use dynamic tree cutting to generate modules.
soft.power <- which(soft$fitIndices$SFT.R.sq >= 0.88)[1]
cat("\n", "Power chosen")
##
## Power chosen
soft.power
## [1] 3
cat("\n \n")
adj.mtx <- adjacency(log.tdata.FPKM.sample.info.subset.liver.WGCNA, power=soft.power)
TOM = TOMsimilarity(adj.mtx)
## ..connectivity..
## ..matrix multiplication (system BLAS)..
## ..normalization..
## ..done.
diss.TOM = 1 - TOM
metabolite.clust <- hclust(as.dist(diss.TOM), method="average")
min.module.size <- 15
modules <- cutreeDynamic(dendro=metabolite.clust, distM=diss.TOM, deepSplit=2, pamRespectsDendro=FALSE, minClusterSize=min.module.size)
## ..cutHeight not given, setting it to 0.959 ===> 99% of the (truncated) height range in dendro.
## ..done.
# Plot modules
cols <- labels2colors(modules)
table(cols)
## cols
## bisque4 black blue brown brown4
## 41 375 1289 824 46
## cyan darkgreen darkgrey darkmagenta darkolivegreen
## 189 125 117 87 89
## darkorange darkorange2 darkred darkslateblue darkturquoise
## 104 52 135 41 121
## floralwhite green greenyellow grey60 ivory
## 52 480 255 162 52
## lightcyan lightcyan1 lightgreen lightsteelblue1 lightyellow
## 187 53 159 54 157
## magenta mediumpurple3 midnightblue orange orangered4
## 310 57 187 111 59
## paleturquoise pink plum1 plum2 purple
## 91 338 63 40 287
## red royalblue saddlebrown salmon sienna3
## 465 136 96 194 76
## skyblue skyblue3 steelblue tan thistle2
## 98 67 92 255 35
## turquoise violet white yellow yellowgreen
## 7909 90 100 539 69
plotDendroAndColors(
metabolite.clust, cols, xlab="", sub="", main="Gene Clustering on TOM-based Similarity",
dendroLabels=FALSE, hang=0.03, addGuide=TRUE, guideHang=0.05
)
This function in WGCNA calculates the total connectivity of a node (metabolite) in the network. The kTotal column specifies this connectivity. The kWithin column specifies the node connectivity within its module. The kOut column specifies the difference between kTotal and kWithin. Generally, the out degree should be lower than the within degree, since the WGCNA procedure groups similar metabolite profiles together.
net.deg <- intramodularConnectivity(adj.mtx, merged.cols)
saveRDS(net.deg,here("Data","Liver","Chang_2DG_BL6_connectivity_liver.RData"))
head(net.deg)
## kTotal kWithin kOut kDiff
## ENSMUSG00000000001 1438.8692 1123.35965 315.5095 807.850150
## ENSMUSG00000000028 435.1013 221.51747 213.5838 7.933677
## ENSMUSG00000000031 883.0249 165.19496 717.8299 -552.634964
## ENSMUSG00000000037 506.8853 23.07137 483.8140 -460.742602
## ENSMUSG00000000049 604.7389 24.80179 579.9371 -555.135345
## ENSMUSG00000000056 579.9107 391.64435 188.2663 203.378039
The dataset generated coexpression modules of the following sizes. The grey submodule represents metabolites that did not fit well into any of the modules.
table(merged.cols)
## merged.cols
## bisque4 black brown brown4 cyan
## 41 375 1011 46 615
## darkgreen darkgrey darkolivegreen darkorange darkorange2
## 125 1661 280 156 52
## darkred darkslateblue darkturquoise green grey60
## 135 41 121 480 1463
## ivory lightcyan lightcyan1 lightgreen lightsteelblue1
## 52 187 53 159 54
## mediumpurple3 orange pink plum2 salmon
## 57 111 338 176 733
## skyblue skyblue3 tan turquoise violet
## 98 67 255 7909 90
## yellowgreen
## 69
# Module names in descending order of size
module.labels <- names(table(merged.cols))[order(table(merged.cols), decreasing=T)]
# Order eigenmetabolites by module labels
merged.mes <- merged.mes[,paste0("ME", module.labels)]
colnames(merged.mes) <- paste0("log.tdata.FPKM.sample.info.subset.liver.WGCNA_", module.labels)
module.eigens <- merged.mes
# Add log.tdata.FPKM.sample.info.subset.WGCNA tag to labels
module.labels <- paste0("log.tdata.FPKM.sample.info.subset.liver.WGCNA_", module.labels)
# Generate and store metabolite membership
module.membership <- data.frame(
Gene=colnames(log.tdata.FPKM.sample.info.subset.liver.WGCNA),
Module=paste0("log.tdata.FPKM.sample.info.subset.liver.WGCNA_", merged.cols)
)
saveRDS(module.labels, here("Data","Liver","log.tdata.FPKM.sample.info.subset.liver.WGCNA.module.labels.RData"))
saveRDS(module.eigens, here("Data","Liver","log.tdata.FPKM.sample.info.subset.liver.WGCNA.module.eigens.RData"))
saveRDS(module.membership, here("Data","Liver", "log.tdata.FPKM.sample.info.subset.liver.WGCNA.module.membership.RData"))
write.table(module.labels, here("Data","Liver","log.tdata.FPKM.sample.info.subset.liver.WGCNA.module.labels.csv"), row.names=F, col.names=F, sep=",")
write.csv(module.eigens, here("Data","Liver","log.tdata.FPKM.sample.info.subset.liver.WGCNA.module.eigens.csv"))
write.csv(module.membership, here("Data","Liver","log.tdata.FPKM.sample.info.subset.liver.WGCNA.module.membership.csv"), row.names=F)
Analysis performed by Ann Wells
The Carter Lab The Jackson Laboratory 2023
ann.wells@jax.org