<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1d3 20150301//EN" "http://jats.nlm.nih.gov/publishing/1.1d3/JATS-journalpublishing1.dtd">
<article article-type="research-article" dtd-version="1.1d3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">PLoS Biol</journal-id>
<journal-id journal-id-type="publisher-id">plos</journal-id>
<journal-id journal-id-type="pmc">plosbiol</journal-id>
<journal-title-group>
<journal-title>PLOS Biology</journal-title>
</journal-title-group>
<issn pub-type="ppub">1544-9173</issn>
<issn pub-type="epub">1545-7885</issn>
<publisher>
<publisher-name>Public Library of Science</publisher-name>
<publisher-loc>San Francisco, CA USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.1371/journal.pbio.3000583</article-id>
<article-id pub-id-type="publisher-id">PBIOLOGY-D-19-01524</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Methods and Resources</subject>
</subj-group>
<subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Computational biology</subject><subj-group><subject>Genome analysis</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Genomics</subject><subj-group><subject>Genome analysis</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Oncology</subject><subj-group><subject>Cancers and neoplasms</subject><subj-group><subject>Carcinomas</subject><subj-group><subject>Squamous cell carcinomas</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Gene identification and analysis</subject><subj-group><subject>Genetic networks</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Computer and information sciences</subject><subj-group><subject>Network analysis</subject><subj-group><subject>Genetic networks</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Gene expression</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Mutation</subject><subj-group><subject>Somatic mutation</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Computational techniques</subject><subj-group><subject>Computational pipelines</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Oncology</subject><subj-group><subject>Basic cancer research</subject><subj-group><subject>Cancer genomics</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Genomics</subject><subj-group><subject>Genomic medicine</subject><subj-group><subject>Cancer genomics</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Computational biology</subject><subj-group><subject>Genome analysis</subject><subj-group><subject>Transcriptome analysis</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Genomics</subject><subj-group><subject>Genome analysis</subject><subj-group><subject>Transcriptome analysis</subject></subj-group></subj-group></subj-group></subj-group></subj-group></article-categories>
<title-group>
<article-title>Knowledge-guided analysis of "omics" data using the KnowEnG cloud platform</article-title>
<alt-title alt-title-type="running-head">Knowledge-guided analysis of ‘omics’ data using the KnowEnG cloud platform</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0002-4683-6271</contrib-id>
<name name-style="western">
<surname>Blatti</surname>
<given-names>Charles</given-names>
<suffix>III</suffix>
</name>
<role content-type="http://credit.casrai.org/">Conceptualization</role>
<role content-type="http://credit.casrai.org/">Data curation</role>
<role content-type="http://credit.casrai.org/">Methodology</role>
<role content-type="http://credit.casrai.org/">Resources</role>
<role content-type="http://credit.casrai.org/">Writing – original draft</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" equal-contrib="yes" xlink:type="simple">
<name name-style="western">
<surname>Emad</surname>
<given-names>Amin</given-names>
</name>
<role content-type="http://credit.casrai.org/">Conceptualization</role>
<role content-type="http://credit.casrai.org/">Methodology</role>
<role content-type="http://credit.casrai.org/">Supervision</role>
<role content-type="http://credit.casrai.org/">Visualization</role>
<role content-type="http://credit.casrai.org/">Writing – original draft</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0002-8197-9577</contrib-id>
<name name-style="western">
<surname>Berry</surname>
<given-names>Matthew J.</given-names>
</name>
<role content-type="http://credit.casrai.org/">Resources</role>
<role content-type="http://credit.casrai.org/">Visualization</role>
<role content-type="http://credit.casrai.org/">Writing – original draft</role>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0003-3533-0010</contrib-id>
<name name-style="western">
<surname>Gatzke</surname>
<given-names>Lisa</given-names>
</name>
<role content-type="http://credit.casrai.org/">Visualization</role>
<role content-type="http://credit.casrai.org/">Writing – original draft</role>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Epstein</surname>
<given-names>Milt</given-names>
</name>
<role content-type="http://credit.casrai.org/">Conceptualization</role>
<role content-type="http://credit.casrai.org/">Resources</role>
<role content-type="http://credit.casrai.org/">Visualization</role>
<role content-type="http://credit.casrai.org/">Writing – original draft</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0001-5859-6896</contrib-id>
<name name-style="western">
<surname>Lanier</surname>
<given-names>Daniel</given-names>
</name>
<role content-type="http://credit.casrai.org/">Methodology</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0001-5990-7852</contrib-id>
<name name-style="western">
<surname>Rizal</surname>
<given-names>Pramod</given-names>
</name>
<role content-type="http://credit.casrai.org/">Resources</role>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Ge</surname>
<given-names>Jing</given-names>
</name>
<role content-type="http://credit.casrai.org/">Resources</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0003-2794-3044</contrib-id>
<name name-style="western">
<surname>Liao</surname>
<given-names>Xiaoxia</given-names>
</name>
<role content-type="http://credit.casrai.org/">Resources</role>
<role content-type="http://credit.casrai.org/">Visualization</role>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Sobh</surname>
<given-names>Omar</given-names>
</name>
<role content-type="http://credit.casrai.org/">Resources</role>
<role content-type="http://credit.casrai.org/">Supervision</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0003-4889-5010</contrib-id>
<name name-style="western">
<surname>Lambert</surname>
<given-names>Mike</given-names>
</name>
<role content-type="http://credit.casrai.org/">Resources</role>
<role content-type="http://credit.casrai.org/">Visualization</role>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0002-7240-3402</contrib-id>
<name name-style="western">
<surname>Post</surname>
<given-names>Corey S.</given-names>
</name>
<role content-type="http://credit.casrai.org/">Data curation</role>
<xref ref-type="aff" rid="aff004"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0002-1215-8038</contrib-id>
<name name-style="western">
<surname>Xiao</surname>
<given-names>Jinfeng</given-names>
</name>
<role content-type="http://credit.casrai.org/">Methodology</role>
<xref ref-type="aff" rid="aff004"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Groves</surname>
<given-names>Peter</given-names>
</name>
<role content-type="http://credit.casrai.org/">Resources</role>
<role content-type="http://credit.casrai.org/">Visualization</role>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0003-3665-6012</contrib-id>
<name name-style="western">
<surname>Epstein</surname>
<given-names>Aidan T.</given-names>
</name>
<role content-type="http://credit.casrai.org/">Data curation</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0002-1342-9709</contrib-id>
<name name-style="western">
<surname>Chen</surname>
<given-names>Xi</given-names>
</name>
<role content-type="http://credit.casrai.org/">Resources</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0003-0117-7839</contrib-id>
<name name-style="western">
<surname>Srinivasan</surname>
<given-names>Subhashini</given-names>
</name>
<role content-type="http://credit.casrai.org/">Project administration</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0003-0855-6436</contrib-id>
<name name-style="western">
<surname>Lehnert</surname>
<given-names>Erik</given-names>
</name>
<role content-type="http://credit.casrai.org/">Resources</role>
<xref ref-type="aff" rid="aff005"><sup>5</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Kalari</surname>
<given-names>Krishna R.</given-names>
</name>
<role content-type="http://credit.casrai.org/">Conceptualization</role>
<xref ref-type="aff" rid="aff006"><sup>6</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Wang</surname>
<given-names>Liewei</given-names>
</name>
<role content-type="http://credit.casrai.org/">Conceptualization</role>
<xref ref-type="aff" rid="aff007"><sup>7</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Weinshilboum</surname>
<given-names>Richard M.</given-names>
</name>
<role content-type="http://credit.casrai.org/">Conceptualization</role>
<role content-type="http://credit.casrai.org/">Supervision</role>
<xref ref-type="aff" rid="aff007"><sup>7</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Song</surname>
<given-names>Jun S.</given-names>
</name>
<role content-type="http://credit.casrai.org/">Conceptualization</role>
<role content-type="http://credit.casrai.org/">Supervision</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff008"><sup>8</sup></xref>
<xref ref-type="aff" rid="aff009"><sup>9</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Jongeneel</surname>
<given-names>C. Victor</given-names>
</name>
<role content-type="http://credit.casrai.org/">Supervision</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Han</surname>
<given-names>Jiawei</given-names>
</name>
<role content-type="http://credit.casrai.org/">Methodology</role>
<role content-type="http://credit.casrai.org/">Supervision</role>
<xref ref-type="aff" rid="aff004"><sup>4</sup></xref>
<xref ref-type="aff" rid="aff009"><sup>9</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Ravaioli</surname>
<given-names>Umberto</given-names>
</name>
<role content-type="http://credit.casrai.org/">Resources</role>
<role content-type="http://credit.casrai.org/">Supervision</role>
<xref ref-type="aff" rid="aff010"><sup>10</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Sobh</surname>
<given-names>Nahil</given-names>
</name>
<role content-type="http://credit.casrai.org/">Conceptualization</role>
<role content-type="http://credit.casrai.org/">Methodology</role>
<role content-type="http://credit.casrai.org/">Resources</role>
<role content-type="http://credit.casrai.org/">Supervision</role>
<role content-type="http://credit.casrai.org/">Writing – original draft</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="fn" rid="econtrib001"><sup>‡</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0002-5268-1915</contrib-id>
<name name-style="western">
<surname>Bushell</surname>
<given-names>Colleen B.</given-names>
</name>
<role content-type="http://credit.casrai.org/">Conceptualization</role>
<role content-type="http://credit.casrai.org/">Resources</role>
<role content-type="http://credit.casrai.org/">Supervision</role>
<role content-type="http://credit.casrai.org/">Visualization</role>
<role content-type="http://credit.casrai.org/">Writing – original draft</role>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
<xref ref-type="fn" rid="econtrib001"><sup>‡</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0001-6033-7746</contrib-id>
<name name-style="western">
<surname>Sinha</surname>
<given-names>Saurabh</given-names>
</name>
<role content-type="http://credit.casrai.org/">Conceptualization</role>
<role content-type="http://credit.casrai.org/">Data curation</role>
<role content-type="http://credit.casrai.org/">Methodology</role>
<role content-type="http://credit.casrai.org/">Resources</role>
<role content-type="http://credit.casrai.org/">Supervision</role>
<role content-type="http://credit.casrai.org/">Writing – original draft</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff004"><sup>4</sup></xref>
<xref ref-type="aff" rid="aff009"><sup>9</sup></xref>
<xref ref-type="fn" rid="econtrib001"><sup>‡</sup></xref>
<xref ref-type="corresp" rid="cor001">*</xref>
</contrib>
</contrib-group>
<aff id="aff001"><label>1</label> <addr-line>Carl R. Woese Institute for Genomic Biology, University of Illinois at Urbana-Champaign, Urbana, Illinois, United States of America</addr-line></aff>
<aff id="aff002"><label>2</label> <addr-line>Department of Electrical and Computer Engineering, McGill University, Montreal, Canada</addr-line></aff>
<aff id="aff003"><label>3</label> <addr-line>National Center for Supercomputing Applications, University of Illinois at Urbana-Champaign, Urbana, Illinois, United States of America</addr-line></aff>
<aff id="aff004"><label>4</label> <addr-line>Department of Computer Science, University of Illinois at Urbana-Champaign, Urbana, Illinois, United States of America</addr-line></aff>
<aff id="aff005"><label>5</label> <addr-line>Seven Bridges Genomics, Charlestown, Massachusetts, United States of America</addr-line></aff>
<aff id="aff006"><label>6</label> <addr-line>Department of Health Sciences Research, Mayo Clinic, Rochester, Minnesota, United States of America</addr-line></aff>
<aff id="aff007"><label>7</label> <addr-line>Department of Molecular Pharmacology and Experimental Therapeutics, Mayo Clinic, Rochester, Minnesota, United States of America</addr-line></aff>
<aff id="aff008"><label>8</label> <addr-line>Department of Physics, University of Illinois at Urbana-Champaign, Urbana, Illinois, United States of America</addr-line></aff>
<aff id="aff009"><label>9</label> <addr-line>Cancer Center at Illinois, University of Illinois at Urbana-Champaign, Urbana, Illinois, United States of America</addr-line></aff>
<aff id="aff010"><label>10</label> <addr-line>Department of Electrical and Computer Engineering, University of Illinois at Urbana-Champaign, Urbana, Illinois, United States of America</addr-line></aff>
<contrib-group>
<contrib contrib-type="editor" xlink:type="simple">
<name name-style="western">
<surname>Freeman</surname>
<given-names>Thomas C.</given-names>
</name>
<role>Academic Editor</role>
<xref ref-type="aff" rid="edit1"/>
</contrib>
</contrib-group>
<aff id="edit1"><addr-line>University of Edinburgh, UNITED KINGDOM</addr-line></aff>
<author-notes>
<fn fn-type="conflict" id="coi001">
<p>The authors have declared that no competing interests exist.</p>
</fn>
<fn fn-type="other" id="econtrib001">
<p>‡These authors jointly supervised this work.</p>
</fn>
<corresp id="cor001">* E-mail: <email xlink:type="simple">sinhas@illinois.edu</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>23</day>
<month>1</month>
<year>2020</year>
</pub-date>
<pub-date pub-type="collection">
<month>1</month>
<year>2020</year>
</pub-date>
<volume>18</volume>
<issue>1</issue>
<elocation-id>e3000583</elocation-id>
<history>
<date date-type="received">
<day>29</day>
<month>5</month>
<year>2019</year>
</date>
<date date-type="accepted">
<day>19</day>
<month>12</month>
<year>2019</year>
</date>
</history>
<permissions>
<copyright-year>2020</copyright-year>
<copyright-holder>Blatti et al</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="info:doi/10.1371/journal.pbio.3000583"/>
<abstract>
<p>We present Knowledge Engine for Genomics (KnowEnG), a free-to-use computational system for analysis of genomics data sets, designed to accelerate biomedical discovery. It includes tools for popular bioinformatics tasks such as gene prioritization, sample clustering, gene set analysis, and expression signature analysis. The system specializes in “knowledge-guided” data mining and machine learning algorithms, in which user-provided data are analyzed in light of prior information about genes, aggregated from numerous knowledge bases and encoded in a massive “Knowledge Network.” KnowEnG adheres to “FAIR” principles (findable, accessible, interoperable, and reusable): its tools are easily portable to diverse computing environments, run on the cloud for scalable and cost-effective execution, and are interoperable with other computing platforms. The analysis tools are made available through multiple access modes, including a web portal with specialized visualization modules. We demonstrate the KnowEnG system’s potential value in democratization of advanced tools for the modern genomics era through several case studies that use its tools to recreate and expand upon the published analysis of cancer data sets.</p>
</abstract>
<abstract abstract-type="toc">
<p>Novel algorithms integrate prior knowledge about gene and protein interactions and annotations into fundamental analyses of genomic data sets such as gene prioritization, sample clustering, and gene set characterization. This study describes the development of a cloud-based platform, KnowEnG, which makes these "knowledge-guided" analyses readily accessible to researchers.</p>
</abstract>
<funding-group>
<award-group id="award001">
<funding-source>
<institution-wrap>
<institution-id institution-id-type="funder-id">http://dx.doi.org/10.13039/100000057</institution-id>
<institution>National Institute of General Medical Sciences</institution>
</institution-wrap>
</funding-source>
<award-id>U54GM114838</award-id>
<principal-award-recipient>
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0001-6033-7746</contrib-id>
<name name-style="western">
<surname>Sinha</surname>
<given-names>Saurabh</given-names>
</name>
</principal-award-recipient>
</award-group>
<funding-statement>This effort was part of KnowEnG BD2K Center supported by grant U54GM114838 awarded by the National Institute of General Medical Sciences through funds provided by the trans-National Institutes of Health Big Data to Knowledge (BD2K) initiative (<ext-link ext-link-type="uri" xlink:href="https://commonfund.nih.gov/bd2k" xlink:type="simple">https://commonfund.nih.gov/bd2k</ext-link>). It was also funded in part by the Cancer Center at Illinois. The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</funding-statement>
</funding-group>
<counts>
<fig-count count="6"/>
<table-count count="0"/>
<page-count count="25"/>
</counts>
<custom-meta-group>
<custom-meta id="data-availability">
<meta-name>Data Availability</meta-name>
<meta-value>Data for the primary analyses are available in GitHub at <ext-link ext-link-type="uri" xlink:href="https://github.com/KnowEnG/quickstart-demos/tree/master/publication_data/blatti_et_al_2019" xlink:type="simple">https://github.com/KnowEnG/quickstart-demos/tree/master/publication_data/blatti_et_al_2019</ext-link> and in the Supporting Information files.</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="sec001" sec-type="intro">
<title>Introduction</title>
<p>The rapid growth of genomics data sets [<xref ref-type="bibr" rid="pbio.3000583.ref001">1</xref>] and efforts to consolidate diverse data sets into common portals [<xref ref-type="bibr" rid="pbio.3000583.ref002">2</xref>] have created an urgent need today for software frameworks that can be easily applied to these genomic “big data” to extract biological and medical insights from them [<xref ref-type="bibr" rid="pbio.3000583.ref003">3</xref>]. Here, we present “KnowEnG” (Knowledge Engine for Genomics, pronounced “knowing”), a cloud-based engine that provides a suite of powerful and easy-to-use machine learning tools for analysis of genomics data sets. These tools, also referred to as “pipelines,” perform common bioinformatics analyses such as clustering of samples, gene prioritization, gene set characterization, and signature analysis. The tools are geared toward diverse omics data sets that can be represented as spreadsheets or tables (genes x samples) that record typical genomic profiles, such as gene expression, mutation counts, etc., for a collection of samples, at the resolution of individual genes. The pipelines help identify biologically meaningful patterns in the provided spreadsheet data, through ab initio analysis as well as by contextualizing with prior knowledge. The utility of KnowEnG is increased by co-localization of its tools with prior knowledge data sets from a large variety of sources.</p>
<sec id="sec002">
<title>Diverse computing environments for KnowEnG</title>
<p>The genomics computing infrastructure of the future has to be adapted to the diverse ecosystem of data sets and tools that will continue to flourish in genomic research. In particular, tools must be “findable, accessible, interoperable, and reusable” [<xref ref-type="bibr" rid="pbio.3000583.ref004">4</xref>], i.e., comply with the “FAIR” principles that guide the modern vision of biological data science. In recognition of these principles, software components of the KnowEnG system are packaged using state-of-the-art technology [<xref ref-type="bibr" rid="pbio.3000583.ref005">5</xref>] that makes them highly portable and amenable to scalable execution in varying computing environments. A convenient way to access the system is through a web portal that links to a KnowEnG server (<ext-link ext-link-type="uri" xlink:href="https://knoweng.org/analyze/" xlink:type="simple">https://knoweng.org/analyze/</ext-link>; also see Appendix A in <xref ref-type="supplementary-material" rid="pbio.3000583.s001">S1 File</xref>) running on Amazon Web Services (AWS). A user can upload their genomics data set as a spreadsheet and then execute available pipelines (<xref ref-type="fig" rid="pbio.3000583.g001">Fig 1A and 1B</xref> and Appendix B in <xref ref-type="supplementary-material" rid="pbio.3000583.s001">S1 File</xref>). Often, the resulting outputs of one KnowEnG pipeline can be further analyzed using another pipeline, and the system facilitates such “handover” between pipelines. For added security and control, users may also create a personal instance of the KnowEnG server and web portal using their AWS accounts (Appendix C in <xref ref-type="supplementary-material" rid="pbio.3000583.s001">S1 File</xref>). This design feature can help meet challenges of heavy computing loads faced by a public analytics server. 
Computationally savvy users may invoke the pipelines and avail themselves of additional functionalities through Jupyter [<xref ref-type="bibr" rid="pbio.3000583.ref006">6</xref>] notebooks (<ext-link ext-link-type="uri" xlink:href="https://knowtebook.knoweng.org" xlink:type="simple">https://knowtebook.knoweng.org</ext-link>) from a dedicated KnowEnG server. A third mode of access, created for cancer researchers, is via the Cancer Genomics Cloud resource built by Seven Bridges (SB-CGC) [<xref ref-type="bibr" rid="pbio.3000583.ref007">7</xref>], in which users may directly access large cancer data sets, such as those generated by the Cancer Genome Atlas (TCGA) program [<xref ref-type="bibr" rid="pbio.3000583.ref008">8</xref>], and analyze them using KnowEnG pipelines (<ext-link ext-link-type="uri" xlink:href="https://cgc.sbgenomics.com/public/apps#q?search=knoweng" xlink:type="simple">https://cgc.sbgenomics.com/public/apps#q?search=knoweng</ext-link>) without transferring the data from AWS. Through these varied access modes, KnowEnG facilitates accessibility, interoperability, and reusability of its tools, marking a significant step toward realizing the “FAIR” vision.</p>
<fig id="pbio.3000583.g001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pbio.3000583.g001</object-id>
<label>Fig 1</label>
<caption>
<title>Overview of KnowEnG platform.</title>
<p>(A) Typical workflow steps for KnowEnG: genomic analysis on the cloud. (B) Analytical functionalities are organized as “pipelines” for common tasks such as clustering, gene prioritization, gene set analysis, and signature analysis. Each pipeline offers various options to customize the analysis, including use of prior knowledge. (C) The KnowEnG Knowledge Network represents prior knowledge that may be used during analysis. Nodes in the network represent genes and biological properties, and edges represent either annotations of gene properties or gene-gene relationships. Network contents are summarized on the left with logos of data sources shown on the right. KnowEnG, Knowledge Engine for Genomics; TCGA, the Cancer Genome Atlas.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pbio.3000583.g001" xlink:type="simple"/>
</fig>
</sec>
<sec id="sec003">
<title>Knowledge Network–guided analysis</title>
<p>An important feature of KnowEnG pipelines is that they can incorporate large-scale prior knowledge about genes into analyses of the user’s data set. A basic form of such “knowledge-guided” analysis is already common, in which the researcher performs statistical analysis of an experimental data set and then interprets the results in the light of prior knowledge from publicly available gene annotation repositories such as Gene Ontology (GO) [<xref ref-type="bibr" rid="pbio.3000583.ref009">9</xref>], Reactome [<xref ref-type="bibr" rid="pbio.3000583.ref010">10</xref>], etc. KnowEnG makes this analytic process more rigorous by adapting its statistical tools to be directly guided by the vast data in such public repositories of gene annotations and interactions. In doing so, KnowEnG builds on a rich tradition of knowledge-guided analysis methods that have been previously reported for a variety of biological research tasks including (1) clustering of samples into cancer subtypes [<xref ref-type="bibr" rid="pbio.3000583.ref011">11</xref>–<xref ref-type="bibr" rid="pbio.3000583.ref014">14</xref>], (2) finding markers and drivers of disease [<xref ref-type="bibr" rid="pbio.3000583.ref015">15</xref>–<xref ref-type="bibr" rid="pbio.3000583.ref020">20</xref>], (3) prediction of patient survival [<xref ref-type="bibr" rid="pbio.3000583.ref021">21</xref>,<xref ref-type="bibr" rid="pbio.3000583.ref022">22</xref>] or cancer metastases [<xref ref-type="bibr" rid="pbio.3000583.ref023">23</xref>], (4) characterization of experimental gene sets [<xref ref-type="bibr" rid="pbio.3000583.ref024">24</xref>–<xref ref-type="bibr" rid="pbio.3000583.ref028">28</xref>], and (5) prediction of gene functions [<xref ref-type="bibr" rid="pbio.3000583.ref029">29</xref>–<xref ref-type="bibr" rid="pbio.3000583.ref031">31</xref>]. 
KnowEnG also breaks the logistical barriers associated with utilizing large databases of prior knowledge, by co-locating its “knowledge-guided analysis” tools with a diverse knowledgebase compiled from numerous popular repositories. The knowledgebase is encoded in a massive heterogeneous network called the “Knowledge Network,” whose nodes are genes/proteins and whose edges represent properties (e.g., pathway membership) and mutual relationships (e.g., protein-protein interaction) of the nodes (<xref ref-type="fig" rid="pbio.3000583.g001">Fig 1C</xref>). The network represents annotations of 41 different types from 20 species and 13 different data sources and includes 476 million edges, 405,000 gene nodes, and 178,000 property nodes; the network is regularly updated via a “one-click” internal system (Appendix A in <xref ref-type="supplementary-material" rid="pbio.3000583.s002">S2 File</xref>). Users typically select the annotation type that is most relevant for guiding their analysis (Appendix D in <xref ref-type="supplementary-material" rid="pbio.3000583.s001">S1 File</xref>) in the course of launching a pipeline. The Knowledge Network is also available as a stand-alone resource that allows subnetworks associated with a knowledge type to be retrieved (Appendix E in <xref ref-type="supplementary-material" rid="pbio.3000583.s001">S1 File</xref>).</p>
<p>Here, we demonstrate the main functionalities, features, and interfaces of KnowEnG in the context of 2 influential data sets in cancer genomics [<xref ref-type="bibr" rid="pbio.3000583.ref032">32</xref>,<xref ref-type="bibr" rid="pbio.3000583.ref033">33</xref>]. We reproduce several key analyses of the original cancer studies in the KnowEnG system to highlight the ease-of-use with which multiple analysis pipelines can be invoked to generate publishable general insights and extract specific hypotheses from the data. We also present novel knowledge-guided analyses on these data sets that often result in more significant findings and provide a multifaceted narrative of the insights that the data have to offer. The scope of KnowEnG analytics goes far beyond cancer analysis, however, with the system currently supporting analysis of users’ genomics data from any of 20 model organisms. We explain later (see <xref ref-type="sec" rid="sec020">Discussion</xref>) how its tools are broadly applicable to genomics data sets from any biological domain in any of the supported species.</p>
</sec>
</sec>
<sec id="sec004" sec-type="results">
<title>Results</title>
<sec id="sec005">
<title>Overview of 3 “case studies”</title>
<p>We begin with an overview of the case studies used as demonstrations of KnowEnG pipelines. These pipelines analyze spreadsheets of genes (rows) by samples (columns), which tabulate numeric data on each gene in each sample condition. The data may come from a variety of sources, e.g., high-throughput transcriptomics assays using various technologies, mutation counts at the gene level, copy number variations, etc. These data are then analyzed through one of the KnowEnG pipelines, depending on the biomedical questions of interest, and the results are visualized in the platform and optionally passed on to additional pipelines for deeper investigation (<xref ref-type="fig" rid="pbio.3000583.g002">Fig 2A</xref>). Additional information and simple instructions for easily reproducing the main analyses presented in this paper on the KnowEnG web server are found in Appendix F in <xref ref-type="supplementary-material" rid="pbio.3000583.s001">S1 File</xref>.</p>
<fig id="pbio.3000583.g002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pbio.3000583.g002</object-id>
<label>Fig 2</label>
<caption>
<title>Case studies demonstrating knowledge-guided analysis.</title>
<p>(A) Each of the KnowEnG analysis pipelines is shown as a box with corresponding colored arrows connecting to their general inputs and outputs. Because the outputs of a pipeline are often valid inputs for another pipeline, a convenient “handover” mechanism in the KnowEnG web portal enables chaining together multiple pipelines, facilitating deeper and multifaceted analysis of the user’s data. (B) The analysis workflow for Case Study 1 with unused pipelines shaded gray. Simple descriptions of the case study–specific inputs and outputs are provided, along with notes on where to find the detailed methods or figures of the results. The pipeline step of the workflow that incorporates knowledge-guided analysis is indicated with the blue network icon. (C) Analysis workflow for Case Study 2. (D) Analysis workflow for Case Study 3. ESCC, esophageal squamous cell carcinoma; GO, Gene Ontology; KnowEnG, Knowledge Engine for Genomics; LUSC, lung squamous cell carcinoma; TCGA, The Cancer Genome Atlas.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pbio.3000583.g002" xlink:type="simple"/>
</fig>
<p>In Case Study 1 (<xref ref-type="fig" rid="pbio.3000583.g002">Fig 2B</xref>), we analyzed multiomics data on patients from a pan-cancer study [<xref ref-type="bibr" rid="pbio.3000583.ref032">32</xref>]. We first used the “Sample Clustering” pipeline on somatic mutation data to identify clusters of patients with similar mutation profiles. We used knowledge-guided clustering in this step and found it to reveal an improved grouping of patients compared to traditional clustering, as judged by survival characteristics of the resulting clusters. We used the “Gene Prioritization” pipeline to identify genes that are most associated with each patient group and characterized the pathways related to those genes using the “Gene Set Characterization” pipeline. We also grouped the pan-cancer cohort of patients using other omics data and combined the results to obtain a single clustering based on all available data types, which we passed to the “Spreadsheet Visualizer” module to visually explore different aspects of the results including the improved significance of these clusters with overall patient survival.</p>
<p>In Case Study 2 (<xref ref-type="fig" rid="pbio.3000583.g002">Fig 2C</xref>), we used the knowledge-guided Gene Prioritization pipeline to discover genes whose expression (from RNA sequencing [RNA-seq]) is associated with each tumor type (as defined by tissue of origin) in the pan-cancer data set and found the discovered genes to include known drivers of the respective cancer types. We used the Gene Set Characterization pipeline to perform enrichment analysis of these prioritized genes and identify pathways and biological processes associated with each cancer type. We also found the prioritized genes to form a strong signature for the cancer types, in the sense that patient groups identified by examining expression levels of those genes were as informative of survival as groups identified from transcriptome-wide profiles.</p>
<p>In Case Study 3 (<xref ref-type="fig" rid="pbio.3000583.g002">Fig 2D</xref>), we reproduced key findings of a transcriptomic study of esophageal squamous cell carcinoma (ESCC) [<xref ref-type="bibr" rid="pbio.3000583.ref033">33</xref>] using the “Signature Analysis” pipeline. We assigned subtypes to ESCC patient transcriptomic profiles by relying on previously known signatures of lung squamous cell carcinoma (LUSC) subtypes, which provide clues into prognosis and chemotherapeutic resistance. We then identified genes and pathways associated with each ESCC subtype. Our main goal in this case study was to demonstrate how KnowEnG functionalities can be easily accessed on a third-party platform (Seven Bridges Cancer Genomics Cloud) where the data set resides. This flexibility allows researchers to seamlessly combine KnowEnG tools with other specialized tools that are available on the Seven Bridges platform when undertaking more complex projects with many different analytical components.</p>
<p>In the following sections, we describe each of the above case studies in detail. Our goal is to present typical usage of KnowEnG pipelines through examples taken from impactful published studies, rather than as user manuals, which are also available through the KnowEnG website in help text and videos (Table A in <xref ref-type="supplementary-material" rid="pbio.3000583.s005">S1 Data</xref>). These case studies illustrate how the pipelines can lead to broad insights from omics data sets and also how more specific hypotheses, such as genes and pathways involved in a process, can emerge from follow-up analyses, also within KnowEnG.</p>
</sec>
<sec id="sec006">
<title>Case Study 1: Clustering of pan-cancer data</title>
<p>As a first demonstration of the analytic capabilities of KnowEnG, we describe how the Sample Clustering pipeline can be used to group genomic profiles in a knowledge-guided manner. Clustering is one of the most widely used tools in bioinformatics [<xref ref-type="bibr" rid="pbio.3000583.ref034">34</xref>] and can help identify subgroups of samples that represent distinct biological or pathological states [<xref ref-type="bibr" rid="pbio.3000583.ref035">35</xref>]; patient stratification in cancer, where subtypes are defined based on molecular markers [<xref ref-type="bibr" rid="pbio.3000583.ref036">36</xref>], is a prime example. The same clustering tools are often applied to different types of genomic profiles, including gene expression, mutation counts, copy number mutations, etc. [<xref ref-type="bibr" rid="pbio.3000583.ref032">32</xref>]. However, clustering of somatic mutation profiles of cancer patients presents a significant obstacle, because each profile is sparse (a minuscule fraction of genomic loci are mutated) and has little direct similarity to other profiles. As an example of a data set that presents this challenge, we worked with somatic mutation profiles of 3,276 tumor samples spanning 12 cancer types (Appendix B in <xref ref-type="supplementary-material" rid="pbio.3000583.s002">S2 File</xref>) from the “pancan12” data set generated by the TCGA consortium [<xref ref-type="bibr" rid="pbio.3000583.ref032">32</xref>]. (This large data set provides a natural “ground truth,” viz., tumor type, for assessing clustering methods). We first used the “standard” mode of KnowEnG’s Sample Clustering pipeline, viz., Hierarchical Clustering, in 6 different algorithmic configurations to identify 14 clusters (so as to match that in the original publication [<xref ref-type="bibr" rid="pbio.3000583.ref032">32</xref>]) of tumor samples based on their somatic mutations. (The standard mode of this pipeline also offers K-means clustering). 
This failed to produce meaningful clusters, and almost every clustering result exhibited strong “resolution bias” [<xref ref-type="bibr" rid="pbio.3000583.ref037">37</xref>], with one cluster comprising over 90% of the samples (Appendix C in <xref ref-type="supplementary-material" rid="pbio.3000583.s002">S2 File</xref> and Table E in <xref ref-type="supplementary-material" rid="pbio.3000583.s006">S2 Data</xref>). The sole exception was clustering with Jaccard similarity and complete linkage [<xref ref-type="bibr" rid="pbio.3000583.ref038">38</xref>], and even here the largest cluster had over 70% of the samples; we will refer to this below as the standard clustering. This initial analysis illustrates the challenge in clustering somatic mutation profiles: because of their high dimensionality and sparsity, biologically related profiles often do not harbor shared mutations and are not grouped together [<xref ref-type="bibr" rid="pbio.3000583.ref011">11</xref>], ultimately leading to many small and one or few large clusters.</p>
<sec id="sec007">
<title>Knowledge-guided clustering of mutation profiles</title>
<p>Knowledge-guided clustering powered by the Knowledge Network offers a possible solution to the problem just noted. Here, prior knowledge of gene-gene relationships encoded in the network is used to recognize when somatic mutations in different genes may be functionally related, thus allowing more subtle forms of similarity between mutation profiles to be exploited in grouping patients. The knowledge-guided option of the Sample Clustering pipeline (<xref ref-type="fig" rid="pbio.3000583.g003">Fig 3A</xref>) implements the “Network-based Stratification” (NBS) algorithm of Hofree and colleagues [<xref ref-type="bibr" rid="pbio.3000583.ref011">11</xref>], in which a random-walk method makes patient mutation profiles less sparse by borrowing information from the Knowledge Network before the actual clustering step. We used knowledge-guided clustering with the HumanNet Integrated network (hnInt) [<xref ref-type="bibr" rid="pbio.3000583.ref039">39</xref>] as prior knowledge to group patients into 14 clusters. This yielded more size-balanced clusters; the largest cluster included 30% of the 3,276 patients. To test whether patient groups identified from mutation profiles are tied to their phenotypic characteristics, we performed Kaplan-Meier survival analysis (<xref ref-type="fig" rid="pbio.3000583.g003">Fig 3B</xref>). A log-rank test revealed highly significant distinction across the clusters in terms of survival probabilities (<italic>p</italic> = 3.7 × 10<sup>−33</sup>), which was clearly better than that observed in the standard clustering (<italic>p</italic> = 7.4 × 10<sup>−10</sup>; Figure F in <xref ref-type="supplementary-material" rid="pbio.3000583.s002">S2 File</xref>). 
Notably, the original clustering analysis of mutation profiles by Hoadley and colleagues [<xref ref-type="bibr" rid="pbio.3000583.ref032">32</xref>] was also knowledge-guided, relying on mutations in similar pathways to group related samples, and survival analysis of their original sample clusters produced similarly significant survival distinction (<italic>p</italic> = 4.3 × 10<sup>−29</sup>; Figure H in <xref ref-type="supplementary-material" rid="pbio.3000583.s002">S2 File</xref>). The KnowEnG Sample Clustering pipeline, although producing comparable results in terms of survival distinction among clusters, stands out for its ease-of-use compared with executing the multistep methods of the original analysis. For instance, the user avoids download and harmonization of prior knowledge, installation and configuration of multiple software packages, data transformations between steps, and possibly arranging for computing resources capable of compute-intensive steps such as bootstrap sampling (explained below).</p>
<fig id="pbio.3000583.g003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pbio.3000583.g003</object-id>
<label>Fig 3</label>
<caption>
<title>Knowledge-guided sample clustering.</title>
<p>(A) Knowledge-guided sample clustering, illustrated in the context of somatic mutation profiles of cancer patients. Because mutations are rare, 2 patients may not have mutations to the same gene(s), and their mutual similarity will be modest. In the knowledge-guided mode (bottom), similarities between patient profiles are detected if not only the same genes are mutated but also if genes located proximally on a network are mutated; this “relaxed” notion of mutation profile similarity leads to improved clustering. (B) Kaplan-Meier survival analysis of clusters from HumanNet-guided clustering of somatic mutation profiles. Each of 14 reported clusters is plotted as a separate survival curve, and the <italic>p</italic>-value of the multivariate log-rank test is displayed. (C) Concordance between different clustering approaches, using ARI. Three of these approaches use the Sample Clustering (sc) pipeline, with HumanNet (hnNet), STRING text mining (sText) or no network (noNet) for guidance. Two clustering approaches are reproductions from the Hoadley and colleagues (“tcga_mut” obtained from mutation data and “tcga_coca” obtained from multiomics data using COCA). The sixth clustering (disease) is simply a grouping of patients by tumor type. (D) Kaplan-Meier survival analysis of 13 COCA clusters in pan-cancer multiomics data. Users may click the clock icon next to cluster assignments in the Spreadsheet Visualizer to access this display, which uses the current grouping criterion (configurable) for survival analysis. (E) Sample Clustering of pan-cancer multiomics profiles, displayed by the Spreadsheet Visualizer module. Patient profiles are grouped by overall cluster assignment using COCA. The top heat map (blue) shows cluster assignments based on individual omics data types (expr, expression; RPPA, proteomic; CNV, copy number variation; methyl, methylation; miRNA, microRNA). 
The heat maps below show CNV data for select genes (middle) and mutation data for select genes (bottom) for the same patients. Users can configure the number of rows to display for each data source, the statistical criteria for selecting rows, and their sorting order. The grouping criteria for samples (COCA cluster assignments here) can also be configured. User-selected clinical annotations of patients (primary disease in this view; color bar second from top) may also be displayed. ARI, adjusted rand index; CNV, copy number variation; COCA, cluster of cluster assignment; NBS, network-based stratification; STRING, search tool for recurring instances of neighboring genes.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pbio.3000583.g003" xlink:type="simple"/>
</fig>
<p>Delving deeper into the patient clusters obtained above, we asked whether the clusters recapitulate the tumor types of patients or whether they reveal new structures in the data. To this end, we calculated the adjusted rand index (ARI) [<xref ref-type="bibr" rid="pbio.3000583.ref040">40</xref>] between the clusters and tumor types and repeated the process for other approaches to sample clustering, including the multiomics Cluster-Of-Cluster-Assignment (COCA) clustering reported in the work by Hoadley and colleagues [<xref ref-type="bibr" rid="pbio.3000583.ref032">32</xref>] (<xref ref-type="fig" rid="pbio.3000583.g003">Fig 3C</xref>). Interestingly, although there is a high concordance between tumor type and the COCA cluster labels of the work by Hoadley and colleagues [<xref ref-type="bibr" rid="pbio.3000583.ref032">32</xref>] (ARI = 0.82), the same is not true for NBS-based clusters from the KnowEnG pipeline (ARI = 0.13) or for the pathway-based clustering of mutation profiles in the original study (ARI = 0.13). In other words, knowledge-guided clustering finds groups of patient mutation profiles that have strong correspondence with survival characteristics yet do not simply track tumor types, suggesting alternative levels of molecular similarity. We explored this possibility in detail (Appendix D in <xref ref-type="supplementary-material" rid="pbio.3000583.s002">S2 File</xref>) and found the clusters to be characterized by mutations in genes from specific and distinct pathways, even when they are mixed in terms of tumor type representation.</p>
</sec>
<sec id="sec008">
<title>Clustering of multiomics data</title>
<p>The standard clustering pipeline in KnowEnG may be applied to any type of spreadsheet data to cluster a collection of samples, whereas the knowledge-guided clustering pipeline may be used on any gene-level spreadsheet, in which rows represent genes. We showcase this capability by performing “multiomics clustering” of the same cohort of patients as above. A major advantage of multiomics profiling of patients is that their mutual relationships and hidden group structures revealed by each data type can be consolidated into a more integrative, higher-level clustering that is more informative than any one type of profile alone. This was demonstrated by Hoadley and colleagues [<xref ref-type="bibr" rid="pbio.3000583.ref032">32</xref>] through their COCA method. Mimicking their approach, we first clustered the above pan-cancer cohort of patients based on their gene expression, methylation, copy number variation, or protein abundance profiles (Appendix C in <xref ref-type="supplementary-material" rid="pbio.3000583.s002">S2 File</xref> and Table E in <xref ref-type="supplementary-material" rid="pbio.3000583.s006">S2 Data</xref>) separately, using standard clustering. (Knowledge-guided clustering may also be used for all of these profiles except methylation, which is not a gene-level data set). In addition, we considered our knowledge-guided clustering of mutation data reported above and the microRNA clustering from the original publication [<xref ref-type="bibr" rid="pbio.3000583.ref032">32</xref>], thus arriving at 6 different ways to partition the cohort into clusters. Each such clustering assigns a cluster identifier to a patient, and we can thus describe the multiomics profiles of the patient as a succinct “meta-profile” of 6 cluster identifiers. 
We then used the standard clustering pipeline on these meta-profiles, arriving at 13 clusters (again mimicking the original published analysis [<xref ref-type="bibr" rid="pbio.3000583.ref032">32</xref>]) that capture the 6 different omics data sets on the same patients. For this step, we employed the “bootstrap clustering” option of the sample clustering pipeline, which typically yields more robust clustering [<xref ref-type="bibr" rid="pbio.3000583.ref041">41</xref>]; the ease of employing this powerful feature is another example of value added by a cloud-based infrastructure. The steps in which different clustering results were combined into common profiles require manipulations with multiple spreadsheets, each being the result of a separate clustering. KnowEnG facilitates these steps, as well as several other common matrix operations, through its “mini pipelines” that are available as notebooks in a Jupyter environment (Appendix E in <xref ref-type="supplementary-material" rid="pbio.3000583.s002">S2 File</xref>).</p>
</sec>
<sec id="sec009">
<title>Interactive visualization</title>
<p>Results of the above multiomics cluster analysis were visualized via the Spreadsheet Visualizer module of KnowEnG (<xref ref-type="fig" rid="pbio.3000583.g003">Fig 3E</xref>), which, in addition to displaying multiple spreadsheets as a heat map, allows users to simultaneously visualize various other properties of samples (e.g., cluster assignments provided by COCA, selected clinical annotations such as age, survival months, and primary disease type), offers different ways of sorting, filtering, and grouping the data and provides useful descriptive statistics, such as histograms, in an interactive manner. The interactive visualization also allows us to easily perform survival analysis of the displayed clusters, and we used this feature to find that the new multiomics clusters are strongly concordant with tumor type (ARI = 0.72) and exhibit differences in survival probabilities (<italic>p</italic> = 1.0 × 10<sup>−150</sup>; <xref ref-type="fig" rid="pbio.3000583.g003">Fig 3D</xref>, Appendix F in <xref ref-type="supplementary-material" rid="pbio.3000583.s002">S2 File</xref>) far more prominently than the mutation-only analyses had revealed. The Spreadsheet Visualizer is a powerful data exploration and preliminary analysis tool in its own right (see Appendix G in <xref ref-type="supplementary-material" rid="pbio.3000583.s001">S1 File</xref> for details) and can be utilized independently of the clustering pipeline.</p>
</sec>
<sec id="sec010">
<title>Clustering for patient stratification</title>
<p>As an illustration of how the Sample Clustering pipeline may be used for patient subtyping [<xref ref-type="bibr" rid="pbio.3000583.ref036">36</xref>], we next clustered breast cancer patients in the METABRIC data set [<xref ref-type="bibr" rid="pbio.3000583.ref042">42</xref>] based on genes related to the epithelial to mesenchymal transition (EMT), which is a process involved in metastasis. Following the approach in the work by Emad and colleagues [<xref ref-type="bibr" rid="pbio.3000583.ref043">43</xref>], we clustered patients into 2 groups based on the expression of their EMT-related genes (Appendix G in <xref ref-type="supplementary-material" rid="pbio.3000583.s002">S2 File</xref>). Although the standard mode of Sample Clustering did not result in clusters with distinct survival probabilities, the knowledge-guided mode achieved significant Kaplan-Meier log-rank <italic>p</italic>-values using either the STRING [<xref ref-type="bibr" rid="pbio.3000583.ref044">44</xref>] text-mining interaction network (“sText;” <italic>p</italic> = 3.1 × 10<sup>−4</sup>) or the HumanNet “hnInt” network (<italic>p</italic> = 7.6 × 10<sup>−4</sup>; Figures L and M in <xref ref-type="supplementary-material" rid="pbio.3000583.s002">S2 File</xref>).</p>
</sec>
</sec>
<sec id="sec011">
<title>Case Study 2: Gene prioritization for tumor types</title>
<p>A routinely conducted analysis of high-throughput omics profiles is the determination of genes associated with particular phenotypic conditions or biological processes of interest. Discovery of differentially expressed genes [<xref ref-type="bibr" rid="pbio.3000583.ref045">45</xref>] by contrasting transcriptomic profiles before and after treatment or in case versus control experiments, or of genes whose expression correlates with a numeric phenotype such as drug response [<xref ref-type="bibr" rid="pbio.3000583.ref046">46</xref>] are prime examples. The Gene Prioritization pipeline in KnowEnG offers this functionality, given a spreadsheet of omics data (genes x samples) and a “phenotype spreadsheet” (phenotypes x samples) that represents one or more phenotypic labels for each sample in the omics spreadsheet. As a simple demonstration of this pipeline, we analyzed expression data from tumor samples in the pancan12 data set introduced above, comparing each tumor type with all others using a <italic>t</italic> test to identify significant differences in individual gene expression between the groups; this is the standard version of the pipeline (<xref ref-type="fig" rid="pbio.3000583.g004">Fig 4A</xref>, Appendix A in <xref ref-type="supplementary-material" rid="pbio.3000583.s003">S3 File</xref>).</p>
<fig id="pbio.3000583.g004" position="float">
<object-id pub-id-type="doi">10.1371/journal.pbio.3000583.g004</object-id>
<label>Fig 4</label>
<caption>
<title>Knowledge-guided gene prioritization.</title>
<p>(A) In standard mode (top), each gene’s expression is tested for association with phenotypic labels, e.g., with a <italic>t</italic> test. In the (bottom) knowledge-guided mode (ProGENI algorithm), each gene’s expression is first transformed by taking into account expression levels of its network neighbors, and these “network-smoothed” expression values are tested for association with phenotype. The resulting ranking of genes is subjected to a second phase of network-based smoothing to obtain the final ranking. (B) Visualization of results from the Gene Prioritization pipeline, used here to identify top genes associated with each tumor type (based on expression data). Users may choose to analyze and visualize results for multiple phenotypes together and configure how many top genes per phenotype the report should include. (C) Known driver genes for each tumor type that are highly prioritized by standard and/or knowledge-guided modes of Gene Prioritization. (D) Comparison between tumor type–related genes identified using the Gene Prioritization pipeline in standard mode (“GP_noNet”) or knowledge-guided mode using HumanNet (“GP_hnInt”), based on their enrichment for GO terms. The axes represent the negative logarithm (base 10) of <italic>p</italic>-value of enrichment between the set of highly prioritized genes (from either method) for a tumor type and the most enriched GO category for that set. GO, gene ontology.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pbio.3000583.g004" xlink:type="simple"/>
</fig>
<sec id="sec012">
<title>Knowledge-guided gene prioritization</title>
<p>KnowEnG also offers a knowledge-guided mode of this pipeline, where the ProGENI algorithm of Emad and colleagues [<xref ref-type="bibr" rid="pbio.3000583.ref047">47</xref>] is used to incorporate a network encoding prior knowledge into the identification of phenotype-related genes (<xref ref-type="fig" rid="pbio.3000583.g004">Fig 4A</xref>), using random walk-based techniques similar to those used in the NBS clustering approach [<xref ref-type="bibr" rid="pbio.3000583.ref011">11</xref>]. We had previously tested ProGENI on the task of prioritizing drug response–related genes. Through systematic benchmarking, experimental validations and literature surveys we showed that it identifies phenotype-related genes more accurately compared with simple statistical methods as well as machine learning methods that do not utilize prior knowledge [<xref ref-type="bibr" rid="pbio.3000583.ref048">48</xref>]. We now applied this algorithm, via the knowledge-guided gene prioritization pipeline, to identify top genes associated with each tumor type, based on expression data (<xref ref-type="fig" rid="pbio.3000583.g004">Fig 4B</xref>, Appendix A in <xref ref-type="supplementary-material" rid="pbio.3000583.s003">S3 File</xref>). (KnowEnG allows this analysis to be performed for all tumor types through one simple operation, rather than repeat it for each tumor type separately).</p>
</sec>
<sec id="sec013">
<title>Gene prioritization finds driver genes</title>
<p>For an independent assessment of the above results, we compared the top 100 genes for each tumor type with drivers of that cancer as cataloged in the IntOGen database [<xref ref-type="bibr" rid="pbio.3000583.ref049">49</xref>] based on mutation and gene fusion data (<xref ref-type="fig" rid="pbio.3000583.g004">Fig 4C</xref>). We observed overlaps between the 2 lists; for example, in head and neck squamous cell carcinoma (HNSCC), 6 of the highly prioritized genes are known drivers (Fisher’s exact test <italic>p</italic> = 8.2 × 10<sup>−4</sup>; Figure A in <xref ref-type="supplementary-material" rid="pbio.3000583.s003">S3 File</xref>). A similar assessment of genes reported by the standard pipeline (without knowledge-guidance) revealed fewer overlaps with respective driver sets for all but 2 tumor types (<xref ref-type="fig" rid="pbio.3000583.g004">Fig 4C</xref>). Often, common driver genes were identified by both versions of the pipeline, e.g., GATA3 for breast cancer (BRCA), but in many cases the knowledge-guided version reported known drivers that were missed by the standard pipeline, e.g., FOXA1 for BRCA, NRAS and KRAS for acute myeloid leukemia (AML), and CDH1, CTNNB1, and EGFR for HNSCC. (ESR1, a well-known marker of BRCA [<xref ref-type="bibr" rid="pbio.3000583.ref050">50</xref>], was ranked in the top 1.2% of all genes for BRCA, but ranked much worse for other tumor types). Similar conclusions were reached when we repeated the assessment using a larger external set of tumor type drivers, based on both IntOGen and COSMIC databases [<xref ref-type="bibr" rid="pbio.3000583.ref049">49</xref>, <xref ref-type="bibr" rid="pbio.3000583.ref051">51</xref>] (Appendix A in <xref ref-type="supplementary-material" rid="pbio.3000583.s003">S3 File</xref>).</p>
</sec>
<sec id="sec014">
<title>Functional enrichment of prioritized genes</title>
<p>To gain further insights into the highly ranked genes reported for each tumor type in the above analysis, we subjected them to functional enrichment analysis through the Gene Set Characterization pipeline, whose standard version uses the Fisher’s exact test to assess the enrichment of a gene set for prespecified annotations. This revealed various interesting pathways and GO terms as being significantly associated with each tumor type (Appendix B in <xref ref-type="supplementary-material" rid="pbio.3000583.s003">S3 File</xref>). For instance, glioblastoma (GBM)-related genes found by ProGENI were significantly associated with receptor proteins in the presynaptic active zone and excitatory synapse, whose altered expression can enhance gliomas’ ability to grow and survive [<xref ref-type="bibr" rid="pbio.3000583.ref052">52</xref>] (Bonferroni corrected <italic>p</italic> = 6.0 × 10<sup>−3</sup>). Similarly, AML-related genes were enriched for platelet activation, shown to be related to blast proliferation [<xref ref-type="bibr" rid="pbio.3000583.ref053">53</xref>] (Bonferroni corrected <italic>p</italic> = 2.0 × 10<sup>−6</sup>). The extent to which significant functional properties can be associated with a gene set extracted by genomics analyses is one measure of the utility of that gene set [<xref ref-type="bibr" rid="pbio.3000583.ref054">54</xref>]. Thus, we summarized the results of gene set characterization by noting the most statistically significant functional enrichment (of genes prioritized) for each tumor type. We noted that when the same process was repeated using genes reported by the standard gene prioritization pipeline the functional enrichments tended to be less prominent (<xref ref-type="fig" rid="pbio.3000583.g004">Fig 4D</xref>), thus providing further evidence of the value of knowledge-guided gene prioritization. 
The same conclusion was reached when a different network (STRING text mining) was used in gene prioritization instead of the HumanNet integrated network (Appendix B in <xref ref-type="supplementary-material" rid="pbio.3000583.s003">S3 File</xref>).</p>
</sec>
<sec id="sec015">
<title>Pan-cancer signature from prioritized genes</title>
<p>Sets of genes of particular relevance to a tumor type are often used as a “signature” of that tumor, i.e., a representative gene set that captures much of the diagnostic or prognostic value of the entire expression profile. The PAM50 signature of breast cancer is a prime example [<xref ref-type="bibr" rid="pbio.3000583.ref036">36</xref>], being used for patient stratification based on expression of a small set of genes. We asked if the tumor-associated genes prioritized above for each tumor type together form a similar signature with prognostic value in a pan-cancer context. Indeed, we observed that pan-cancer subtypes obtained from clustering only the expression of the tumor-associated genes were just as predictive of survival (Kaplan-Meier <italic>p</italic> = 3.8 × 10<sup>−175</sup>) as the above-mentioned clusters based on entire expression profiles (<italic>p</italic> = 1.2 × 10<sup>−169</sup>; see Appendix C in <xref ref-type="supplementary-material" rid="pbio.3000583.s003">S3 File</xref>). This finding was robust to the use of different networks (or no network) in the gene prioritization step.</p>
</sec>
</sec>
<sec id="sec016">
<title>Case Study 3: Signature analysis and gene set characterization on a third-party system</title>
<p>Our next case study makes use of a fourth pipeline—Signature Analysis (<xref ref-type="fig" rid="pbio.3000583.g005">Fig 5A</xref>)—to study a transcriptomic data set of ESCC samples [<xref ref-type="bibr" rid="pbio.3000583.ref033">33</xref>] and also showcases how KnowEnG tools can be invoked on computing infrastructures external to the platform (<xref ref-type="fig" rid="pbio.3000583.g005">Fig 5B</xref>). Although the KnowEnG web portal offers a flexible graphical user interface, advanced users performing bioinformatics analysis on a different computing framework may prefer to avail of KnowEnG pipelines on that external framework directly, without tedious transfer of data, intermediate results, or code from one system to another.</p>
<fig id="pbio.3000583.g005" position="float">
<object-id pub-id-type="doi">10.1371/journal.pbio.3000583.g005</object-id>
<label>Fig 5</label>
<caption>
<title>Signature analysis pipeline.</title>
<p>(A) Each user-uploaded expression profile (sample) is matched against expression profiles in a predetermined collection (signatures) and match scores for all sample-signature pairs are reported by the pipeline. (B) Signature Analysis and other KnowEnG pipelines can be executed seamlessly on the third-party platform of SB-CGC that hosts a large repository of cancer data and associated tools. The pipelines are published on SB-CGC as a native workflow and the Knowledge Network is transferred “under the hood” from the KnowEnG Cloud when needed by a pipeline. (C) Signature Analysis of 79 ESCC samples, distributed into 3 subgroups, matched against 4 LUSC signatures (subtypes) using Spearman’s Correlation Coefficient. (D) Signature analysis of 551 LUSC samples available on the SB-CGC, matched against 4 LUSC signatures. ESCC, esophageal squamous cell carcinoma; KnowEnG, Knowledge Engine for Genomics; LUSC, lung squamous cell carcinoma; SB-CGC, Seven Bridges Cancer Genomics Cloud.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pbio.3000583.g005" xlink:type="simple"/>
</fig>
<sec id="sec017">
<title>Interoperability</title>
<p>KnowEnG currently offers such seamless interoperability with the SB-CGC, which provides researchers with secure access to public data sets such as TCGA and TARGET. We used SB-CGC to access RNA-seq data for the previously reported ESCC tumor samples [<xref ref-type="bibr" rid="pbio.3000583.ref033">33</xref>] and created a transcriptomic spreadsheet (genes × samples) for further analysis with KnowEnG pipelines in the SB-CGC environment (<xref ref-type="fig" rid="pbio.3000583.g005">Fig 5B</xref>, Appendix A in <xref ref-type="supplementary-material" rid="pbio.3000583.s004">S4 File</xref>). This is made possible by the publication of KnowEnG pipelines as native workflows on the SB-CGC, with simple graphical interfaces, and creates opportunities for synergistic use of functionalities offered by these 2 powerful genomics computing platforms. (External availability of KnowEnG pipelines includes seamless access to the massive Knowledge Network that supports knowledge-guided analysis.) Interoperability is an important tenet of the emerging vision of computing infrastructures of the future. It was achieved by using 2 emerging technologies—Docker containers [<xref ref-type="bibr" rid="pbio.3000583.ref005">5</xref>] to make the underlying software of each pipeline portable and Common Workflow Language (CWL) [<xref ref-type="bibr" rid="pbio.3000583.ref055">55</xref>] to provide a standardized description of the pipeline (Appendix H in <xref ref-type="supplementary-material" rid="pbio.3000583.s001">S1 File</xref>). This alternative mode of KnowEnG usage also facilitates reproducibility and reusability; e.g., users may share their project on SB-CGC with collaborators. Thus, by ensuring interoperability and reusability, in addition to accessibility and findability already offered by the cloud-based web platform, the KnowEnG-CGC joint framework takes a major step toward the realization of the FAIR principles of modern data science.</p>
</sec>
<sec id="sec018">
<title>Signature analysis for patient subtyping</title>
<p>Operating within the SB-CGC framework, we performed a signature analysis of 79 ESCC patients as reported in the original TCGA publication. Signature analysis [<xref ref-type="bibr" rid="pbio.3000583.ref056">56</xref>] is a widely used method in cancer informatics and has been used for various tasks such as identifying subtypes [<xref ref-type="bibr" rid="pbio.3000583.ref036">36</xref>], characterizing purity of tumor samples [<xref ref-type="bibr" rid="pbio.3000583.ref057">57</xref>], determining the abundance of immune cells in tumor microenvironment [<xref ref-type="bibr" rid="pbio.3000583.ref058">58</xref>], characterizing transitions involved in the invasion-metastasis cascade [<xref ref-type="bibr" rid="pbio.3000583.ref043">43</xref>], etc. Here, given a spreadsheet of transcriptomic profiles of a cohort of patients, and a second spreadsheet of predetermined expression signatures, the pipeline finds the closest matching signature for each patient (<xref ref-type="fig" rid="pbio.3000583.g005">Fig 5A</xref>). This often allows existing insights about the signature to shed light on clinical characteristics of the patient based on their molecular profile. Following the original publication, we matched ESCC samples to signatures representing 4 subtypes of LUSC [<xref ref-type="bibr" rid="pbio.3000583.ref059">59</xref>], because the 2 cancers are anatomically adjacent and previously established subtypes of LUSC may be relevant to ESCC as well (Appendix B in <xref ref-type="supplementary-material" rid="pbio.3000583.s004">S4 File</xref>). We noted that one cluster of ESCC patients (“ESCC1,” identified in the original publication) mostly (65%) resembled the classical subtype of LUSC, whereas the second main cluster (“ESCC2”) mostly (63%) matched the basal subtype of LUSC (<xref ref-type="fig" rid="pbio.3000583.g005">Fig 5C</xref>), and fewer samples matched the primitive and secretory subtypes. 
The correspondence discovered between ab initio detected ESCC subtypes and previously reported LUSC subtypes is generally consistent with the observations of the authors of the original TCGA esophageal carcinoma analysis, who note that tumors matching the classical expression subtype also had similar somatic alterations to the subtype and were associated with poor prognosis and chemotherapeutic resistance. To highlight the convenience of co-localizing the analysis workflows with the data on the SB-CGC, we reran the analysis by simply substituting an alternate TCGA data set of LUSC tumor samples, again finding the classical subtype (40%) to be the most prevalent (<xref ref-type="fig" rid="pbio.3000583.g005">Fig 5D</xref>).</p>
</sec>
<sec id="sec019">
<title>Pathway analysis of subtype-associated genes</title>
<p>Having categorized ESCC patients into one of 4 subtypes using signature analysis, we next used the standard gene prioritization pipeline to identify genes associated with each subtype and subjected the resulting subtype-associated gene lists (Appendix C in <xref ref-type="supplementary-material" rid="pbio.3000583.s004">S4 File</xref>) to further analysis using the gene set characterization pipeline introduced above. We now used the knowledge-guided version of this pipeline, which instead of performing the traditional “enrichment test” between sets [<xref ref-type="bibr" rid="pbio.3000583.ref060">60</xref>], uses a random-walk algorithm with the user-provided gene set as “restart nodes,” to find property nodes of the Knowledge Network that are most related to the given gene set (<xref ref-type="fig" rid="pbio.3000583.g006">Fig 6A</xref>). This class of algorithms has been successfully used to quantify the relationship between network nodes in a variety of domains such as web mining [<xref ref-type="bibr" rid="pbio.3000583.ref061">61</xref>] and social network analysis [<xref ref-type="bibr" rid="pbio.3000583.ref062">62</xref>]. The KnowEnG pipeline uses an implementation called “DRaWR” [<xref ref-type="bibr" rid="pbio.3000583.ref024">24</xref>], the main advantage of which compared to enrichment tests is that it examines not only properties with which the given genes are annotated but also the properties with which genes related to the given genes are annotated (Appendix C in <xref ref-type="supplementary-material" rid="pbio.3000583.s004">S4 File</xref>). We have previously used DRaWR to characterize gene sets in <italic>Drosophila</italic> development [<xref ref-type="bibr" rid="pbio.3000583.ref024">24</xref>] and cancer [<xref ref-type="bibr" rid="pbio.3000583.ref063">63</xref>]. 
Here, we used the DRaWR-based knowledge-guided gene set characterization pipeline with the HumanNet Integrated network [<xref ref-type="bibr" rid="pbio.3000583.ref039">39</xref>] as the underlying network to identify, for ESCC subtype-related genes, the most related pathways in the Enrichr Pathways Collection [<xref ref-type="bibr" rid="pbio.3000583.ref064">64</xref>]. (The pipeline offers several options for the network as well as the properties to be ranked; see Appendix C in <xref ref-type="supplementary-material" rid="pbio.3000583.s004">S4 File</xref>.) As a point of contrast, we also analyzed the gene sets with the standard version of the pipeline that uses the traditional hypergeometric test approach [<xref ref-type="bibr" rid="pbio.3000583.ref065">65</xref>]. <xref ref-type="fig" rid="pbio.3000583.g006">Fig 6B</xref> tabulates 12 discovered pathway associations for ESCC subtypes that were reported by the DRaWR-based version of the pipeline but not by the standard version. Even though these associations do not meet the traditional criterion of significant set overlap, there is support in the literature for 7 of the 12 associations. Moreover, the top-ranked association was between the basal subtype of ESCC and the gastric cancer network, which is credible given the close relationship between ESCC and gastric cancer, which are anatomically adjacent and share several risk factors [<xref ref-type="bibr" rid="pbio.3000583.ref066">66</xref>]. Surprisingly, this association was not detected by the enrichment test performed in the standard pipeline. Another interesting example is the primitive subtype being linked to the FOXM1 transcription factor network, but only by the DRaWR-based pipeline. 
FOXM1 has been found to be related to ESCC progression [<xref ref-type="bibr" rid="pbio.3000583.ref067">67</xref>] and to be a potential drug target; our finding of a specific association with the primitive subtype of ESCC suggests that the tumor subtype may be an important factor to consider in its therapeutic significance. We also found several subtype-pathway associations reported by both versions of the pipeline (<xref ref-type="fig" rid="pbio.3000583.g006">Fig 6C</xref>). For instance, both the basal and classical subtypes were associated with NRF2 pathway [<xref ref-type="bibr" rid="pbio.3000583.ref068">68</xref>], the secretory subtype was linked to Syndecan-1 mediated signaling event [<xref ref-type="bibr" rid="pbio.3000583.ref069">69</xref>], and the primitive subtype to oxidation by Cytochromes P450 [<xref ref-type="bibr" rid="pbio.3000583.ref070">70</xref>]. A total of 6 of the 13 such associations found by enrichment-based as well as DRaWR-based gene set characterization had circumstantial evidence in the literature.</p>
<fig id="pbio.3000583.g006" position="float">
<object-id pub-id-type="doi">10.1371/journal.pbio.3000583.g006</object-id>
<label>Fig 6</label>
<caption>
<title>Knowledge-Guided GSC.</title>
<p>(A) Common approaches to GSC examine the overlap (top) between a user-provided gene set (e.g., genes A, D, E) and genes in a pathway (e.g., A, D, B in pathway P1). In the (bottom) knowledge network-guided mode (algorithm DRaWR), the association between 2 gene sets is based not only on direct overlap between them but also on network-based proximity between them. (B) LUSC subtype-associated pathways found exclusively with network-guided GSC pipeline using DRaWR. (C) Pathways associated with LUSC subtypes found by standard as well as network-guided GSC pipelines. GSC, gene set characterization; LUSC, lung squamous cell carcinoma.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pbio.3000583.g006" xlink:type="simple"/>
</fig>
<p>In summary, this case study illustrates how different KnowEnG pipelines, in this case, beginning with signature analysis and followed by gene prioritization and gene set characterization, can be used in a workflow to not only relate patient profiles to previously reported cancer subtypes but also to glean novel insights about genes and pathways differentiating patients matched to different subtypes. We performed these analyses on a system external to KnowEnG (i.e., SB-CGC), but the same workflow may be executed on the KnowEnG platform as well, and the interface facilitates easy “stringing” of multiple pipelines to enable such workflows.</p>
</sec>
</sec>
</sec>
<sec id="sec020" sec-type="conclusions">
<title>Discussion</title>
<p>KnowEnG is an analysis engine designed and implemented with the needs and trends of modern genomics research in mind. KnowEnG offers a vision of genomic computing that is complementary to the dominant paradigm where software packages (e.g., in R or python) are installed on the user’s computer and executed locally. The current paradigm is convenient as long as data sets predominantly reside locally, but with the on-going movement toward massive data sets in the public domain [<xref ref-type="bibr" rid="pbio.3000583.ref071">71</xref>] and a clear need for moving tools to co-locate with these data, we expect the alternative paradigm embraced by KnowEnG to be increasingly relevant. Its main platform provides a convenient way to analyze the user’s uploaded spreadsheets while exploiting massive knowledgebases encoded in the Knowledge Network, and its interoperability with major cloud-based platforms such as SB-CGC showcases the advantages of tools moving to data sources while maintaining the convenient “illusion” of local computation.</p>
<sec id="sec021">
<title>Comparison with existing frameworks</title>
<p>KnowEnG embodies many of the powerful ideas to have emerged in genomics research over the last decade, including knowledge-guided analysis, cloud-based storage and computing, machine learning and network-mining algorithms, and the FAIR principles for broader impact. KnowEnG draws inspiration from existing analytic tools and systems that have brought the above ideas to the fore. For instance, if we consider noncommercial platforms with web portals that offer multiple analytics functionalities for genomics data sets, some prominent examples that come to mind include Galaxy [<xref ref-type="bibr" rid="pbio.3000583.ref072">72</xref>], FireCloud/Terra [<xref ref-type="bibr" rid="pbio.3000583.ref073">73</xref>], cBioPortal [<xref ref-type="bibr" rid="pbio.3000583.ref074">74</xref>], NCI Genomic Data Commons (GDC) Data Portal [<xref ref-type="bibr" rid="pbio.3000583.ref002">2</xref>], GenePattern [<xref ref-type="bibr" rid="pbio.3000583.ref075">75</xref>], GeneWeaver [<xref ref-type="bibr" rid="pbio.3000583.ref076">76</xref>], GeneMANIA [<xref ref-type="bibr" rid="pbio.3000583.ref077">77</xref>], and GenomeSpace [<xref ref-type="bibr" rid="pbio.3000583.ref078">78</xref>], among others. Below we clarify the unique position of KnowEnG in the rich milieu of genomics platforms available today.</p>
<p>The Galaxy platform provides convenient wrappers around popular external tools, mainly for next generation sequencing data analysis and many other tasks such as meta-genomics, phylogenetics, and sequence analysis that are outside the scope of KnowEnG. However, Galaxy is not geared toward knowledge-guided analysis of users’ omics data spreadsheets, and its “downstream” analysis functions do not include sample clustering, gene prioritization for numeric phenotypic scores, or signature analysis, nor do they include the powerful spreadsheet visualization capability of KnowEnG. FireCloud/Terra similarly allows users to perform a variety of analyses of their own data as well as hosted data sets but does not offer, as KnowEnG does, knowledge-guided analysis or a rich visual interface. The popular cancer-related platforms of cBioPortal and NCI GDC are mainly geared toward visualization and exploration of omics spreadsheets extracted from the data sets they host rather than users’ spreadsheets and also differ significantly from KnowEnG in terms of analytics tasks offered, including knowledge-guided tools. Moreover, FireCloud, cBioPortal, and NCI GDC are entirely focused on cancer data analysis. In contrast, KnowEnG tools are meant for analysis of user-provided omics spreadsheets from any biological domain in any of its 20 currently supported species.</p>
<p>GenePattern is a web-based portal that offers a number of modules for genomics data analysis, similar to KnowEnG pipelines, with similar functions. However, these general-purpose machine learning tools do not fall in the genre of knowledge-guided analysis and do not offer the option of exploiting prior knowledge. On the other hand, GeneWeaver and GeneMANIA are online portals for knowledge-guided analysis of user data but are limited to gene sets, similar to KnowEnG’s GSC pipeline, and do not provide tools such as sample clustering and gene prioritization for spreadsheet analysis. Another popular modern platform, GenomeSpace, is a warehouse of diverse virtual workflows that connect several external tools and databases and that have some overlaps with KnowEnG functionalities. However, it does not perform any analysis itself and acts more as a data highway for passing data from one tool to another and is thus dependent on external tools and computing platforms to ensure scalability of its workflows.</p>
<p>We reserve a special mention of the ‘geWorkBench’ [<xref ref-type="bibr" rid="pbio.3000583.ref079">79</xref>], an open-source platform that provides standard tools for common tasks such as sample clustering, gene prioritization, and gene set enrichment, comparable to “baseline” options in the corresponding KnowEnG pipelines, as well as specialized tools [<xref ref-type="bibr" rid="pbio.3000583.ref080">80</xref>–<xref ref-type="bibr" rid="pbio.3000583.ref083">83</xref>] for analyzing a gene expression spreadsheet and a co-expression network simultaneously. However, geWorkBench has to be installed on the user’s machine, and its web version has a small subset of tools from its desktop version, reflecting the challenges of offering a centralized web portal to compute-intensive tools with complex inputs. KnowEnG pipelines, on the other hand, embrace the use of computationally intensive but often trivially parallelizable techniques such as bootstrap sampling because of the scalability afforded by cloud computing.</p>
<p>In summary, a careful comparison of the features and goals of some of the major contemporary analytical frameworks reveals that KnowEnG brings complementary capabilities to the user, either in terms of the actual analyses offered, in allowing knowledge-guided analysis, in focusing on user-provided spreadsheet data analysis, in providing a consolidated cloud-based back-end for scalable computation, or in its online interface for visualizing data and results. Importantly, KnowEnG also supports analysis for 20 species currently, making its scope broader than several of the above-mentioned tools that are restricted to human genomic analysis. This is a major feature because knowledge-guided analysis requires integration of prior knowledgebases with tools in a species-specific manner.</p>
</sec>
<sec id="sec022">
<title>Applications to other biological domains</title>
<p>KnowEnG pipelines analyze spreadsheets of genes (rows) by samples (columns), which tabulate numeric information about each gene in each sample. The information may come from a variety of sources, e.g., high-throughput transcriptomics assays using various technologies, mutation counts at the gene level, copy number variations, etc. The analytical approaches do not make strong assumptions about the source of the data and are, as a result, applicable to any number of biological domains, not just cancer, as we discuss next.</p>
<p>The gene prioritization pipeline may be used in any scenario where a spreadsheet of gene-level measurements (expression levels, mutation counts, copy numbers, epigenomic measurements, etc.) is available on a collection of samples, along with a phenotypic score for each sample. For instance, Emad and colleagues [<xref ref-type="bibr" rid="pbio.3000583.ref047">47</xref>] used this pipeline to identify genes whose basal expression in a cancer cell line is predictive of the cell line’s response to a cytotoxic treatment. Similar analyses have been performed in other published studies [<xref ref-type="bibr" rid="pbio.3000583.ref048">48</xref>], although without incorporating a knowledge network. Other examples of potential applications to gene prioritization for numeric phenotypes include identifying genes whose brain expression levels are predictive of pheromone response in honeybees [<xref ref-type="bibr" rid="pbio.3000583.ref084">84</xref>], discovering genes predictive of growth rate in bacteria or yeast [<xref ref-type="bibr" rid="pbio.3000583.ref085">85</xref>], and identifying gene families whose size (number of paralogs) in a species is correlated with a numeric score of that species, e.g., eusociality index in bees [<xref ref-type="bibr" rid="pbio.3000583.ref086">86</xref>]. Indeed, the potential of this line of analysis is evidenced by the recent publication of a tool specifically for relating expression to traits (“TraitCorr” [<xref ref-type="bibr" rid="pbio.3000583.ref087">87</xref>]) as an R package. The task of identifying differentially expressed genes between 2 conditions (binary phenotypes) can also be performed, in a knowledge-guided manner, using the gene prioritization pipeline. The high utility of this task needs no introduction, and many tools are available for it [<xref ref-type="bibr" rid="pbio.3000583.ref088">88</xref>]. 
The unique value of the KnowEnG pipeline is that common statistical tests used for this task, e.g., <italic>t</italic> test or EdgeR [<xref ref-type="bibr" rid="pbio.3000583.ref089">89</xref>], can be combined with “smoothing” of gene expression values based on the Knowledge Network as well as subjected to “bootstrapping” for robustness. (We and others have already demonstrated the value of network-smoothing and bootstrapping in prior work on gene prioritization [<xref ref-type="bibr" rid="pbio.3000583.ref047">47</xref>]). These additional features of the pipeline are well supported by a cloud-based platform that offers easy scalability and prestored knowledge networks, thus avoiding the hassles of maintaining compute clusters and downloading large networks for a more traditional “local computation” such as those using Bioconductor packages [<xref ref-type="bibr" rid="pbio.3000583.ref090">90</xref>].</p>
<p>Clustering is a pervasive operation in bioinformatics and finds uses in a large number of scenarios. Clustering may be performed within KnowEnG for any of the 20 species supported by it, for any set of experimental conditions, and for any type of omics data that assign numeric measurements to genes, to reveal hidden groupings among the conditions. Whereas the common tools for gene expression clustering focus on the task of grouping genes, the KnowEnG Sample Clustering pipeline is geared toward finding groups of samples/conditions that have similar expression profiles. This distinction is crucial to its use of a knowledge network to guide the clustering, lends it a complementary strength, and is expected to be of increasing utility in the future as the practice of profiling tissue samples from individuals grows more popular [<xref ref-type="bibr" rid="pbio.3000583.ref091">91</xref>]. The most common uses of sample clustering are in identifying subgroups in cancer patients, based on transcriptomic as well as other omics data sets, e.g., identifying breast cancer subgroups from copy number variations [<xref ref-type="bibr" rid="pbio.3000583.ref092">92</xref>], colon cancer subgroups from gene expression data [<xref ref-type="bibr" rid="pbio.3000583.ref093">93</xref>], refinement of breast cancer subtypes based on microRNA expression profiles [<xref ref-type="bibr" rid="pbio.3000583.ref094">94</xref>], subtyping of different cancers from somatic mutation data [<xref ref-type="bibr" rid="pbio.3000583.ref095">95</xref>], to name a few. 
Other uses of clustering to group samples include clustering of type 2 diabetes patients as well as obese and healthy subjects to find that type 2 diabetes and obesity have similar expression profiles [<xref ref-type="bibr" rid="pbio.3000583.ref096">96</xref>], grouping of brain transcriptomes of honeybee nurses and foragers of different ages to show that each behavioral group has similar profiles [<xref ref-type="bibr" rid="pbio.3000583.ref097">97</xref>], clustering of Arabidopsis plants treated with plant activators [<xref ref-type="bibr" rid="pbio.3000583.ref098">98</xref>], etc.</p>
<p>The GSC pipeline addresses one of the most commonly performed tasks in genomics analysis, which is sometimes referred to as gene set enrichment analysis (GSEA) and often performed using the GSEA tool [<xref ref-type="bibr" rid="pbio.3000583.ref099">99</xref>] or through hypergeometric tests. Studies that use this analytical operation are too numerous to list here, but its popularity is evidenced by the huge following that online tools such as DAVID [<xref ref-type="bibr" rid="pbio.3000583.ref060">60</xref>] and Enrichr [<xref ref-type="bibr" rid="pbio.3000583.ref064">64</xref>] have. We nevertheless included this pipeline in KnowEnG because it is a natural follow-up for the gene prioritization pipeline, and we expect users to make use of it every time they identify top genes associated with a phenotype. Moreover, the KnowEnG pipeline offers 2 complementary approaches to the above task—the popular approach based on hypergeometric tests (as in DAVID) and a novel approach based on random walks with restart (RWR), which we have published previously [<xref ref-type="bibr" rid="pbio.3000583.ref024">24</xref>] and whose unique value we further demonstrate in Appendix D in <xref ref-type="supplementary-material" rid="pbio.3000583.s004">S4 File</xref>. The RWR-based method not only provides an alternative approach to identifying pathways, GO terms, etc., most relevant to a given set of genes, it does so while accounting for gene-gene relationships encoded in a knowledge network, according unique value to the KnowEnG pipeline. We have also published the use of the RWR-based method to characterize gene sets arising out of a brain transcriptomic study of social behavior in 3 different species [<xref ref-type="bibr" rid="pbio.3000583.ref100">100</xref>].</p>
</sec>
<sec id="sec023">
<title>Flexibility of KnowEnG functionalities</title>
<p>While KnowEnG offers analysis functionalities that have broad applicability, we recognize that there will be many scenarios in which the scientist may require modifications to those functionalities or add components to the available pipelines for maximum benefit. There are a few straightforward steps required for a developer to add a new tool to the KnowEnG system (detailed in Appendix I in <xref ref-type="supplementary-material" rid="pbio.3000583.s001">S1 File</xref>). All KnowEnG pipelines are published as Docker containers and also have CWL descriptions of their inputs, outputs, and execution steps. Such “packaging” of pipelines not only allows them to be used on any platform as part of the user’s established workflows, it also allows the user to add their own custom tools, implemented in R or other languages, into the KnowEnG framework. In some cases, the user may wish to execute a particular KnowEnG pipeline repeatedly, with varying configurations. They may achieve this relatively easily through the containerized versions of KnowEnG tools. Another desired dimension of flexibility in KnowEnG analytics is that of the Knowledge Network. Although we provide several predetermined networks as available options to choose from when running a KnowEnG pipeline, the user may wish to utilize a custom network specialized for their domain of enquiry. This is allowed within the KnowEnG framework, via the Network Prepper tool (Appendix E in <xref ref-type="supplementary-material" rid="pbio.3000583.s001">S1 File</xref>). Flexibility is also supported by the multiple modes of access for KnowEnG tools, including the SB-CGC platform and Jupyter notebooks, which provide programming-savvy users several options for integration with other tools.</p>
<p>Despite our efforts to add provisions for flexible uses of KnowEnG functionalities, we recognize that the system will not be a one-size-fits-all solution for bioinformatics analysis. We expect that most genomics projects will have an “upstream” analysis component in which the data are processed in a more domain- and technology-dependent manner, outside of KnowEnG. Our hope is that such upstream analysis will often result in creation of omics spreadsheets similar to the ones used in our case studies and that their downstream analysis (such as sample clustering, gene prioritization, GSC) can be carried out in the KnowEnG framework in a more universal manner, without the need for unduly cumbersome customizations.</p>
</sec>
</sec>
<sec id="sec024">
<title>Material and methods</title>
<p>The details of the data sets and KnowEnG analysis pipelines used in this article are fully described in the supporting information. For each of the 3 case studies, there is a corresponding supporting information file (<xref ref-type="supplementary-material" rid="pbio.3000583.s002">S2 File</xref>, <xref ref-type="supplementary-material" rid="pbio.3000583.s003">S3 File</xref>, and <xref ref-type="supplementary-material" rid="pbio.3000583.s004">S4 File</xref>) that details the methods related to that case study in several appendices. These appendices also include additional interpretations for each analysis as well as all of the nondefault run parameters needed to reproduce the results. Many sections contain links to additional resources in which the actual code, containers, or compute servers can be found. Specific instructions for easily reproducing the main analyses on the KnowEnG platform from the 3 case studies are available in Appendix F in <xref ref-type="supplementary-material" rid="pbio.3000583.s001">S1 File</xref>. <xref ref-type="supplementary-material" rid="pbio.3000583.s001">S1 File</xref> also contains additional information about the design and capabilities of the components of the KnowEnG platform.</p>
</sec>
<sec id="sec025">
<title>Supporting information</title>
<supplementary-material id="pbio.3000583.s001" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pbio.3000583.s001" xlink:type="simple">
<label>S1 File</label>
<caption>
<title>Notes on the KnowEnG system.</title>
<p>Nine appendices of supplemental commentary and additional resources describing the KnowEnG system. KnowEnG, Knowledge Engine for Genomics.</p>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pbio.3000583.s002" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pbio.3000583.s002" xlink:type="simple">
<label>S2 File</label>
<caption>
<title>Methods for sample clustering case study.</title>
<p>Seven appendices describing the data, pipelines, methods, and additional analyses that relate to the first case study of performing sample clustering on cancer patient data.</p>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pbio.3000583.s003" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pbio.3000583.s003" xlink:type="simple">
<label>S3 File</label>
<caption>
<title>Methods for gene prioritization case study.</title>
<p>Three appendices describing the pipelines, methods, and additional analyses that relate to the second case study of performing gene prioritization with TCGA transcriptomic data. TCGA, The Cancer Genome Atlas.</p>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pbio.3000583.s004" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pbio.3000583.s004" xlink:type="simple">
<label>S4 File</label>
<caption>
<title>Methods for signature analysis and GSC case study.</title>
<p>Four appendices describing the data, pipelines, methods, and additional analyses that relate to the third case study of using KnowEnG pipelines to analyze LUSC signatures in ESCC samples on a third-party system. ESCC, esophageal squamous cell carcinoma; KnowEnG, Knowledge Engine for Genomics; LUSC, lung squamous cell carcinoma.</p>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pbio.3000583.s005" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" position="float" xlink:href="info:doi/10.1371/journal.pbio.3000583.s005" xlink:type="simple">
<label>S1 Data</label>
<caption>
<title>Tables for KnowEnG system notes.</title>
<p>Eight supplementary tables accompanying <xref ref-type="supplementary-material" rid="pbio.3000583.s001">S1 File</xref> notes on the KnowEnG system. KnowEnG, Knowledge Engine for Genomics.</p>
<p>(XLSX)</p>
</caption>
</supplementary-material>
<supplementary-material id="pbio.3000583.s006" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" position="float" xlink:href="info:doi/10.1371/journal.pbio.3000583.s006" xlink:type="simple">
<label>S2 Data</label>
<caption>
<title>Tables for sample clustering case study.</title>
<p>Thirteen supplementary tables accompanying <xref ref-type="supplementary-material" rid="pbio.3000583.s002">S2 File</xref> appendices related to the sample clustering case study.</p>
<p>(XLSX)</p>
</caption>
</supplementary-material>
<supplementary-material id="pbio.3000583.s007" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" position="float" xlink:href="info:doi/10.1371/journal.pbio.3000583.s007" xlink:type="simple">
<label>S3 Data</label>
<caption>
<title>Tables for gene prioritization case study.</title>
<p>Seventeen supplementary tables accompanying <xref ref-type="supplementary-material" rid="pbio.3000583.s003">S3 File</xref> appendices related to the gene prioritization case study.</p>
<p>(XLSX)</p>
</caption>
</supplementary-material>
<supplementary-material id="pbio.3000583.s008" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" position="float" xlink:href="info:doi/10.1371/journal.pbio.3000583.s008" xlink:type="simple">
<label>S4 Data</label>
<caption>
<title>Tables for signature analysis and GSC case study.</title>
<p>Fourteen supplementary tables accompanying <xref ref-type="supplementary-material" rid="pbio.3000583.s004">S4 File</xref> appendices related to the signature analysis and GSC case study.</p>
<p>(XLSX)</p>
</caption>
</supplementary-material>
</sec>
</body>
<back>
<ack>
<p>We thank our NIH colleagues, especially Ishwar Chandramouliswaran, for his guidance regarding interoperability with the Seven Bridges Genomics Cancer Genomics Cloud. We are grateful to the Roy Campbell Systems Research Group, UIUC, and the NIH-BD2K Common Credits pilot program for contributing additional computational resources to perform this study. We also appreciate the assistance and efforts from Seven Bridges Genomics Inc, and from the following UIUC personnel and students: Suyang Chen, Joerg Heintz, Henry Lin, Daniel Meling, Shreya Nagesh, Nathan T. Russell, Noor Shalabi, Jackson W.G. Vaughan, Paul Vijayakumar, Svetlana Vranic-Sowers, and Zhuojun Yao.</p>
</ack>
<glossary>
<title>Abbreviations</title>
<def-list>
<def-item><term>AML</term>
<def><p>acute myeloid leukemia</p></def>
</def-item>
<def-item><term>ARI</term>
<def><p>adjusted Rand index</p></def>
</def-item>
<def-item><term>AWS</term>
<def><p>Amazon Web Services</p></def>
</def-item>
<def-item><term>BRCA</term>
<def><p>breast cancer</p></def>
</def-item>
<def-item><term>CDH1</term>
<def><p>cadherin 1</p></def>
</def-item>
<def-item><term>COCA</term>
<def><p>cluster of cluster assignment</p></def>
</def-item>
<def-item><term>CTNNB1</term>
<def><p>catenin beta 1</p></def>
</def-item>
<def-item><term>CWL</term>
<def><p>Common Workflow Language</p></def>
</def-item>
<def-item><term>EGFR</term>
<def><p>epidermal growth factor receptor</p></def>
</def-item>
<def-item><term>EMT</term>
<def><p>epithelial to mesenchymal transition</p></def>
</def-item>
<def-item><term>ESCC</term>
<def><p>esophageal squamous cell carcinoma</p></def>
</def-item>
<def-item><term>ESR1</term>
<def><p>estrogen receptor 1</p></def>
</def-item>
<def-item><term>FAIR</term>
<def><p>findable, accessible, interoperable, and reusable</p></def>
</def-item>
<def-item><term>FOXA1</term>
<def><p>forkhead box A1</p></def>
</def-item>
<def-item><term>FOXM1</term>
<def><p>forkhead box M1</p></def>
</def-item>
<def-item><term>GATA3</term>
<def><p>GATA binding protein 3</p></def>
</def-item>
<def-item><term>GBM</term>
<def><p>glioblastoma</p></def>
</def-item>
<def-item><term>GDC</term>
<def><p>Genomic Data Commons</p></def>
</def-item>
<def-item><term>GO</term>
<def><p>Gene Ontology</p></def>
</def-item>
<def-item><term>HNSCC</term>
<def><p>head and neck squamous cell carcinoma</p></def>
</def-item>
<def-item><term>KnowEnG</term>
<def><p>Knowledge Engine for Genomics</p></def>
</def-item>
<def-item><term>KRAS</term>
<def><p>Kirsten rat sarcoma viral oncogene homolog</p></def>
</def-item>
<def-item><term>LUSC</term>
<def><p>lung squamous cell carcinoma</p></def>
</def-item>
<def-item><term>miRNA</term>
<def><p>microRNA</p></def>
</def-item>
<def-item><term>NBS</term>
<def><p>network-based stratification</p></def>
</def-item>
<def-item><term>NRAS</term>
<def><p>neuroblastoma RAS viral oncogene homolog</p></def>
</def-item>
<def-item><term>NRF2</term>
<def><p>nuclear factor erythroid 2-related factor 2</p></def>
</def-item>
<def-item><term>PAM50</term>
<def><p>Prediction Analysis of Microarray 50</p></def>
</def-item>
<def-item><term>RNA-seq</term>
<def><p>RNA sequencing</p></def>
</def-item>
<def-item><term>RWR</term>
<def><p>random walk with restart</p></def>
</def-item>
<def-item><term>SB-CGC</term>
<def><p>Seven Bridges Cancer Genomics Cloud</p></def>
</def-item>
<def-item><term>TCGA</term>
<def><p>The Cancer Genome Atlas</p></def>
</def-item>
</def-list>
</glossary>
<ref-list>
<title>References</title>
<ref id="pbio.3000583.ref001"><label>1</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Stephens</surname> <given-names>ZD</given-names></name>, <name name-style="western"><surname>Lee</surname> <given-names>SY</given-names></name>, <name name-style="western"><surname>Faghri</surname> <given-names>F</given-names></name>, <name name-style="western"><surname>Campbell</surname> <given-names>RH</given-names></name>, <name name-style="western"><surname>Zhai</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Efron</surname> <given-names>MJ</given-names></name>, <etal>et al</etal>. <article-title>Big Data: Astronomical or Genomical?</article-title> <source>PLoS Biol</source>. <year>2015</year>;<volume>13</volume>(<issue>7</issue>):<fpage>e1002195</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pbio.1002195" xlink:type="simple">10.1371/journal.pbio.1002195</ext-link></comment> <object-id pub-id-type="pmid">26151137</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref002"><label>2</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Jensen</surname> <given-names>MA</given-names></name>, <name name-style="western"><surname>Ferretti</surname> <given-names>V</given-names></name>, <name name-style="western"><surname>Grossman</surname> <given-names>RL</given-names></name>, <name name-style="western"><surname>Staudt</surname> <given-names>LM</given-names></name>. <article-title>The NCI Genomic Data Commons as an engine for precision medicine</article-title>. <source>Blood</source>. <year>2017</year>;<volume>130</volume>(<issue>4</issue>):<fpage>453</fpage>–<lpage>9</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1182/blood-2017-03-735654" xlink:type="simple">10.1182/blood-2017-03-735654</ext-link></comment> <object-id pub-id-type="pmid">28600341</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref003"><label>3</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Bui</surname> <given-names>AAT</given-names></name>, <name name-style="western"><surname>Van Horn</surname> <given-names>JD</given-names></name>, <collab>Consortium NBKC</collab>. <article-title>Envisioning the future of ‘big data’ biomedicine</article-title>. <source>J Biomed Inform</source>. <year>2017</year>;<volume>69</volume>:<fpage>115</fpage>–<lpage>7</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.jbi.2017.03.017" xlink:type="simple">10.1016/j.jbi.2017.03.017</ext-link></comment> <object-id pub-id-type="pmid">28366789</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref004"><label>4</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Wilkinson</surname> <given-names>MD</given-names></name>, <name name-style="western"><surname>Dumontier</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Aalbersberg</surname> <given-names>IJ</given-names></name>, <name name-style="western"><surname>Appleton</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Axton</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Baak</surname> <given-names>A</given-names></name>, <etal>et al</etal>. <article-title>The FAIR Guiding Principles for scientific data management and stewardship</article-title>. <source>Sci Data</source>. <year>2016</year>;<volume>3</volume>:<fpage>160018</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/sdata.2016.18" xlink:type="simple">10.1038/sdata.2016.18</ext-link></comment> <object-id pub-id-type="pmid">26978244</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref005"><label>5</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Merkel</surname> <given-names>D</given-names></name>. <article-title>Docker: lightweight Linux containers for consistent development and deployment</article-title>. <source>Linux Journal</source>. <year>2014</year>;<volume>2014</volume>(<issue>239</issue>):<fpage>2</fpage>.</mixed-citation></ref>
<ref id="pbio.3000583.ref006"><label>6</label><mixed-citation publication-type="other" xlink:type="simple">Kluyver T, Ragan-Kelley B, Pérez F, Granger BE, Bussonnier M, Frederic J, et al., editors. Jupyter Notebooks—a publishing format for reproducible computational workflows. ELPUB; 2016.</mixed-citation></ref>
<ref id="pbio.3000583.ref007"><label>7</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Lau</surname> <given-names>JW</given-names></name>, <name name-style="western"><surname>Lehnert</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Sethi</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Malhotra</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Kaushik</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Onder</surname> <given-names>Z</given-names></name>, <etal>et al</etal>. <article-title>The Cancer Genomics Cloud: Collaborative, Reproducible, and Democratized—A New Paradigm in Large-Scale Computational Research</article-title>. <source>Cancer Res</source>. <year>2017</year>;<volume>77</volume>(<issue>21</issue>):<fpage>e3</fpage>–<lpage>e6</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1158/0008-5472.CAN-17-0387" xlink:type="simple">10.1158/0008-5472.CAN-17-0387</ext-link></comment> <object-id pub-id-type="pmid">29092927</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref008"><label>8</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Grossman</surname> <given-names>RL</given-names></name>, <name name-style="western"><surname>Heath</surname> <given-names>AP</given-names></name>, <name name-style="western"><surname>Ferretti</surname> <given-names>V</given-names></name>, <name name-style="western"><surname>Varmus</surname> <given-names>HE</given-names></name>, <name name-style="western"><surname>Lowy</surname> <given-names>DR</given-names></name>, <name name-style="western"><surname>Kibbe</surname> <given-names>WA</given-names></name>, <etal>et al</etal>. <article-title>Toward a shared vision for cancer genomic data</article-title>. <source>New England Journal of Medicine</source>. <year>2016</year>;<volume>375</volume>(<issue>12</issue>):<fpage>1109</fpage>–<lpage>12</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1056/NEJMp1607591" xlink:type="simple">10.1056/NEJMp1607591</ext-link></comment> <object-id pub-id-type="pmid">27653561</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref009"><label>9</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ashburner</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Ball</surname> <given-names>CA</given-names></name>, <name name-style="western"><surname>Blake</surname> <given-names>JA</given-names></name>, <name name-style="western"><surname>Botstein</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Butler</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Cherry</surname> <given-names>JM</given-names></name>, <etal>et al</etal>. <article-title>Gene ontology: tool for the unification of biology. The Gene Ontology Consortium</article-title>. <source>Nat Genet</source>. <year>2000</year>;<volume>25</volume>(<issue>1</issue>):<fpage>25</fpage>–<lpage>9</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/75556" xlink:type="simple">10.1038/75556</ext-link></comment> <object-id pub-id-type="pmid">10802651</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref010"><label>10</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Fabregat</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Jupe</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Matthews</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Sidiropoulos</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Gillespie</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Garapati</surname> <given-names>P</given-names></name>, <etal>et al</etal>. <article-title>The Reactome Pathway Knowledgebase</article-title>. <source>Nucleic Acids Res</source>. <year>2018</year>;<volume>46</volume>(<issue>D1</issue>):<fpage>D649</fpage>–<lpage>D55</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/nar/gkx1132" xlink:type="simple">10.1093/nar/gkx1132</ext-link></comment> <object-id pub-id-type="pmid">29145629</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref011"><label>11</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hofree</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Shen</surname> <given-names>JP</given-names></name>, <name name-style="western"><surname>Carter</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Gross</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Ideker</surname> <given-names>T</given-names></name>. <article-title>Network-based stratification of tumor mutations</article-title>. <source>Nat Methods</source>. <year>2013</year>;<volume>10</volume>(<issue>11</issue>):<fpage>1108</fpage>–<lpage>15</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nmeth.2651" xlink:type="simple">10.1038/nmeth.2651</ext-link></comment> <object-id pub-id-type="pmid">24037242</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref012"><label>12</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Yu</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Yu</surname> <given-names>X</given-names></name>, <name name-style="western"><surname>Wang</surname> <given-names>J</given-names></name>. <article-title>Network-aided Bi-Clustering for discovering cancer subtypes</article-title>. <source>Scientific reports</source>. <year>2017</year>;<volume>7</volume>(<issue>1</issue>):<fpage>1046</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/s41598-017-01064-0" xlink:type="simple">10.1038/s41598-017-01064-0</ext-link></comment> <object-id pub-id-type="pmid">28432308</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref013"><label>13</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Liu</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Gu</surname> <given-names>Q</given-names></name>, <name name-style="western"><surname>Hou</surname> <given-names>JP</given-names></name>, <name name-style="western"><surname>Han</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Ma</surname> <given-names>J</given-names></name>. <article-title>A network-assisted co-clustering algorithm to discover cancer subtypes based on gene expression</article-title>. <source>BMC bioinformatics</source>. <year>2014</year>;<volume>15</volume>(<issue>1</issue>):<fpage>37</fpage>.</mixed-citation></ref>
<ref id="pbio.3000583.ref014"><label>14</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Wu</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Liu</surname> <given-names>Z</given-names></name>, <name name-style="western"><surname>Xu</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Chen</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Fang</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Tong</surname> <given-names>W</given-names></name>, <etal>et al</etal>. <article-title>NETBAGs: a network-based clustering approach with gene signatures for cancer subtyping analysis</article-title>. <source>Biomarkers in medicine</source>. <year>2015</year>;<volume>9</volume>(<issue>11</issue>):<fpage>1053</fpage>–<lpage>65</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.2217/bmm.15.96" xlink:type="simple">10.2217/bmm.15.96</ext-link></comment> <object-id pub-id-type="pmid">26501477</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref015"><label>15</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hou</surname> <given-names>JP</given-names></name>, <name name-style="western"><surname>Ma</surname> <given-names>J</given-names></name>. <article-title>DawnRank: discovering personalized driver genes in cancer</article-title>. <source>Genome medicine</source>. <year>2014</year>;<volume>6</volume>(<issue>7</issue>):<fpage>56</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/s13073-014-0056-8" xlink:type="simple">10.1186/s13073-014-0056-8</ext-link></comment> <object-id pub-id-type="pmid">25177370</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref016"><label>16</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Bourdakou</surname> <given-names>MM</given-names></name>, <name name-style="western"><surname>Athanasiadis</surname> <given-names>EI</given-names></name>, <name name-style="western"><surname>Spyrou</surname> <given-names>GM</given-names></name>. <article-title>Discovering gene re-ranking efficiency and conserved gene-gene relationships derived from gene co-expression network analysis on breast cancer data</article-title>. <source>Scientific reports</source>. <year>2016</year>;<volume>6</volume>:<fpage>20518</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/srep20518" xlink:type="simple">10.1038/srep20518</ext-link></comment> <object-id pub-id-type="pmid">26892392</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref017"><label>17</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Cho</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Shim</surname> <given-names>JE</given-names></name>, <name name-style="western"><surname>Kim</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Supek</surname> <given-names>F</given-names></name>, <name name-style="western"><surname>Lehner</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Lee</surname> <given-names>I</given-names></name>. <article-title>MUFFINN: cancer gene discovery via network analysis of somatic mutation data</article-title>. <source>Genome biology</source>. <year>2016</year>;<volume>17</volume>(<issue>1</issue>):<fpage>129</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/s13059-016-0989-x" xlink:type="simple">10.1186/s13059-016-0989-x</ext-link></comment> <object-id pub-id-type="pmid">27333808</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref018"><label>18</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Valentini</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Paccanaro</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Caniza</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Romero</surname> <given-names>AE</given-names></name>, <name name-style="western"><surname>Re</surname> <given-names>M</given-names></name>. <article-title>An extensive analysis of disease-gene associations using network integration and fast kernel-based gene prioritization methods</article-title>. <source>Artificial Intelligence in Medicine</source>. <year>2014</year>;<volume>61</volume>(<issue>2</issue>):<fpage>63</fpage>–<lpage>78</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.artmed.2014.03.003" xlink:type="simple">10.1016/j.artmed.2014.03.003</ext-link></comment> <object-id pub-id-type="pmid">24726035</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref019"><label>19</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Erten</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Bebek</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Koyutürk</surname> <given-names>M</given-names></name>. <article-title>Vavien: an algorithm for prioritizing candidate disease genes based on topological similarity of proteins in interaction networks</article-title>. <source>Journal of computational biology</source>. <year>2011</year>;<volume>18</volume>(<issue>11</issue>):<fpage>1561</fpage>–<lpage>74</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1089/cmb.2011.0154" xlink:type="simple">10.1089/cmb.2011.0154</ext-link></comment> <object-id pub-id-type="pmid">22035267</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref020"><label>20</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Guo</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Dong</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Hu</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Cai</surname> <given-names>X</given-names></name>, <name name-style="western"><surname>Tang</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Dou</surname> <given-names>J</given-names></name>, <etal>et al</etal>. <article-title>Biased random walk model for the prioritization of drug resistance associated proteins</article-title>. <source>Scientific reports</source>. <year>2015</year>;<volume>5</volume>:<fpage>10857</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/srep10857" xlink:type="simple">10.1038/srep10857</ext-link></comment> <object-id pub-id-type="pmid">26039373</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref021"><label>21</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Zhang</surname> <given-names>W</given-names></name>, <name name-style="western"><surname>Ota</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Shridhar</surname> <given-names>V</given-names></name>, <name name-style="western"><surname>Chien</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Wu</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Kuang</surname> <given-names>R</given-names></name>. <article-title>Network-based survival analysis reveals subnetwork signatures for predicting outcomes of ovarian cancer treatment</article-title>. <source>PLoS Comput Biol</source>. <year>2013</year>;<volume>9</volume>(<issue>3</issue>):<fpage>e1002975</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pcbi.1002975" xlink:type="simple">10.1371/journal.pcbi.1002975</ext-link></comment> <object-id pub-id-type="pmid">23555212</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref022"><label>22</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Xu</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Moni</surname> <given-names>MA</given-names></name>, <name name-style="western"><surname>Liò</surname> <given-names>P</given-names></name>. <article-title>Network regularised Cox regression and multiplex network models to predict disease comorbidities and survival of cancer</article-title>. <source>Computational biology and chemistry</source>. <year>2015</year>;<volume>59</volume>:<fpage>15</fpage>–<lpage>31</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.compbiolchem.2015.08.010" xlink:type="simple">10.1016/j.compbiolchem.2015.08.010</ext-link></comment> <object-id pub-id-type="pmid">26611766</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref023"><label>23</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Kim</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Pan</surname> <given-names>W</given-names></name>, <name name-style="western"><surname>Shen</surname> <given-names>X</given-names></name>. <article-title>Network-based penalized regression with application to genomic data</article-title>. <source>Biometrics</source>. <year>2013</year>;<volume>69</volume>(<issue>3</issue>):<fpage>582</fpage>–<lpage>93</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1111/biom.12035" xlink:type="simple">10.1111/biom.12035</ext-link></comment> <object-id pub-id-type="pmid">23822182</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref024"><label>24</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Blatti</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Sinha</surname> <given-names>S</given-names></name>. <article-title>Characterizing gene sets using discriminative random walks with restart on heterogeneous biological networks</article-title>. <source>Bioinformatics</source>. <year>2016</year>;<volume>32</volume>(<issue>14</issue>):<fpage>2167</fpage>–<lpage>75</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/bioinformatics/btw151" xlink:type="simple">10.1093/bioinformatics/btw151</ext-link></comment> <object-id pub-id-type="pmid">27153592</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref025"><label>25</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Dong</surname> <given-names>X</given-names></name>, <name name-style="western"><surname>Hao</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Wang</surname> <given-names>X</given-names></name>, <name name-style="western"><surname>Tian</surname> <given-names>W</given-names></name>. <article-title>LEGO: a novel method for gene set over-representation analysis by incorporating network-based gene weights</article-title>. <source>Scientific reports</source>. <year>2016</year>;<volume>6</volume>:<fpage>18871</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/srep18871" xlink:type="simple">10.1038/srep18871</ext-link></comment> <object-id pub-id-type="pmid">26750448</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref026"><label>26</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Di Lena</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Martelli</surname> <given-names>PL</given-names></name>, <name name-style="western"><surname>Fariselli</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Casadio</surname> <given-names>R</given-names></name>. <article-title>NET-GE: a novel NETwork-based Gene Enrichment for detecting biological processes associated to Mendelian diseases</article-title>. <source>BMC genomics</source>. <year>2015</year>;<volume>16</volume>(<issue>8</issue>):<fpage>S6</fpage>.</mixed-citation></ref>
<ref id="pbio.3000583.ref027"><label>27</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Alexeyenko</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Lee</surname> <given-names>W</given-names></name>, <name name-style="western"><surname>Pernemalm</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Guegan</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Dessen</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Lazar</surname> <given-names>V</given-names></name>, <etal>et al</etal>. <article-title>Network enrichment analysis: extension of gene-set enrichment analysis to gene networks</article-title>. <source>BMC bioinformatics</source>. <year>2012</year>;<volume>13</volume>(<issue>1</issue>):<fpage>226</fpage>.</mixed-citation></ref>
<ref id="pbio.3000583.ref028"><label>28</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Cornish</surname> <given-names>AJ</given-names></name>, <name name-style="western"><surname>Markowetz</surname> <given-names>F</given-names></name>. <article-title>SANTA: quantifying the functional content of molecular networks</article-title>. <source>PLoS Comput Biol</source>. <year>2014</year>;<volume>10</volume>(<issue>9</issue>):<fpage>e1003808</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pcbi.1003808" xlink:type="simple">10.1371/journal.pcbi.1003808</ext-link></comment> <object-id pub-id-type="pmid">25210953</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref029"><label>29</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Mostafavi</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Morris</surname> <given-names>Q</given-names></name>. <article-title>Combining many interaction networks to predict gene function and analyze gene lists</article-title>. <source>Proteomics</source>. <year>2012</year>;<volume>12</volume>(<issue>10</issue>):<fpage>1687</fpage>–<lpage>96</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1002/pmic.201100607" xlink:type="simple">10.1002/pmic.201100607</ext-link></comment> <object-id pub-id-type="pmid">22589215</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref030"><label>30</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Wang</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Cho</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Zhai</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Berger</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Peng</surname> <given-names>J</given-names></name>. <article-title>Exploiting ontology graph for predicting sparsely annotated gene function</article-title>. <source>Bioinformatics</source>. <year>2015</year>;<volume>31</volume>(<issue>12</issue>):<fpage>i357</fpage>–<lpage>i64</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/bioinformatics/btv260" xlink:type="simple">10.1093/bioinformatics/btv260</ext-link></comment> <object-id pub-id-type="pmid">26072504</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref031"><label>31</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Pena-Castillo</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Tasan</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Myers</surname> <given-names>CL</given-names></name>, <name name-style="western"><surname>Lee</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Joshi</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Zhang</surname> <given-names>C</given-names></name>, <etal>et al</etal>. <article-title>A critical assessment of Mus musculus gene function prediction using integrated genomic evidence</article-title>. <source>Genome Biol</source>. <year>2008</year>;<volume>9</volume> <issue>Suppl 1</issue>:<fpage>S2</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/gb-2008-9-s1-s2" xlink:type="simple">10.1186/gb-2008-9-s1-s2</ext-link></comment> <object-id pub-id-type="pmid">18613946</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref032"><label>32</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hoadley</surname> <given-names>KA</given-names></name>, <name name-style="western"><surname>Yau</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Wolf</surname> <given-names>DM</given-names></name>, <name name-style="western"><surname>Cherniack</surname> <given-names>AD</given-names></name>, <name name-style="western"><surname>Tamborero</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Ng</surname> <given-names>S</given-names></name>, <etal>et al</etal>. <article-title>Multiplatform analysis of 12 cancer types reveals molecular classification within and across tissues of origin</article-title>. <source>Cell</source>. <year>2014</year>;<volume>158</volume>(<issue>4</issue>):<fpage>929</fpage>–<lpage>44</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.cell.2014.06.049" xlink:type="simple">10.1016/j.cell.2014.06.049</ext-link></comment> <object-id pub-id-type="pmid">25109877</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref033"><label>33</label><mixed-citation publication-type="journal" xlink:type="simple"><collab>Cancer Genome Atlas Research Network, Analysis Working Group: Asan University, BC Cancer Agency, Brigham and Women’s Hospital, Broad Institute</collab>, <etal>et al</etal>. <article-title>Integrated genomic characterization of oesophageal carcinoma</article-title>. <source>Nature</source>. <year>2017</year>;<volume>541</volume>(<issue>7636</issue>):<fpage>169</fpage>–<lpage>75</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nature20805" xlink:type="simple">10.1038/nature20805</ext-link></comment> <object-id pub-id-type="pmid">28052061</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref034"><label>34</label><mixed-citation publication-type="other" xlink:type="simple">Faghri F, Hashemi SH, Babaeizadeh M, Nalls MA, Sinha S, Campbell RH. Toward Scalable Machine Learning and Data Mining: the Bioinformatics Case. CoRR. 2017;abs/1710.00112.</mixed-citation></ref>
<ref id="pbio.3000583.ref035"><label>35</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Alizadeh</surname> <given-names>AA</given-names></name>, <name name-style="western"><surname>Eisen</surname> <given-names>MB</given-names></name>, <name name-style="western"><surname>Davis</surname> <given-names>RE</given-names></name>, <name name-style="western"><surname>Ma</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Lossos</surname> <given-names>IS</given-names></name>, <name name-style="western"><surname>Rosenwald</surname> <given-names>A</given-names></name>, <etal>et al</etal>. <article-title>Distinct types of diffuse large B-cell lymphoma identified by gene expression profiling</article-title>. <source>Nature</source>. <year>2000</year>;<volume>403</volume>(<issue>6769</issue>):<fpage>503</fpage>–<lpage>11</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/35000501" xlink:type="simple">10.1038/35000501</ext-link></comment> <object-id pub-id-type="pmid">10676951</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref036"><label>36</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Parker</surname> <given-names>JS</given-names></name>, <name name-style="western"><surname>Mullins</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Cheang</surname> <given-names>MC</given-names></name>, <name name-style="western"><surname>Leung</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Voduc</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Vickery</surname> <given-names>T</given-names></name>, <etal>et al</etal>. <article-title>Supervised risk predictor of breast cancer based on intrinsic subtypes</article-title>. <source>J Clin Oncol</source>. <year>2009</year>;<volume>27</volume>(<issue>8</issue>):<fpage>1160</fpage>–<lpage>7</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1200/JCO.2008.18.1370" xlink:type="simple">10.1200/JCO.2008.18.1370</ext-link></comment> <object-id pub-id-type="pmid">19204204</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref037"><label>37</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>van Laarhoven</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Marchiori</surname> <given-names>E</given-names></name>. <article-title>Graph clustering with local search optimization: The resolution bias of the objective function matters most</article-title>. <source>Physical Review E</source>. <year>2013</year>;<volume>87</volume>(<issue>1</issue>):<fpage>012812</fpage>.</mixed-citation></ref>
<ref id="pbio.3000583.ref038"><label>38</label><mixed-citation publication-type="other" xlink:type="simple">Everitt BS, Landau S, Leese M. Cluster Analysis: Wiley; 2001.</mixed-citation></ref>
<ref id="pbio.3000583.ref039"><label>39</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Lee</surname> <given-names>I</given-names></name>, <name name-style="western"><surname>Blom</surname> <given-names>UM</given-names></name>, <name name-style="western"><surname>Wang</surname> <given-names>PI</given-names></name>, <name name-style="western"><surname>Shim</surname> <given-names>JE</given-names></name>, <name name-style="western"><surname>Marcotte</surname> <given-names>EM</given-names></name>. <article-title>Prioritizing candidate disease genes by network-based boosting of genome-wide association data</article-title>. <source>Genome Res</source>. <year>2011</year>;<volume>21</volume>(<issue>7</issue>):<fpage>1109</fpage>–<lpage>21</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1101/gr.118992.110" xlink:type="simple">10.1101/gr.118992.110</ext-link></comment> <object-id pub-id-type="pmid">21536720</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref040"><label>40</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hubert</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Arabie</surname> <given-names>P</given-names></name>. <article-title>Comparing partitions</article-title>. <source>Journal of classification</source>. <year>1985</year>;<volume>2</volume>(<issue>1</issue>):<fpage>193</fpage>–<lpage>218</lpage>.</mixed-citation></ref>
<ref id="pbio.3000583.ref041"><label>41</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Monti</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Tamayo</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Mesirov</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Golub</surname> <given-names>T</given-names></name>. <article-title>Consensus clustering: a resampling-based method for class discovery and visualization of gene expression microarray data</article-title>. <source>Machine learning</source>. <year>2003</year>;<volume>52</volume>(<issue>1–2</issue>):<fpage>91</fpage>–<lpage>118</lpage>.</mixed-citation></ref>
<ref id="pbio.3000583.ref042"><label>42</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Curtis</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Shah</surname> <given-names>SP</given-names></name>, <name name-style="western"><surname>Chin</surname> <given-names>SF</given-names></name>, <name name-style="western"><surname>Turashvili</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Rueda</surname> <given-names>OM</given-names></name>, <name name-style="western"><surname>Dunning</surname> <given-names>MJ</given-names></name>, <etal>et al</etal>. <article-title>The genomic and transcriptomic architecture of 2,000 breast tumours reveals novel subgroups</article-title>. <source>Nature</source>. <year>2012</year>;<volume>486</volume>(<issue>7403</issue>):<fpage>346</fpage>–<lpage>52</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nature10983" xlink:type="simple">10.1038/nature10983</ext-link></comment> <object-id pub-id-type="pmid">22522925</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref043"><label>43</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Emad</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Ray</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Jensen</surname> <given-names>TW</given-names></name>, <name name-style="western"><surname>Parat</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Natrajan</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Sinha</surname> <given-names>S</given-names></name>, <etal>et al</etal>. <article-title>An epithelial-mesenchymal-amoeboid transition gene signature reveals molecular subtypes of breast cancer progression and metastasis</article-title>. <source>bioRxiv</source>. <year>2017</year>:<fpage>219410</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1101/219410" xlink:type="simple">10.1101/219410</ext-link></comment></mixed-citation></ref>
<ref id="pbio.3000583.ref044"><label>44</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Szklarczyk</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Franceschini</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Wyder</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Forslund</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Heller</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Huerta-Cepas</surname> <given-names>J</given-names></name>, <etal>et al</etal>. <article-title>STRING v10: protein-protein interaction networks, integrated over the tree of life</article-title>. <source>Nucleic Acids Res</source>. <year>2015</year>;<volume>43</volume>(<issue>Database issue</issue>):<fpage>D447</fpage>–<lpage>52</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/nar/gku1003" xlink:type="simple">10.1093/nar/gku1003</ext-link></comment> <object-id pub-id-type="pmid">25352553</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref045"><label>45</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Soneson</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Delorenzi</surname> <given-names>M</given-names></name>. <article-title>A comparison of methods for differential expression analysis of RNA-seq data</article-title>. <source>BMC Bioinformatics</source>. <year>2013</year>;<volume>14</volume>:<fpage>91</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/1471-2105-14-91" xlink:type="simple">10.1186/1471-2105-14-91</ext-link></comment> <object-id pub-id-type="pmid">23497356</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref046"><label>46</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Rees</surname> <given-names>MG</given-names></name>, <name name-style="western"><surname>Seashore-Ludlow</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Cheah</surname> <given-names>JH</given-names></name>, <name name-style="western"><surname>Adams</surname> <given-names>DJ</given-names></name>, <name name-style="western"><surname>Price</surname> <given-names>EV</given-names></name>, <name name-style="western"><surname>Gill</surname> <given-names>S</given-names></name>, <etal>et al</etal>. <article-title>Correlating chemical sensitivity and basal gene expression reveals mechanism of action</article-title>. <source>Nat Chem Biol</source>. <year>2016</year>;<volume>12</volume>(<issue>2</issue>):<fpage>109</fpage>–<lpage>16</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nchembio.1986" xlink:type="simple">10.1038/nchembio.1986</ext-link></comment> <object-id pub-id-type="pmid">26656090</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref047"><label>47</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Emad</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Cairns</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Kalari</surname> <given-names>KR</given-names></name>, <name name-style="western"><surname>Wang</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Sinha</surname> <given-names>S</given-names></name>. <article-title>Knowledge-guided gene prioritization reveals new insights into the mechanisms of chemoresistance</article-title>. <source>Genome Biol</source>. <year>2017</year>;<volume>18</volume>(<issue>1</issue>):<fpage>153</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/s13059-017-1282-3" xlink:type="simple">10.1186/s13059-017-1282-3</ext-link></comment> <object-id pub-id-type="pmid">28800781</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref048"><label>48</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Barretina</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Caponigro</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Stransky</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Venkatesan</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Margolin</surname> <given-names>AA</given-names></name>, <name name-style="western"><surname>Kim</surname> <given-names>S</given-names></name>, <etal>et al</etal>. <article-title>The Cancer Cell Line Encyclopedia enables predictive modelling of anticancer drug sensitivity</article-title>. <source>Nature</source>. <year>2012</year>;<volume>483</volume>(<issue>7391</issue>):<fpage>603</fpage>–<lpage>7</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nature11003" xlink:type="simple">10.1038/nature11003</ext-link></comment> <object-id pub-id-type="pmid">22460905</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref049"><label>49</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Rubio-Perez</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Tamborero</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Schroeder</surname> <given-names>MP</given-names></name>, <name name-style="western"><surname>Antolin</surname> <given-names>AA</given-names></name>, <name name-style="western"><surname>Deu-Pons</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Perez-Llamas</surname> <given-names>C</given-names></name>, <etal>et al</etal>. <article-title>In silico prescription of anticancer drugs to cohorts of 28 tumor types reveals targeting opportunities</article-title>. <source>Cancer Cell</source>. <year>2015</year>;<volume>27</volume>(<issue>3</issue>):<fpage>382</fpage>–<lpage>96</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.ccell.2015.02.007" xlink:type="simple">10.1016/j.ccell.2015.02.007</ext-link></comment> <object-id pub-id-type="pmid">25759023</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref050"><label>50</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Holst</surname> <given-names>F</given-names></name>, <name name-style="western"><surname>Stahl</surname> <given-names>PR</given-names></name>, <name name-style="western"><surname>Ruiz</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Hellwinkel</surname> <given-names>O</given-names></name>, <name name-style="western"><surname>Jehan</surname> <given-names>Z</given-names></name>, <name name-style="western"><surname>Wendland</surname> <given-names>M</given-names></name>, <etal>et al</etal>. <article-title>Estrogen receptor alpha (ESR1) gene amplification is frequent in breast cancer</article-title>. <source>Nature Genetics</source>. <year>2007</year>;<volume>39</volume>(<issue>5</issue>):<fpage>655</fpage>–<lpage>60</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/ng2006" xlink:type="simple">10.1038/ng2006</ext-link></comment> <object-id pub-id-type="pmid">17417639</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref051"><label>51</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Forbes</surname> <given-names>SA</given-names></name>, <name name-style="western"><surname>Beare</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Boutselakis</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Bamford</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Bindal</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Tate</surname> <given-names>J</given-names></name>, <etal>et al</etal>. <article-title>COSMIC: somatic cancer genetics at high-resolution</article-title>. <source>Nucleic Acids Res</source>. <year>2017</year>;<volume>45</volume>(<issue>D1</issue>):<fpage>D777</fpage>–<lpage>D83</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/nar/gkw1121" xlink:type="simple">10.1093/nar/gkw1121</ext-link></comment> <object-id pub-id-type="pmid">27899578</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref052"><label>52</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Robert</surname> <given-names>SM</given-names></name>, <name name-style="western"><surname>Sontheimer</surname> <given-names>H</given-names></name>. <article-title>Glutamate transporters in the biology of malignant gliomas</article-title>. <source>Cellular and molecular life sciences</source>. <year>2014</year>;<volume>71</volume>(<issue>10</issue>):<fpage>1839</fpage>–<lpage>54</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s00018-013-1521-z" xlink:type="simple">10.1007/s00018-013-1521-z</ext-link></comment> <object-id pub-id-type="pmid">24281762</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref053"><label>53</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Yan</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Jurasz</surname> <given-names>P</given-names></name>. <article-title>The role of platelets in the tumor microenvironment: from solid tumors to leukemia</article-title>. <source>Biochimica et Biophysica Acta (BBA)-Molecular Cell Research</source>. <year>2016</year>;<volume>1863</volume>(<issue>3</issue>):<fpage>392</fpage>–<lpage>400</lpage>.</mixed-citation></ref>
<ref id="pbio.3000583.ref054"><label>54</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Choobdar</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Ahsen</surname> <given-names>ME</given-names></name>, <name name-style="western"><surname>Crawford</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Tomasoni</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Lamparter</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Lin</surname> <given-names>J</given-names></name>, <etal>et al</etal>. <article-title>Open Community Challenge Reveals Molecular Network Modules with Key Roles in Diseases</article-title>. <source>bioRxiv</source>. <year>2018</year>:<fpage>265553</fpage>.</mixed-citation></ref>
<ref id="pbio.3000583.ref055"><label>55</label><mixed-citation publication-type="other" xlink:type="simple">Amstutz P, Andeer R, Chapman B, Chilton J, Crusoe MR, Valls Guimera R, et al. Common Workflow Language, Draft 3. 2016.</mixed-citation></ref>
<ref id="pbio.3000583.ref056"><label>56</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Liu</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Wang</surname> <given-names>X</given-names></name>, <name name-style="western"><surname>Chen</surname> <given-names>GY</given-names></name>, <name name-style="western"><surname>Dalerba</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Gurney</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Hoey</surname> <given-names>T</given-names></name>, <etal>et al</etal>. <article-title>The prognostic role of a gene signature from tumorigenic breast-cancer cells</article-title>. <source>N Engl J Med</source>. <year>2007</year>;<volume>356</volume>(<issue>3</issue>):<fpage>217</fpage>–<lpage>26</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1056/NEJMoa063994" xlink:type="simple">10.1056/NEJMoa063994</ext-link></comment> <object-id pub-id-type="pmid">17229949</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref057"><label>57</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Yoshihara</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Shahmoradgoli</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Martinez</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Vegesna</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Kim</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Torres-Garcia</surname> <given-names>W</given-names></name>, <etal>et al</etal>. <article-title>Inferring tumour purity and stromal and immune cell admixture from expression data</article-title>. <source>Nat Commun</source>. <year>2013</year>;<volume>4</volume>:<fpage>2612</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/ncomms3612" xlink:type="simple">10.1038/ncomms3612</ext-link></comment> <object-id pub-id-type="pmid">24113773</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref058"><label>58</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Li</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Severson</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Pignon</surname> <given-names>JC</given-names></name>, <name name-style="western"><surname>Zhao</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Li</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Novak</surname> <given-names>J</given-names></name>, <etal>et al</etal>. <article-title>Comprehensive analyses of tumor immunity: implications for cancer immunotherapy</article-title>. <source>Genome Biol</source>. <year>2016</year>;<volume>17</volume>(<issue>1</issue>):<fpage>174</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/s13059-016-1028-7" xlink:type="simple">10.1186/s13059-016-1028-7</ext-link></comment> <object-id pub-id-type="pmid">27549193</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref059"><label>59</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Wilkerson</surname> <given-names>MD</given-names></name>, <name name-style="western"><surname>Yin</surname> <given-names>X</given-names></name>, <name name-style="western"><surname>Hoadley</surname> <given-names>KA</given-names></name>, <name name-style="western"><surname>Liu</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Hayward</surname> <given-names>MC</given-names></name>, <name name-style="western"><surname>Cabanski</surname> <given-names>CR</given-names></name>, <etal>et al</etal>. <article-title>Lung squamous cell carcinoma mRNA expression subtypes are reproducible, clinically important, and correspond to normal cell types</article-title>. <source>Clin Cancer Res</source>. <year>2010</year>;<volume>16</volume>(<issue>19</issue>):<fpage>4864</fpage>–<lpage>75</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1158/1078-0432.CCR-10-0199" xlink:type="simple">10.1158/1078-0432.CCR-10-0199</ext-link></comment> <object-id pub-id-type="pmid">20643781</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref060"><label>60</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Huang</surname> <given-names>DW</given-names></name>, <name name-style="western"><surname>Sherman</surname> <given-names>BT</given-names></name>, <name name-style="western"><surname>Lempicki</surname> <given-names>RA</given-names></name>. <article-title>Systematic and integrative analysis of large gene lists using DAVID bioinformatics resources</article-title>. <source>Nat Protoc</source>. <year>2009</year>;<volume>4</volume>(<issue>1</issue>):<fpage>44</fpage>–<lpage>57</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nprot.2008.211" xlink:type="simple">10.1038/nprot.2008.211</ext-link></comment> <object-id pub-id-type="pmid">19131956</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref061"><label>61</label><mixed-citation publication-type="other" xlink:type="simple">Page L, Brin S, Motwani R, Winograd T. The PageRank citation ranking: Bringing order to the web. Stanford InfoLab, 1999.</mixed-citation></ref>
<ref id="pbio.3000583.ref062"><label>62</label><mixed-citation publication-type="other" xlink:type="simple">Sun J, Qu H, Chakrabarti D, Faloutsos C. Neighborhood formation and anomaly detection in bipartite graphs. Fifth IEEE International Conference on Data Mining; 2005: IEEE.</mixed-citation></ref>
<ref id="pbio.3000583.ref063"><label>63</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Linkowski</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Blatti</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Kalari</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Sinha</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Vasudevan</surname> <given-names>S</given-names></name>. <article-title>Gene Sets Analysis using Network Patterns</article-title>. <source>bioRxiv</source>. <year>2019</year>:<fpage>629816</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1101/629816" xlink:type="simple">10.1101/629816</ext-link></comment></mixed-citation></ref>
<ref id="pbio.3000583.ref064"><label>64</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Chen</surname> <given-names>EY</given-names></name>, <name name-style="western"><surname>Tan</surname> <given-names>CM</given-names></name>, <name name-style="western"><surname>Kou</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Duan</surname> <given-names>Q</given-names></name>, <name name-style="western"><surname>Wang</surname> <given-names>Z</given-names></name>, <name name-style="western"><surname>Meirelles</surname> <given-names>GV</given-names></name>, <etal>et al</etal>. <article-title>Enrichr: interactive and collaborative HTML5 gene list enrichment analysis tool</article-title>. <source>BMC Bioinformatics</source>. <year>2013</year>;<volume>14</volume>:<fpage>128</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/1471-2105-14-128" xlink:type="simple">10.1186/1471-2105-14-128</ext-link></comment> <object-id pub-id-type="pmid">23586463</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref065"><label>65</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Huang</surname> <given-names>DW</given-names></name>, <name name-style="western"><surname>Sherman</surname> <given-names>BT</given-names></name>, <name name-style="western"><surname>Lempicki</surname> <given-names>RA</given-names></name>. <article-title>Bioinformatics enrichment tools: paths toward the comprehensive functional analysis of large gene lists</article-title>. <source>Nucleic Acids Res</source>. <year>2009</year>;<volume>37</volume>(<issue>1</issue>):<fpage>1</fpage>–<lpage>13</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/nar/gkn923" xlink:type="simple">10.1093/nar/gkn923</ext-link></comment> <object-id pub-id-type="pmid">19033363</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref066"><label>66</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hu</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Kadota</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Liu</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Abnet</surname> <given-names>CC</given-names></name>, <name name-style="western"><surname>Su</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Wu</surname> <given-names>H</given-names></name>, <etal>et al</etal>. <article-title>Genomic Landscape of Somatic Alterations in Esophageal Squamous Cell Carcinoma and Gastric Cancer</article-title>. <source>Cancer Res</source>. <year>2016</year>;<volume>76</volume>(<issue>7</issue>):<fpage>1714</fpage>–<lpage>23</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1158/0008-5472.CAN-15-0338" xlink:type="simple">10.1158/0008-5472.CAN-15-0338</ext-link></comment> <object-id pub-id-type="pmid">26857264</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref067"><label>67</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Song</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Wang</surname> <given-names>X</given-names></name>, <name name-style="western"><surname>Feng</surname> <given-names>Z</given-names></name>. <article-title>Overexpression of FOXM1 as a target for malignant progression of esophageal squamous cell carcinoma</article-title>. <source>Oncol Lett</source>. <year>2018</year>;<volume>15</volume>(<issue>4</issue>):<fpage>5910</fpage>–<lpage>4</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3892/ol.2018.8035" xlink:type="simple">10.3892/ol.2018.8035</ext-link></comment> <object-id pub-id-type="pmid">29552222</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref068"><label>68</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Zhang</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Jiao</surname> <given-names>Q</given-names></name>, <name name-style="western"><surname>Kong</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Yu</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Fang</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Li</surname> <given-names>M</given-names></name>, <etal>et al</etal>. <article-title>Nrf2 and Keap1 abnormalities in esophageal squamous cell carcinoma and association with the effect of chemoradiotherapy</article-title>. <source>Thorac Cancer</source>. <year>2018</year>;<volume>9</volume>(<issue>6</issue>):<fpage>726</fpage>–<lpage>35</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1111/1759-7714.12640" xlink:type="simple">10.1111/1759-7714.12640</ext-link></comment> <object-id pub-id-type="pmid">29675925</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref069"><label>69</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Szumilo</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Burdan</surname> <given-names>F</given-names></name>, <name name-style="western"><surname>Zinkiewicz</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Dudka</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Klepacz</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Dabrowski</surname> <given-names>A</given-names></name>, <etal>et al</etal>. <article-title>Expression of syndecan-1 and cathepsins D and K in advanced esophageal squamous cell carcinoma</article-title>. <source>Folia Histochem Cytobiol</source>. <year>2009</year>;<volume>47</volume>(<issue>4</issue>):<fpage>571</fpage>–<lpage>8</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.2478/v10042-008-0012-8" xlink:type="simple">10.2478/v10042-008-0012-8</ext-link></comment> <object-id pub-id-type="pmid">20430722</object-id>.</mixed-citation></ref>
<ref id="pbio.3000583.ref070"><label>70</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Schmelzle</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Dizdar</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Matthaei</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Baldus</surname> <given-names>SE</given-names></name>, <name name-style="western"><surname>Wolters</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Lindenlauf</surname> <given-names>N</given-names></name>, <etal>et al</etal>. <article-title>Esophageal cancer proliferation is mediated by cytochrome P450 2C9 (CYP2C9)</article-title>. <source>Prostaglandins Other Lipid Mediat</source>. <year>2011</year>;<volume>94</volume>(<issue>1–2</issue>):<fpage>25</fpage>–<lpage>33</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.prostaglandins.2010.12.001" xlink:type="simple">10.1016/j.prostaglandins.2010.12.001</ext-link></comment> <object-id pub-id-type="pmid">21167292</object-id>.</mixed-citation></ref>
<ref id="pbio.3000583.ref071"><label>71</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Langmead</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Nellore</surname> <given-names>A</given-names></name>. <article-title>Cloud computing for genomic data analysis and collaboration</article-title>. <source>Nat Rev Genet</source>. <year>2018</year>;<volume>19</volume>(<issue>4</issue>):<fpage>208</fpage>–<lpage>19</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nrg.2017.113" xlink:type="simple">10.1038/nrg.2017.113</ext-link></comment> <object-id pub-id-type="pmid">29379135</object-id>.</mixed-citation></ref>
<ref id="pbio.3000583.ref072"><label>72</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Afgan</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Baker</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Batut</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>van den Beek</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Bouvier</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Cech</surname> <given-names>M</given-names></name>, <etal>et al</etal>. <article-title>The Galaxy platform for accessible, reproducible and collaborative biomedical analyses: 2018 update</article-title>. <source>Nucleic Acids Res</source>. <year>2018</year>;<volume>46</volume>(<issue>W1</issue>):<fpage>W537</fpage>–<lpage>W44</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/nar/gky379" xlink:type="simple">10.1093/nar/gky379</ext-link></comment> <object-id pub-id-type="pmid">29790989</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref073"><label>73</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Birger</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Hanna</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Salinas</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Neff</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Saksena</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Livitz</surname> <given-names>D</given-names></name>, <etal>et al</etal>. <article-title>FireCloud, a scalable cloud-based platform for collaborative genome analysis: Strategies for reducing and controlling costs</article-title>. <source>bioRxiv</source>. <year>2017</year>:<fpage>209494</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1101/209494" xlink:type="simple">10.1101/209494</ext-link></comment></mixed-citation></ref>
<ref id="pbio.3000583.ref074"><label>74</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Gao</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Aksoy</surname> <given-names>BA</given-names></name>, <name name-style="western"><surname>Dogrusoz</surname> <given-names>U</given-names></name>, <name name-style="western"><surname>Dresdner</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Gross</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Sumer</surname> <given-names>SO</given-names></name>, <etal>et al</etal>. <article-title>Integrative analysis of complex cancer genomics and clinical profiles using the cBioPortal</article-title>. <source>Sci Signal</source>. <year>2013</year>;<volume>6</volume>(<issue>269</issue>):<fpage>pl1</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1126/scisignal.2004088" xlink:type="simple">10.1126/scisignal.2004088</ext-link></comment> <object-id pub-id-type="pmid">23550210</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref075"><label>75</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Reich</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Liefeld</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Gould</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Lerner</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Tamayo</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Mesirov</surname> <given-names>JP</given-names></name>. <article-title>GenePattern 2.0</article-title>. <source>Nat Genet</source>. <year>2006</year>;<volume>38</volume>(<issue>5</issue>):<fpage>500</fpage>–<lpage>1</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/ng0506-500" xlink:type="simple">10.1038/ng0506-500</ext-link></comment> <object-id pub-id-type="pmid">16642009</object-id>.</mixed-citation></ref>
<ref id="pbio.3000583.ref076"><label>76</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Baker</surname> <given-names>EJ</given-names></name>, <name name-style="western"><surname>Jay</surname> <given-names>JJ</given-names></name>, <name name-style="western"><surname>Bubier</surname> <given-names>JA</given-names></name>, <name name-style="western"><surname>Langston</surname> <given-names>MA</given-names></name>, <name name-style="western"><surname>Chesler</surname> <given-names>EJ</given-names></name>. <article-title>GeneWeaver: a web-based system for integrative functional genomics</article-title>. <source>Nucleic acids research</source>. <year>2011</year>;<volume>40</volume>(<issue>D1</issue>):<fpage>D1067</fpage>–<lpage>D76</lpage>.</mixed-citation></ref>
<ref id="pbio.3000583.ref077"><label>77</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Warde-Farley</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Donaldson</surname> <given-names>SL</given-names></name>, <name name-style="western"><surname>Comes</surname> <given-names>O</given-names></name>, <name name-style="western"><surname>Zuberi</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Badrawi</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Chao</surname> <given-names>P</given-names></name>, <etal>et al</etal>. <article-title>The GeneMANIA prediction server: biological network integration for gene prioritization and predicting gene function</article-title>. <source>Nucleic Acids Res</source>. <year>2010</year>;<volume>38</volume>(<issue>Web Server issue</issue>):<fpage>W214</fpage>–<lpage>20</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/nar/gkq537" xlink:type="simple">10.1093/nar/gkq537</ext-link></comment> <object-id pub-id-type="pmid">20576703</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref078"><label>78</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Qu</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Garamszegi</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Wu</surname> <given-names>F</given-names></name>, <name name-style="western"><surname>Thorvaldsdottir</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Liefeld</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Ocana</surname> <given-names>M</given-names></name>, <etal>et al</etal>. <article-title>Integrative genomic analysis by interoperation of bioinformatics tools in GenomeSpace</article-title>. <source>Nature methods</source>. <year>2016</year>;<volume>13</volume>(<issue>3</issue>):<fpage>245</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nmeth.3732" xlink:type="simple">10.1038/nmeth.3732</ext-link></comment> <object-id pub-id-type="pmid">26780094</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref079"><label>79</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Floratos</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Smith</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Ji</surname> <given-names>Z</given-names></name>, <name name-style="western"><surname>Watkinson</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Califano</surname> <given-names>A</given-names></name>. <article-title>geWorkbench: an open source platform for integrative genomics</article-title>. <source>Bioinformatics</source>. <year>2010</year>;<volume>26</volume>(<issue>14</issue>):<fpage>1779</fpage>–<lpage>80</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/bioinformatics/btq282" xlink:type="simple">10.1093/bioinformatics/btq282</ext-link></comment> <object-id pub-id-type="pmid">20511363</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref080"><label>80</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Margolin</surname> <given-names>AA</given-names></name>, <name name-style="western"><surname>Nemenman</surname> <given-names>I</given-names></name>, <name name-style="western"><surname>Basso</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Wiggins</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Stolovitzky</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Dalla Favera</surname> <given-names>R</given-names></name>, <etal>et al</etal>. <article-title>ARACNE: an algorithm for the reconstruction of gene regulatory networks in a mammalian cellular context</article-title>. <source>BMC Bioinformatics</source>. <year>2006</year>;<volume>7</volume>(<issue>Suppl 1</issue>):<fpage>S7</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/1471-2105-7-S1-S7" xlink:type="simple">10.1186/1471-2105-7-S1-S7</ext-link></comment></mixed-citation></ref>
<ref id="pbio.3000583.ref081"><label>81</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Lefebvre</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Rajbhandari</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Alvarez</surname> <given-names>MJ</given-names></name>, <name name-style="western"><surname>Bandaru</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Lim</surname> <given-names>WK</given-names></name>, <name name-style="western"><surname>Sato</surname> <given-names>M</given-names></name>, <etal>et al</etal>. <article-title>A human B-cell interactome identifies MYB and FOXM1 as master regulators of proliferation in germinal centers</article-title>. <source>Molecular systems biology</source>. <year>2010</year>;<volume>6</volume>(<issue>1</issue>):<fpage>377</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/msb.2010.31" xlink:type="simple">10.1038/msb.2010.31</ext-link></comment></mixed-citation></ref>
<ref id="pbio.3000583.ref082"><label>82</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Alvarez</surname> <given-names>MJ</given-names></name>, <name name-style="western"><surname>Shen</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Giorgi</surname> <given-names>FM</given-names></name>, <name name-style="western"><surname>Lachmann</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Ding</surname> <given-names>BB</given-names></name>, <name name-style="western"><surname>Ye</surname> <given-names>BH</given-names></name>, <etal>et al</etal>. <article-title>Functional characterization of somatic mutations in cancer using network-based inference of protein activity</article-title>. <source>Nature genetics</source>. <year>2016</year>;<volume>48</volume>(<issue>8</issue>):<fpage>838</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/ng.3593" xlink:type="simple">10.1038/ng.3593</ext-link></comment> <object-id pub-id-type="pmid">27322546</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref083"><label>83</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Wang</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Saito</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Bisikirska</surname> <given-names>BC</given-names></name>, <name name-style="western"><surname>Alvarez</surname> <given-names>MJ</given-names></name>, <name name-style="western"><surname>Lim</surname> <given-names>WK</given-names></name>, <name name-style="western"><surname>Rajbhandari</surname> <given-names>P</given-names></name>, <etal>et al</etal>. <article-title>Genome-wide identification of post-translational modulators of transcription factor activity in human B cells</article-title>. <source>Nature biotechnology</source>. <year>2009</year>;<volume>27</volume>(<issue>9</issue>):<fpage>829</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nbt.1563" xlink:type="simple">10.1038/nbt.1563</ext-link></comment> <object-id pub-id-type="pmid">19741643</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref084"><label>84</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Kocher</surname> <given-names>SD</given-names></name>, <name name-style="western"><surname>Ayroles</surname> <given-names>JF</given-names></name>, <name name-style="western"><surname>Stone</surname> <given-names>EA</given-names></name>, <name name-style="western"><surname>Grozinger</surname> <given-names>CM</given-names></name>. <article-title>Individual variation in pheromone response correlates with reproductive traits and brain gene expression in worker honey bees</article-title>. <source>PLoS ONE</source>. <year>2010</year>;<volume>5</volume>(<issue>2</issue>):<fpage>e9116</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pone.0009116" xlink:type="simple">10.1371/journal.pone.0009116</ext-link></comment> <object-id pub-id-type="pmid">20161742</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref085"><label>85</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Wytock</surname> <given-names>TP</given-names></name>, <name name-style="western"><surname>Motter</surname> <given-names>AE</given-names></name>. <article-title>Predicting growth rate from gene expression</article-title>. <source>Proceedings of the National Academy of Sciences</source>. <year>2019</year>;<volume>116</volume>(<issue>2</issue>):<fpage>367</fpage>–<lpage>72</lpage>.</mixed-citation></ref>
<ref id="pbio.3000583.ref086"><label>86</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Kapheim</surname> <given-names>KM</given-names></name>, <name name-style="western"><surname>Pan</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Li</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Salzberg</surname> <given-names>SL</given-names></name>, <name name-style="western"><surname>Puiu</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Magoc</surname> <given-names>T</given-names></name>, <etal>et al</etal>. <article-title>Genomic signatures of evolutionary transitions from solitary to group living</article-title>. <source>Science</source>. <year>2015</year>;<volume>348</volume>(<issue>6239</issue>):<fpage>1139</fpage>–<lpage>43</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1126/science.aaa4788" xlink:type="simple">10.1126/science.aaa4788</ext-link></comment> <object-id pub-id-type="pmid">25977371</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref087"><label>87</label><mixed-citation publication-type="other" xlink:type="simple">Nussbaumer T, Wagner C, Heidari P. TraitCorr—correlating gene expression measurements with phenotypic data. bioRxiv. 2019.</mixed-citation></ref>
<ref id="pbio.3000583.ref088"><label>88</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Costa-Silva</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Domingues</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Lopes</surname> <given-names>FM</given-names></name>. <article-title>RNA-Seq differential expression analysis: An extended review and a software tool</article-title>. <source>PLoS ONE</source>. <year>2017</year>;<volume>12</volume>(<issue>12</issue>):<fpage>e0190152</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pone.0190152" xlink:type="simple">10.1371/journal.pone.0190152</ext-link></comment> <object-id pub-id-type="pmid">29267363</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref089"><label>89</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Robinson</surname> <given-names>MD</given-names></name>, <name name-style="western"><surname>McCarthy</surname> <given-names>DJ</given-names></name>, <name name-style="western"><surname>Smyth</surname> <given-names>GK</given-names></name>. <article-title>edgeR: a Bioconductor package for differential expression analysis of digital gene expression data</article-title>. <source>Bioinformatics</source>. <year>2010</year>;<volume>26</volume>(<issue>1</issue>):<fpage>139</fpage>–<lpage>40</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/bioinformatics/btp616" xlink:type="simple">10.1093/bioinformatics/btp616</ext-link></comment> <object-id pub-id-type="pmid">19910308</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref090"><label>90</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Huber</surname> <given-names>W</given-names></name>, <name name-style="western"><surname>Carey</surname> <given-names>VJ</given-names></name>, <name name-style="western"><surname>Gentleman</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Anders</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Carlson</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Carvalho</surname> <given-names>BS</given-names></name>, <etal>et al</etal>. <article-title>Orchestrating high-throughput genomic analysis with Bioconductor</article-title>. <source>Nature methods</source>. <year>2015</year>;<volume>12</volume>(<issue>2</issue>):<fpage>115</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nmeth.3252" xlink:type="simple">10.1038/nmeth.3252</ext-link></comment> <object-id pub-id-type="pmid">25633503</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref091"><label>91</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Lightbody</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Haberland</surname> <given-names>V</given-names></name>, <name name-style="western"><surname>Browne</surname> <given-names>F</given-names></name>, <name name-style="western"><surname>Taggart</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Zheng</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Parks</surname> <given-names>E</given-names></name>, <etal>et al</etal>. <article-title>Review of Applications of High-Throughput Sequencing in Personalised Medicine: Barriers and Facilitators of Future Progress in Research and Clinical Application</article-title>. <source>Briefings in bioinformatics</source>. <year>2018</year>:<fpage>bby051</fpage>.</mixed-citation></ref>
<ref id="pbio.3000583.ref092"><label>92</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Curtis</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Shah</surname> <given-names>SP</given-names></name>, <name name-style="western"><surname>Chin</surname> <given-names>S-F</given-names></name>, <name name-style="western"><surname>Turashvili</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Rueda</surname> <given-names>OM</given-names></name>, <name name-style="western"><surname>Dunning</surname> <given-names>MJ</given-names></name>, <etal>et al</etal>. <article-title>The genomic and transcriptomic architecture of 2,000 breast tumours reveals novel subgroups</article-title>. <source>Nature</source>. <year>2012</year>;<volume>486</volume>(<issue>7403</issue>):<fpage>346</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nature10983" xlink:type="simple">10.1038/nature10983</ext-link></comment> <object-id pub-id-type="pmid">22522925</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref093"><label>93</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Wang</surname> <given-names>W-H</given-names></name>, <name name-style="western"><surname>Xie</surname> <given-names>T-Y</given-names></name>, <name name-style="western"><surname>Xie</surname> <given-names>G-L</given-names></name>, <name name-style="western"><surname>Ren</surname> <given-names>Z-L</given-names></name>, <name name-style="western"><surname>Li</surname> <given-names>J-M</given-names></name>. <article-title>An Integrated Approach for Identifying Molecular Subtypes in Human Colon Cancer Using Gene Expression Data</article-title>. <source>Genes</source>. <year>2018</year>;<volume>9</volume>(<issue>8</issue>):<fpage>397</fpage>.</mixed-citation></ref>
<ref id="pbio.3000583.ref094"><label>94</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Søkilde</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Persson</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Ehinger</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Pirona</surname> <given-names>AC</given-names></name>, <name name-style="western"><surname>Fernö</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Hegardt</surname> <given-names>C</given-names></name>, <etal>et al</etal>. <article-title>Refinement of breast cancer molecular classification by miRNA expression profiles</article-title>. <source>BMC genomics</source>. <year>2019</year>;<volume>20</volume>(<issue>1</issue>):<fpage>503</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/s12864-019-5887-7" xlink:type="simple">10.1186/s12864-019-5887-7</ext-link></comment> <object-id pub-id-type="pmid">31208318</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref095"><label>95</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Kuijjer</surname> <given-names>ML</given-names></name>, <name name-style="western"><surname>Paulson</surname> <given-names>JN</given-names></name>, <name name-style="western"><surname>Salzman</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Ding</surname> <given-names>W</given-names></name>, <name name-style="western"><surname>Quackenbush</surname> <given-names>J</given-names></name>. <article-title>Cancer subtype identification using somatic mutation data</article-title>. <source>British journal of cancer</source>. <year>2018</year>;<volume>118</volume>(<issue>11</issue>):<fpage>1492</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/s41416-018-0109-7" xlink:type="simple">10.1038/s41416-018-0109-7</ext-link></comment> <object-id pub-id-type="pmid">29765148</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref096"><label>96</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Väremo</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Henriksen</surname> <given-names>TI</given-names></name>, <name name-style="western"><surname>Scheele</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Broholm</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Pedersen</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Uhlén</surname> <given-names>M</given-names></name>, <etal>et al</etal>. <article-title>Type 2 diabetes and obesity induce similar transcriptional reprogramming in human myocytes</article-title>. <source>Genome medicine</source>. <year>2017</year>;<volume>9</volume>(<issue>1</issue>):<fpage>47</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/s13073-017-0432-2" xlink:type="simple">10.1186/s13073-017-0432-2</ext-link></comment> <object-id pub-id-type="pmid">28545587</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref097"><label>97</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Whitfield</surname> <given-names>CW</given-names></name>, <name name-style="western"><surname>Cziko</surname> <given-names>A-M</given-names></name>, <name name-style="western"><surname>Robinson</surname> <given-names>GE</given-names></name>. <article-title>Gene expression profiles in the brain predict behavior in individual honey bees</article-title>. <source>Science</source>. <year>2003</year>;<volume>302</volume>(<issue>5643</issue>):<fpage>296</fpage>–<lpage>9</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1126/science.1086807" xlink:type="simple">10.1126/science.1086807</ext-link></comment> <object-id pub-id-type="pmid">14551438</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref098"><label>98</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Narusaka</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Abe</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Kobayashi</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Kubo</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Kawai</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Izawa</surname> <given-names>N</given-names></name>, <etal>et al</etal>. <article-title>A model system to screen for candidate plant activators using an immune-induction system in Arabidopsis</article-title>. <source>Plant Biotechnology</source>. <year>2006</year>;<volume>23</volume>(<issue>3</issue>):<fpage>321</fpage>–<lpage>7</lpage>.</mixed-citation></ref>
<ref id="pbio.3000583.ref099"><label>99</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Subramanian</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Tamayo</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Mootha</surname> <given-names>VK</given-names></name>, <name name-style="western"><surname>Mukherjee</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Ebert</surname> <given-names>BL</given-names></name>, <name name-style="western"><surname>Gillette</surname> <given-names>MA</given-names></name>, <etal>et al</etal>. <article-title>Gene set enrichment analysis: a knowledge-based approach for interpreting genome-wide expression profiles</article-title>. <source>Proc Natl Acad Sci U S A</source>. <year>2005</year>;<volume>102</volume>(<issue>43</issue>):<fpage>15545</fpage>–<lpage>50</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1073/pnas.0506580102" xlink:type="simple">10.1073/pnas.0506580102</ext-link></comment> <object-id pub-id-type="pmid">16199517</object-id></mixed-citation></ref>
<ref id="pbio.3000583.ref100"><label>100</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Saul</surname> <given-names>MC</given-names></name>, <name name-style="western"><surname>Blatti</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Yang</surname> <given-names>W</given-names></name>, <name name-style="western"><surname>Bukhari</surname> <given-names>SA</given-names></name>, <name name-style="western"><surname>Shpigler</surname> <given-names>HY</given-names></name>, <name name-style="western"><surname>Troy</surname> <given-names>JM</given-names></name>, <etal>et al</etal>. <article-title>Cross-species systems analysis of evolutionary toolkits of neurogenomic response to social challenge</article-title>. <source>Genes, Brain and Behavior</source>. <year>2019</year>;<volume>18</volume>(<issue>1</issue>):<fpage>e12502</fpage>.</mixed-citation></ref>
</ref-list>
</back>
<sub-article article-type="editor-report" id="pbio.3000583.r001" specific-use="decision-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pbio.3000583.r001</article-id>
<title-group>
<article-title>Decision Letter 0</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Richardson</surname>
<given-names>Lauren A</given-names>
</name>
<role>Senior Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2020</copyright-year>
<copyright-holder>Lauren A Richardson</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pbio.3000583" document-id-type="doi" document-type="article" id="rel-obj001" link-type="peer-reviewed-article"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>0</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">6 Jun 2019</named-content>
</p>
<p>Dear Dr Blatti, </p>
<p>Thank you for submitting your manuscript entitled "Knowledge-guided analysis of ‘omics’ data using the KnowEnG cloud platform" for consideration as a Methods and Resources by PLOS Biology.</p>
<p>Your manuscript has now been evaluated by the PLOS Biology editorial staff as well as by an academic editor with relevant expertise and I am writing to let you know that we would like to send your submission out for external peer review.</p>
<p>However, before we can send your manuscript to reviewers, we need you to complete your submission by providing the metadata that is required for full assessment. To this end, please login to Editorial Manager where you will find the paper in the 'Submissions Needing Revisions' folder on your homepage. Please click 'Revise Submission' from the Action Links and complete all additional questions in the submission questionnaire.</p>
<p>Please re-submit your manuscript within two working days, i.e. by Jun 08 2019 11:59PM.</p>
<p>Login to Editorial Manager here: <ext-link ext-link-type="uri" xlink:href="https://www.editorialmanager.com/pbiology" xlink:type="simple">https://www.editorialmanager.com/pbiology</ext-link> </p>
<p>During resubmission, you will be invited to opt-in to posting your pre-review manuscript as a bioRxiv preprint. Visit <ext-link ext-link-type="uri" xlink:href="http://journals.plos.org/plosbiology/s/preprints" xlink:type="simple">http://journals.plos.org/plosbiology/s/preprints</ext-link> for full details. If you consent to posting your current manuscript as a preprint, please upload a single Preprint PDF when you re-submit. </p>
<p>Once your full submission is complete, your paper will undergo a series of checks in preparation for peer review. Once your manuscript has passed all checks it will be sent out for review. </p>
<p>Feel free to email us at <email xlink:type="simple">plosbiology@plos.org</email> if you have any queries relating to your submission.</p>
<p>Kind regards,</p>
<p>Lauren A Richardson, Ph.D</p>
<p>Senior Editor</p>
<p>PLOS Biology</p>
</body>
</sub-article>
<sub-article article-type="aggregated-review-documents" id="pbio.3000583.r002" specific-use="decision-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pbio.3000583.r002</article-id>
<title-group>
<article-title>Decision Letter 1</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Roberts</surname>
<given-names>Roland G</given-names>
</name>
<role>Senior Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2020</copyright-year>
<copyright-holder>Roland G Roberts</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pbio.3000583" document-id-type="doi" document-type="article" id="rel-obj002" link-type="peer-reviewed-article"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>1</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">15 Jul 2019</named-content>
</p>
<p>Dear Dr Blatti,</p>
<p>Thank you very much for submitting your manuscript "Knowledge-guided analysis of ‘omics’ data using the KnowEnG cloud platform" for consideration as a Methods and Resources paper at PLOS Biology. Your manuscript has been evaluated by the PLOS Biology editors, an Academic Editor with relevant expertise, and by three independent reviewers.</p>
<p>You'll see that although reviewer #1 is positive about the paper, reviewers #2 and #3 raise significant concerns about the structure and presentation of the manuscript, asking for substantial additional information. In addition, I've included some comments provided by the Academic Editor (see foot of this letter) as this may further guide you in how to address the concerns. I should note that this is not the same person whose advice we sought regarding the initial decision to send your manuscript out for peer review (they were unfortunately not able to continue handling your paper).</p>
<p>Broadly speaking, the concerns seem to revolve around the lack of support for the FAIR credentials of KnowEnG (rev #2, AE), lack of evidence that KnowEnG is substantially distinct from a number of existing systems (rev #2, AE), apparent lack of scope for tailoring of analyses (rev #2), lack of evidence for modularity and future-proofing (rev #3). All of these aspects will need to be clarified and fully supported in order for us to consider the manuscript further.</p>
<p>In light of the reviews (below), we will not be able to accept the current version of the manuscript, but we would welcome resubmission of a much-revised version that takes into account the reviewers' comments. We cannot make any decision about publication until we have seen the revised manuscript and your response to the reviewers' comments. Your revised manuscript is also likely to be sent for further evaluation by the reviewers.</p>
<p>Your revisions should address the specific points made by each reviewer and by the Academic Editor. Please submit a file detailing your responses to the editorial requests and a point-by-point response to all of the reviewers' comments that indicates the changes you have made to the manuscript. In addition to a clean copy of the manuscript, please upload a 'track-changes' version of your manuscript that specifies the edits made. This should be uploaded as a "Related" file type. You should also cite any additional relevant literature that has been published since the original submission and mention any additional citations in your response. </p>
<p>Please note while forming your response, if your article is accepted, you may have the opportunity to make the peer review history publicly available. The record will include editor decision letters (with reviews) and your responses to reviewer comments. If eligible, we will contact you to opt in or out.</p>
<p>Before you revise your manuscript, please review the following PLOS policy and formatting requirements checklist PDF: <ext-link ext-link-type="uri" xlink:href="http://journals.plos.org/plosbiology/s/file?id=9411/plos-biology-formatting-checklist.pdf" xlink:type="simple">http://journals.plos.org/plosbiology/s/file?id=9411/plos-biology-formatting-checklist.pdf</ext-link>. It is helpful if you format your revision according to our requirements - should your paper subsequently be accepted, this will save time at the acceptance stage.</p>
<p>Please note that as a condition of publication PLOS' data policy (<ext-link ext-link-type="uri" xlink:href="http://journals.plos.org/plosbiology/s/data-availability" xlink:type="simple">http://journals.plos.org/plosbiology/s/data-availability</ext-link>) requires that you make available all data used to draw the conclusions arrived at in your manuscript. If you have not already done so, you must include any data used in your manuscript either in appropriate repositories, within the body of the manuscript, or as supporting information (N.B. this includes any numerical values that were used to generate graphs, histograms etc.). For an example see here: <ext-link ext-link-type="uri" xlink:href="http://www.plosbiology.org/article/info%3Adoi%2F10.1371%2Fjournal.pbio.1001908#s5" xlink:type="simple">http://www.plosbiology.org/article/info%3Adoi%2F10.1371%2Fjournal.pbio.1001908#s5</ext-link>.</p>
<p>For manuscripts submitted on or after 1st July 2019, we require the original, uncropped and minimally adjusted images supporting all blot and gel results reported in an article's figures or Supporting Information files. We will require these files before a manuscript can be accepted so please prepare them now, if you have not already uploaded them. Please carefully read our guidelines for how to prepare and upload this data: <ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/plosbiology/s/figures#loc-blot-and-gel-reporting-requirements" xlink:type="simple">https://journals.plos.org/plosbiology/s/figures#loc-blot-and-gel-reporting-requirements</ext-link>.</p>
<p>Upon resubmission, the editors will assess your revision and if the editors and Academic Editor feel that the revised manuscript remains appropriate for the journal, we will send the manuscript for re-review. We aim to consult the same Academic Editor and reviewers for revised manuscripts but may consult others if needed.</p>
<p>We expect to receive your revised manuscript within two months. Please email us (<email xlink:type="simple">plosbiology@plos.org</email>) to discuss this if you have any questions or concerns, or would like to request an extension. At this stage, your manuscript remains formally under active consideration at our journal; please notify us by email if you do not wish to submit a revision and instead wish to pursue publication elsewhere, so that we may end consideration of the manuscript at PLOS Biology.</p>
<p>When you are ready to submit a revised version of your manuscript, please go to <ext-link ext-link-type="uri" xlink:href="https://www.editorialmanager.com/pbiology/" xlink:type="simple">https://www.editorialmanager.com/pbiology/</ext-link> and log in as an Author. Click the link labelled 'Submissions Needing Revision' where you will find your submission record. </p>
<p>Thank you again for your submission to our journal. We hope that our editorial process has been constructive thus far, and we welcome your feedback at any time. Please don't hesitate to contact us if you have any questions or comments.</p>
<p>Sincerely,</p>
<p>Roli Roberts</p>
<p>Roland G Roberts, PhD, </p>
<p>Senior Editor</p>
<p>PLOS Biology</p>
<p>*****************************************************</p>
<p>REVIEWERS' COMMENTS:</p>
<p>Reviewer #1:</p>
<p>The paper describes the KnowEnG computational system for analysis of genomics data sets. The system includes tools for popular omics data analysis tasks such as gene filtering and clustering, sample clustering, gene set analysis etc. In addition to standard tools, the system offers ‘knowledge-guided’ data-mining and machine learning algorithms using their massive Knowledge Network aggregating information about gene relationships from a wide range of resources. In addition to the use of the Knowledge Network, the KnowEnG tools are portable to diverse computing environments, are accessible via web-portal GUIs and programmatically via Jupyter scripts. In the paper, the power of the system is demonstrated by applying complex sophisticated analytical pipelines in re-analysis of published cancer data sets. Authors also demonstrate interoperability with cloud-based data repositories for seamless access to large datasets.</p>
<p>The KnowEnG system is a significant and very welcome development that has potential of helping with “democratization” of analytical tools accessibility. In the use cases presented in the paper, authors demonstrate the power of the system in performing very complex analytical tasks on large scale multi-omic datasets in a few relatively straightforward steps. While there is always a learning curve in using any system capable of such complex analyses, KnowEnG pipelines provide shortcuts that could result in significant time saving for bioinformatics professionals, and it enables analyses that would otherwise not be accessible to non-specialists. The other potentially game changing aspect of the system is the Knowledge Network that aggregates a massive amount of prior knowledge about gene relationships. Authors describe use of the network by using individual subsets of the prior-knowledge and the ProGENI algorithm to transform input data using the chosen subnetwork, but it is likely that the network itself can become an important test-bed for novel computational algorithms leveraging the whole network. The system is cloud-based and thus offers an alternative to desktop based analysis environments. The advantage of this model has been demonstrated by seamless access to TCGA data sets via Seven Bridges cloud based management system. Altogether, a very nicely written paper describing a significant and potentially highly impactful new analytical platform for analysis of omics data.</p>
<p>Reviewer #2:</p>
<p>There are parts of this paper that I like, such as its focus on FAIR principles and use of modern platforms for implementing its functionality. Overall though, I feel that the benefits of the platform are oversold and/or unsubstantiated, and the focus of the paper is too mixed to have a clear impact.</p>
<p>Specifically, there are two focuses of the paper. The first is that KnowENG is a modern data sciences platform that is applicable to many users, e.g. as the authors state it marks "a significant step towards realizing the ‘FAIR’ vision". I don't find the support in the paper for this statement particularly compelling. There is no comparison to other extremely widely used platforms such as Galaxy, GenomeSpace, Terra, or DNANexus (some are not even mentioned). There is no discussion of how many users the platform has. Statements like "its tools being applicable to any data set 105 comprising gene-level measurements or scores for a collection of samples" are abstract and do not spell out why the tools are more valuable than those already available through other online platforms. Instead, there are three examples focused on cancer analysis, with only a reassurance that "The scope of KnowEnG analytics goes far beyond cancer analysis". It is possible that KnowENG does represent a significant leap as a general platform, but the manuscript does not support this claim adequately. Instead, it reads like a platform the authors developed informed by their research needs and now claim as relevant to other users without adequate evidence for this claim.</p>
<p>The second focus is that users can use KnowENG to reveal insights into their data. Perhaps this is my bias, but if I were to analyze an 'omics dataset I had generated, I would likely have a hypothesis in mind and care deeply about the underlying "knowledge graph" that was used to support clustering/prioritization/etc. For example, I may want to use a graph that only includes datasets in tissues relevant to my disease, or to exclude certain datasets I may not trust. Without the ability to tailor data/analysis to my hypothesis, the described workflows will only yield broad insights; e.g. "A log-rank test revealed highly significant distinction across the clusters in terms of survival probabilities (p-value 3.7E-33)". This is nice but leaves me unclear on what the next steps would be (are the results of a KnowENG analysis publishable without anything further? do they suggest an experiment?). Lines 165-174 seem to support this view:</p>
<p>165 Interestingly,</p>
<p>166 while there is a high concordance between tumor type and the COCA cluster labels of Hoadley</p>
<p>167 et al.[4] (ARI = 0.82), the same is not true for NBS-based clusters from the KnowEnG pipeline</p>
<p>168 (ARI = 0.13) or for the pathway-based clustering of mutation profiles in the original study (ARI =</p>
<p>169 0.13). In other words, knowledge-guided clustering finds groups of patient mutation profiles that</p>
<p>170 have strong correspondence with survival characteristics yet do not simply track tumor types,</p>
<p>171 suggesting alternative levels of molecular similarity. We explored this possibility in detail</p>
<p>172 (Supplementary Note SN7), and found the clusters to be characterized by mutations in genes</p>
<p>173 from specific and distinct pathways, even when they are mixed in terms of tumor type</p>
<p>174 representation</p>
<p>It is not surprising to me that these clusters would not match tumor types, since the KnowENG analysis was not designed to find tumor types (instead, its reliance on large-scale databases makes it unsurprising it clusters according to genes/pathways, since most databases will capture pathway information). What if I (as in Hoadley et al) wanted to cluster by tumor type? Likely I would need to take much more care in defining my underlying analysis and prior data; can I do that via KnowENG? What if I wanted to understand whether the KnowENG clustering was driven by one dataset, or "promiscuous" genes across the network that had a high weight in the analysis? Can I use KnowENG to conduct follow-up analyses?</p>
<p>It is possible that KnowENG in fact addresses my comments above. But the manuscript as written does not make it clear that it does. The manuscript should either (a) focus on the value of a new FAIR platform, by comparing KnowENG to existing platforms (see above), supporting that many users have found it to be of value, and being more clear on the value added by its tools; or (b) focus on KnowENG as enabling researchers to conduct specific analyses/workflows, by giving evidence that it yields publishable insights, or at minimum suggests follow-up experiments/analyses.</p>
<p>Reviewer #3:</p>
<p>Interpretation of the deluge of biomedical data, whether publicly available or user-generated, is at the basis for the majority of current research.</p>
<p>Tools like KnowEng represent the link between researchers with a strictly “wet” background and the multitude of tools that allow meaningful interpretation.</p>
<p>We like the integrative approach of KnowEng and the fact that it is based on current and sensible software principles (i.e. interoperability, web-based access, etc).</p>
<p>If we were to evaluate this manuscript in terms of the concept it implements, our concerns would only be the fact that most bioinformatics analyses nowadays are made in the R framework, and we are not sure how this fits in the KnowEng framework. We routinely create custom scripts to perform very specific analysis steps, and if KnowEng does not allow for those to be integrated, it would affect the flexibility of the framework. </p>
<p>However, our main concern is related to the software engineering part of the software. </p>
<p>For example, we looked at the feature prioritization pipeline. This step is, in our opinion, one of the most crucial parts of every pipeline that makes use of high-throughput data. For example, in transcriptomic experiments, when all (or most) genes/transcripts are quantified, the presence of noise is inevitable, and feature prioritization is absolutely mandatory for removing such noise and extracting meaningful variables.</p>
<p>KnowEng implements two very simple and outdated methods for the prioritization of important features. We do understand that these are very general methods and as such they will be appropriate (in that they do not make too many assumptions on the distribution of the data) for most datasets, but today, these would cut out the majority of the existing data. In particular, both Pearson correlation and Student’s t-test would be inappropriate for RNA-Seq data due to the particular distribution of count data (Negative binomial), and it seems that KnowEng does not allow for using appropriate tools like EdgeR or DeSeq. Even for microarray data we would never use such measures, instead preferring moderated t-test (as in the Limma package, see Smith et al 2005) or SAM (Tusher et al, 2001). </p>
<p>This only points to the main shortcoming of all bioinformatics pipeline frameworks: since it is impossible to allow for all state of the art tools to be present in a single framework, the tool MUST be flexible enough for users to be able to easily plug in new blocks of the pipeline as needed. This ability needs to follow well established software engineering principles.</p>
<p>If KnowEng supports such flexibility, this is never explained in the text nor in the supplementary information. We understand that PlosBio could be seen as more “bio” oriented, but it is a software tool we are talking about, and as such the software part has to be described in detail and justified. The results presented in the manuscript are great, but they represent a small set of datasets analyzed.</p>
<p>In order to accept this manuscript, we would need these aspects to be present and clear in the text.</p>
<p>COMMENTS FROM THE ACADEMIC EDITOR:</p>
<p>I have been through the work, read the reviews and also had a look at the website.  I too have major reservations about this paper. On the positive side, there has clearly been a lot of time and effort put into this platform and they have gone to considerable lengths to support users with the provision of tutorials etc. I too like the emphasis on cloud compute and FAIR principles, even if as reviewer 2 states they are invoked but it is not necessarily spelt out how they conform to them. The user interfaces also look pretty clean and useable/useful and KnowEnG may indeed be a useful new platform.</p>
<p>On the negative side I share many of the reservations of reviewers 2, 3.  I also did not like the way the resource was presented. Each functionality is described and then applied to a different ‘use case’ dataset and in each instance we were told that KnowEnG outperformed standard methods.  Ultimately I found this a pretty unsatisfactory way to present the functionality of the tool as the reader is not really given enough information about each dataset, the insights generated are provided out of context and I found these sections to just provide a superficial overview of a dataset I was not invested in and therefore it ended up being a dull read. Most of the methods behind this tool are published we are told, so why do we need to see them applied and justified here?  For me a more descriptive work would have been better or even a protocols paper.  If there is a new resource such as this my main interests in reading about it are, how do I access it, what data do I need to feed in from which analysis platforms, what analysis routines can I run on the data and how do I do this, what form do the results come back to me in and what advantages does it offer relative to other tools/platforms.  At the moment, I don’t get this from the paper in its current form.</p>
</body>
</sub-article>
<sub-article article-type="author-comment" id="pbio.3000583.r003">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pbio.3000583.r003</article-id>
<title-group>
<article-title>Author response to Decision Letter 1</article-title>
</title-group>
<related-object document-id="10.1371/journal.pbio.3000583" document-id-type="doi" document-type="peer-reviewed-article" id="rel-obj003" link-type="rebutted-decision-letter" object-id="10.1371/journal.pbio.3000583.r002" object-id-type="doi" object-type="decision-letter"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>2</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="author-response-date">13 Oct 2019</named-content>
</p>
<supplementary-material id="pbio.3000583.s009" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pbio.3000583.s009" xlink:type="simple">
<label>Attachment</label>
<caption>
<p>Submitted filename: <named-content content-type="submitted-filename">Response to Reviewers.pdf</named-content></p>
</caption>
</supplementary-material>
</body>
</sub-article>
<sub-article article-type="aggregated-review-documents" id="pbio.3000583.r004" specific-use="decision-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pbio.3000583.r004</article-id>
<title-group>
<article-title>Decision Letter 2</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Roberts</surname>
<given-names>Roland G</given-names>
</name>
<role>Senior Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2020</copyright-year>
<copyright-holder>Roland G Roberts</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pbio.3000583" document-id-type="doi" document-type="article" id="rel-obj004" link-type="peer-reviewed-article"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>2</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">20 Nov 2019</named-content>
</p>
<p>Dear Dr Blatti,</p>
<p>Thank you for submitting your revised Methods and Resources entitled "Knowledge-guided analysis of ‘omics’ data using the KnowEnG cloud platform" for publication in PLOS Biology. I have now obtained advice from two of the original reviewers and have discussed their comments with the Academic Editor. </p>
<p>While reviewer #2 remains to be fully convinced, the Academic Editor is persuaded by your responses to her/his own comments, and by your revisions overall, that we should probably accept your manuscript for publication, assuming that you will modify the manuscript to address the remaining points raised by the reviewers. Please also make sure to address the data and other policy-related requests noted at the end of this email.</p>
<p>IMPORTANT: The Academic Editor feels that your manuscript is still lacking in clarity, and that its length may be precluding a clear impression of the advance. Specifically, s/he says "My inclination is that you accept on the proviso that they look at what they have, which has probably been expanded considerably in the trying to address all our comments and seek to reduce the work, not because they have to, as you say there is no limit on M&amp;R papers but because it would be better vehicle to advertise their resource - which is clearly what they are trying very hard to achieve." The Academic Editor is essentially pointing out that while shortening the manuscript is not required to meet any format requirements for the journal, it may be in your interests (in order to maximise appeal and uptake from the readers) to take a long cold look at your paper with the eyes of an outsider, and see whether you can prune it into a more accessible shape.</p>
<p>We expect to receive your revised manuscript within two weeks. Your revisions should address the specific points made by each reviewer. In addition to the remaining revisions and before we will be able to formally accept your manuscript and consider it "in press", we also need to ensure that your article conforms to our guidelines. A member of our team will be in touch shortly with a set of requests. As we can't proceed until these requirements are met, your swift response will help prevent delays to publication.</p>
<p>*Copyediting*</p>
<p>Upon acceptance of your article, your final files will be copyedited and typeset into the final PDF. While you will have an opportunity to review these files as proofs, PLOS will only permit corrections to spelling or significant scientific errors. Therefore, please take this final revision time to assess and make any remaining major changes to your manuscript.</p>
<p>NOTE: If Supporting Information files are included with your article, note that these are not copyedited and will be published as they are submitted. Please ensure that these files are legible and of high quality (at least 300 dpi) in an easily accessible file format. For this reason, please be aware that any references listed in an SI file will not be indexed. For more information, see our Supporting Information guidelines:</p>
<p><ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/plosbiology/s/supporting-information" xlink:type="simple">https://journals.plos.org/plosbiology/s/supporting-information</ext-link> </p>
<p>*Published Peer Review History*</p>
<p>Please note that you may have the opportunity to make the peer review history publicly available. The record will include editor decision letters (with reviews) and your responses to reviewer comments. If eligible, we will contact you to opt in or out. Please see here for more details:</p>
<p><ext-link ext-link-type="uri" xlink:href="https://blogs.plos.org/plos/2019/05/plos-journals-now-open-for-published-peer-review/" xlink:type="simple">https://blogs.plos.org/plos/2019/05/plos-journals-now-open-for-published-peer-review/</ext-link></p>
<p>*Early Version*</p>
<p>Please note that an uncorrected proof of your manuscript will be published online ahead of the final version, unless you opted out when submitting your manuscript. If, for any reason, you do not want an earlier version of your manuscript published online, uncheck the box. Should you, your institution's press office or the journal office choose to press release your paper, you will automatically be opted out of early publication. We ask that you notify us as soon as possible if you or your institution is planning to press release the article.</p>
<p>*Submitting Your Revision*</p>
<p>To submit your revision, please go to <ext-link ext-link-type="uri" xlink:href="https://www.editorialmanager.com/pbiology/" xlink:type="simple">https://www.editorialmanager.com/pbiology/</ext-link> and log in as an Author. Click the link labelled 'Submissions Needing Revision' to find your submission record. Your revised submission must include a cover letter, a Response to Reviewers file that provides a detailed response to the reviewers' comments (if applicable), and a track-changes file indicating any changes that you have made to the manuscript. </p>
<p>Please do not hesitate to contact me should you have any questions.</p>
<p>Sincerely,</p>
<p>Roli Roberts</p>
<p>Roland G Roberts, PhD, </p>
<p>Senior Editor</p>
<p>PLOS Biology</p>
<p>------------------------------------------------------------------------</p>
<p>DATA POLICY:</p>
<p>You may be aware of the PLOS Data Policy, which requires that all data be made available without restriction: <ext-link ext-link-type="uri" xlink:href="http://journals.plos.org/plosbiology/s/data-availability" xlink:type="simple">http://journals.plos.org/plosbiology/s/data-availability</ext-link>. For more information, please also see this editorial: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1371/journal.pbio.1001797" xlink:type="simple">http://dx.doi.org/10.1371/journal.pbio.1001797</ext-link></p>
<p>Note that we do not require all raw data. Rather, we ask that all individual quantitative observations that underlie the data summarized in the figures and results of your paper be made available in one of the following forms:</p>
<p>1) Supplementary files (e.g., excel). Please ensure that all data files are uploaded as 'Supporting Information' and are invariably referred to (in the manuscript, figure legends, and the Description field when uploading your files) using the following format verbatim: S1 Data, S2 Data, etc. Multiple panels of a single or even several figures can be included as multiple sheets in one excel file that is saved using exactly the following convention: S1_Data.xlsx (using an underscore).</p>
<p>2) Deposition in a publicly available repository. Please also provide the accession code or a reviewer link so that we may view your data before publication. </p>
<p>I should say that it seems that all of the main Figs and most of the Supp Figs are either schematics or screenshots/outputs from the KnowEnG platform itself (and so depend on data that can be accessed and manipulated through that portal). However, there are a few Supp Figs which look like they have a simpler structure, and may be presenting data that are not directly output from KnowEnG. If this is the case, regardless of the method selected, please ensure that you provide the individual numerical values that underlie the summary data displayed in those figure panels as they are essential for readers to assess your analysis and to reproduce it. NOTE: the numerical data provided should include all replicates AND the way in which the plotted mean and errors were derived (it should not present only the mean/average values).</p>
<p>Please also ensure that figure legends in your manuscript include information on where the underlying data can be found, and ensure your supplemental data file/s has a legend.</p>
<p>Please ensure that your Data Statement in the submission system accurately describes where your data can be found.</p>
<p>------------------------------------------------------------------------</p>
<p>REVIEWERS' COMMENTS:</p>
<p>Reviewer #2:</p>
<p>I appreciate the authors' response to my comments. In particular, the description of how KnowEnG fits in with other platforms is very useful, and I think it significantly improves the paper.</p>
<p>I think it is clear that KnowEnG is distinct from these other platforms. But it seems different mostly in degree, not in kind, and it is still not clear how big of a need it fills. "Knowledge-guided analysis" is not something I have heard used before, and I am not sure how big of a field it is (I could not tell if this is a new term defined to describe KnowEnG, or if it was a pre-existing gap for a lot of users).</p>
<p>The example use cases are fine. But they are too detailed for somebody outside of the field of cancer genomics to really grasp the big picture. On the other hand, the high-level descriptions of "clustering" or "gene prioritization" are so broad that it's hard to see what is provided by KnowEnG. It seems like the right level of detail would be somewhere in between, where a broad need was motivated and then addressed. I think the long responses to my comments illustrate the challenges in explaining the value of a system like this to somebody who has not developed it or used it closely.</p>
<p>In short, while I see the potential value and novelty of the platform, the current manuscript does not make it clear to me that it is the major advance that the abstract claims.</p>
<p>Reviewer #3:</p>
<p>Some of my concerns are still there, in particular the fact that the tool requires the maintainer to put effort into including new analysis approaches. However, this is true for most available pipeline managers like KnowEnG. </p>
<p>Most of my other comments have been addressed appropriately, and the amount of work that was put into the revision gives me hope that the authors will keep maintaining and developing KnowEnG into a more complete framework.</p>
<p>--------------------</p>
</body>
</sub-article>
<sub-article article-type="author-comment" id="pbio.3000583.r005">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pbio.3000583.r005</article-id>
<title-group>
<article-title>Author response to Decision Letter 2</article-title>
</title-group>
<related-object document-id="10.1371/journal.pbio.3000583" document-id-type="doi" document-type="peer-reviewed-article" id="rel-obj005" link-type="rebutted-decision-letter" object-id="10.1371/journal.pbio.3000583.r004" object-id-type="doi" object-type="decision-letter"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>3</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="author-response-date">16 Dec 2019</named-content>
</p>
<supplementary-material id="pbio.3000583.s010" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pbio.3000583.s010" xlink:type="simple">
<label>Attachment</label>
<caption>
<p>Submitted filename: <named-content content-type="submitted-filename">Response_to_Reviewers.rev2.pdf</named-content></p>
</caption>
</supplementary-material>
</body>
</sub-article>
<sub-article article-type="editor-report" id="pbio.3000583.r006" specific-use="decision-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pbio.3000583.r006</article-id>
<title-group>
<article-title>Decision Letter 3</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Roberts</surname>
<given-names>Roland G</given-names>
</name>
<role>Senior Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2020</copyright-year>
<copyright-holder>Roland G Roberts</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pbio.3000583" document-id-type="doi" document-type="article" id="rel-obj006" link-type="peer-reviewed-article"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>3</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">19 Dec 2019</named-content>
</p>
<p>Dear Dr Blatti III,</p>
<p>On behalf of my colleagues and the Academic Editor, Thomas C. Freeman, I am pleased to inform you that we will be delighted to publish your Methods and Resources in PLOS Biology. </p>
<p>The files will now enter our production system. You will receive a copyedited version of the manuscript, along with your figures for a final review. You will be given two business days to review and approve the copyedit. Then, within a week, you will receive a PDF proof of your typeset article. You will have two days to review the PDF and make any final corrections. If there is a chance that you'll be unavailable during the copy editing/proof review period, please provide us with contact details of one of the other authors whom you nominate to handle these stages on your behalf. This will ensure that any requested corrections reach the production department in time for publication.</p>
<p>Early Version</p>
<p>The version of your manuscript submitted at the copyedit stage will be posted online ahead of the final proof version, unless you have already opted out of the process. The date of the early version will be your article's publication date. The final article will be published to the same URL, and all versions of the paper will be accessible to readers.</p>
<p>PRESS </p>
<p>We frequently collaborate with press offices. If your institution or institutions have a press office, please notify them about your upcoming paper at this point, to enable them to help maximise its impact. If the press office is planning to promote your findings, we would be grateful if they could coordinate with <email xlink:type="simple">biologypress@plos.org</email>. If you have not yet opted out of the early version process, we ask that you notify us immediately of any press plans so that we may do so on your behalf.</p>
<p>We also ask that you take this opportunity to read our Embargo Policy regarding the discussion, promotion and media coverage of work that is yet to be published by PLOS. As your manuscript is not yet published, it is bound by the conditions of our Embargo Policy. Please be aware that this policy is in place both to ensure that any press coverage of your article is fully substantiated and to provide a direct link between such coverage and the published work. For full details of our Embargo Policy, please visit <ext-link ext-link-type="uri" xlink:href="http://www.plos.org/about/media-inquiries/embargo-policy/" xlink:type="simple">http://www.plos.org/about/media-inquiries/embargo-policy/</ext-link>.</p>
<p>Thank you again for submitting your manuscript to PLOS Biology and for your support of Open Access publishing. Please do not hesitate to contact me if I can provide any assistance during the production process.</p>
<p>Kind regards,</p>
<p>Sofia Vickers</p>
<p>Senior Publications Assistant</p>
<p>PLOS Biology</p>
<p>On behalf of,  </p>
<p>Roland Roberts,</p>
<p>Senior Editor</p>
<p>PLOS Biology</p>
</body>
</sub-article>
</article>