<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1d3 20150301//EN" "http://jats.nlm.nih.gov/publishing/1.1d3/JATS-journalpublishing1.dtd">
<article article-type="research-article" dtd-version="1.1d3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">PLoS Comput Biol</journal-id>
<journal-id journal-id-type="publisher-id">plos</journal-id>
<journal-id journal-id-type="pmc">ploscomp</journal-id>
<journal-title-group>
<journal-title>PLOS Computational Biology</journal-title>
</journal-title-group>
<issn pub-type="ppub">1553-734X</issn>
<issn pub-type="epub">1553-7358</issn>
<publisher>
<publisher-name>Public Library of Science</publisher-name>
<publisher-loc>San Francisco, CA USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">PCOMPBIOL-D-20-01343</article-id>
<article-id pub-id-type="doi">10.1371/journal.pcbi.1008730</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Research Article</subject>
</subj-group>
<subj-group subj-group-type="Discipline-v3">
<subject>Computer and information sciences</subject><subj-group><subject>Network analysis</subject><subj-group><subject>Metabolic networks</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Gene identification and analysis</subject><subj-group><subject>Genetic networks</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Computer and information sciences</subject><subj-group><subject>Network analysis</subject><subj-group><subject>Genetic networks</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Gene expression</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Biochemistry</subject><subj-group><subject>Metabolism</subject><subj-group><subject>Metabolic pathways</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Computational biology</subject><subj-group><subject>Genome analysis</subject><subj-group><subject>Gene prediction</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Genomics</subject><subj-group><subject>Genome analysis</subject><subj-group><subject>Gene prediction</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Cell biology</subject><subj-group><subject>Cell physiology</subject><subj-group><subject>Cell metabolism</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Animal studies</subject><subj-group><subject>Experimental organism systems</subject><subj-group><subject>Model organisms</subject><subj-group><subject>Saccharomyces cerevisiae</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Model organisms</subject><subj-group><subject>Saccharomyces cerevisiae</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Organisms</subject><subj-group><subject>Eukaryota</subject><subj-group><subject>Fungi</subject><subj-group><subject>Yeast</subject><subj-group><subject>Saccharomyces</subject><subj-group><subject>Saccharomyces cerevisiae</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Animal studies</subject><subj-group><subject>Experimental organism systems</subject><subj-group><subject>Yeast and fungal models</subject><subj-group><subject>Saccharomyces cerevisiae</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Biochemistry</subject><subj-group><subject>Enzymology</subject><subj-group><subject>Enzyme chemistry</subject><subj-group><subject>Enzyme metabolism</subject></subj-group></subj-group></subj-group></subj-group></subj-group></article-categories>
<title-group>
<article-title>DEXOM: Diversity-based enumeration of optimal context-specific metabolic networks</article-title>
<alt-title alt-title-type="running-head">DEXOM: Diversity-based enumeration of optimal context-specific metabolic networks</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0002-4938-4418</contrib-id>
<name name-style="western">
<surname>Rodríguez-Mier</surname> <given-names>Pablo</given-names></name>
<role content-type="https://casrai.org/credit/">Conceptualization</role>
<role content-type="https://casrai.org/credit/">Data curation</role>
<role content-type="https://casrai.org/credit/">Formal analysis</role>
<role content-type="https://casrai.org/credit/">Investigation</role>
<role content-type="https://casrai.org/credit/">Methodology</role>
<role content-type="https://casrai.org/credit/">Software</role>
<role content-type="https://casrai.org/credit/">Validation</role>
<role content-type="https://casrai.org/credit/">Visualization</role>
<role content-type="https://casrai.org/credit/">Writing – original draft</role>
<role content-type="https://casrai.org/credit/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0002-3393-1405</contrib-id>
<name name-style="western">
<surname>Poupin</surname> <given-names>Nathalie</given-names></name>
<role content-type="https://casrai.org/credit/">Conceptualization</role>
<role content-type="https://casrai.org/credit/">Investigation</role>
<role content-type="https://casrai.org/credit/">Methodology</role>
<role content-type="https://casrai.org/credit/">Software</role>
<role content-type="https://casrai.org/credit/">Supervision</role>
<role content-type="https://casrai.org/credit/">Validation</role>
<role content-type="https://casrai.org/credit/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0003-2388-2555</contrib-id>
<name name-style="western">
<surname>de Blasio</surname> <given-names>Carlo</given-names></name>
<role content-type="https://casrai.org/credit/">Resources</role>
<role content-type="https://casrai.org/credit/">Validation</role>
<role content-type="https://casrai.org/credit/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Le Cam</surname> <given-names>Laurent</given-names></name>
<role content-type="https://casrai.org/credit/">Funding acquisition</role>
<role content-type="https://casrai.org/credit/">Project administration</role>
<role content-type="https://casrai.org/credit/">Resources</role>
<role content-type="https://casrai.org/credit/">Validation</role>
<role content-type="https://casrai.org/credit/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0001-9401-2894</contrib-id>
<name name-style="western">
<surname>Jourdan</surname> <given-names>Fabien</given-names></name>
<role content-type="https://casrai.org/credit/">Conceptualization</role>
<role content-type="https://casrai.org/credit/">Funding acquisition</role>
<role content-type="https://casrai.org/credit/">Investigation</role>
<role content-type="https://casrai.org/credit/">Methodology</role>
<role content-type="https://casrai.org/credit/">Project administration</role>
<role content-type="https://casrai.org/credit/">Supervision</role>
<role content-type="https://casrai.org/credit/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="corresp" rid="cor001">*</xref>
</contrib>
</contrib-group>
<aff id="aff001">
<label>1</label>
<addr-line>Toxalim (Research Centre in Food Toxicology), Université de Toulouse, INRAE, ENVT, INP-Purpan, UPS, Toulouse, France</addr-line>
</aff>
<aff id="aff002">
<label>2</label>
<addr-line>IRCM, Institut de Recherche en Cancérologie de Montpellier, INSERM U1194, Université de Montpellier, Institut régional du Cancer de Montpellier, Montpellier, France</addr-line>
</aff>
<aff id="aff003">
<label>3</label>
<addr-line>Equipe Labellisée par la Ligue contre le Cancer, Paris, France</addr-line>
</aff>
<contrib-group>
<contrib contrib-type="editor" xlink:type="simple">
<name name-style="western">
<surname>Kaleta</surname> <given-names>Christoph</given-names></name>
<role>Editor</role>
<xref ref-type="aff" rid="edit1"/>
</contrib>
</contrib-group>
<aff id="edit1">
<addr-line>Christian Albrechts Universitat zu Kiel, GERMANY</addr-line>
</aff>
<author-notes>
<fn fn-type="conflict" id="coi001">
<p>The authors have declared that no competing interests exist.</p>
</fn>
<corresp id="cor001">* E-mail: <email xlink:type="simple">fabien.jourdan@inrae.fr</email></corresp>
</author-notes>
<pub-date pub-type="collection">
<month>2</month>
<year>2021</year>
</pub-date>
<pub-date pub-type="epub">
<day>11</day>
<month>2</month>
<year>2021</year>
</pub-date>
<volume>17</volume>
<issue>2</issue>
<elocation-id>e1008730</elocation-id>
<history>
<date date-type="received">
<day>28</day>
<month>7</month>
<year>2020</year>
</date>
<date date-type="accepted">
<day>21</day>
<month>1</month>
<year>2021</year>
</date>
</history>
<permissions>
<copyright-year>2021</copyright-year>
<copyright-holder>Rodríguez-Mier et al</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="info:doi/10.1371/journal.pcbi.1008730"/>
<abstract>
<p>The correct identification of metabolic activity in tissues or cells under different conditions can be extremely elusive due to mechanisms such as post-transcriptional modification of enzymes or different rates in protein degradation, making it difficult to perform predictions on the basis of gene expression alone. Context-specific metabolic network reconstruction can overcome some of these limitations by leveraging the integration of multi-omics data into genome-scale metabolic networks (GSMN). Using the experimental information, context-specific models are reconstructed by extracting from the generic GSMN the sub-network most consistent with the data, subject to biochemical constraints. One advantage is that these context-specific models have more predictive power since they are tailored to the specific tissue, cell or condition, containing only the reactions predicted to be active in such context. However, an important limitation is that there are usually many different sub-networks that optimally fit the experimental data. This set of optimal networks represents alternative explanations of the possible metabolic state. Ignoring the set of possible solutions reduces the ability to obtain relevant information about the metabolism and may bias the interpretation of the true metabolic states. In this work we formalize the problem of enumerating optimal metabolic networks and we introduce <monospace>DEXOM</monospace>, a unified approach for diversity-based enumeration of context-specific metabolic networks. We developed different strategies for this purpose and we performed an exhaustive analysis using simulated and real data. 
In order to analyze the extent to which these results are biologically meaningful, we used the alternative solutions obtained with the different methods to measure: 1) the improvement of in silico predictions of essential genes in <italic>Saccharomyces cerevisiae</italic> using ensembles of metabolic networks; and 2) the detection of alternative enriched pathways in different human cancer cell lines. We also provide <monospace>DEXOM</monospace> as an open-source library compatible with COBRA Toolbox 3.0, available at <ext-link ext-link-type="uri" xlink:href="https://github.com/MetExplore/dexom" xlink:type="simple">https://github.com/MetExplore/dexom</ext-link>.</p>
</abstract>
<abstract abstract-type="summary">
<title>Author summary</title>
<p>Understanding deregulations of metabolism based on isolated measures of gene expression or protein or metabolite concentrations is a challenging task due to the interconnection of multiple processes. One solution is to extract, from generic genome-scale metabolic networks, the specific sub-network which is modulated in the studied condition. Many algorithms have been proposed for such context-specific network extraction based on experimental measurements. However, this process is subject to some randomness and variability, since multiple metabolic networks can model the metabolic state in a similarly adequate manner for the same experimental data. This means that for a given dataset and reconstruction method, there are usually multiple solutions that satisfy the same constraints and with the same quality, but only one solution is returned by the commonly used reconstruction methods. Here, we formalize this problem and we propose and analyze different methods to obtain diverse samples of metabolic sub-networks. We evaluate them by performing an extensive comparison and we show how the different sets of optimal networks discovered by the different methods are biologically meaningful by constructing ensembles of networks to improve the prediction of essential genes in <italic>Saccharomyces cerevisiae</italic> and to detect enriched metabolic pathways in four different human cancer cell lines.</p>
</abstract>
<funding-group>
<funding-statement>This work was supported by grants from the Institut National contre le Cancer (INCa-PLBIO) and the Labex EpiGenMed ("Investissements d’avenir" program, reference ANR-10-LABX-12-01). This work has also received funding from the European Union’s Horizon 2020 research and innovation program under grant agreement GOLIATH No. 825489, and from the Association pour la Recherche contre le Cancer (ARC) Foundation. The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</funding-statement>
</funding-group>
<counts>
<fig-count count="9"/>
<table-count count="2"/>
<page-count count="34"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>PLOS Publication Stage</meta-name>
<meta-value>vor-update-to-uncorrected-proof</meta-value>
</custom-meta>
<custom-meta>
<meta-name>Publication Update</meta-name>
<meta-value>2021-02-24</meta-value>
</custom-meta>
<custom-meta id="data-availability">
<meta-name>Data Availability</meta-name>
<meta-value>All data and methods are available from <ext-link ext-link-type="uri" xlink:href="https://github.com/MetExplore/dexom" xlink:type="simple">https://github.com/MetExplore/dexom</ext-link>.</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<disp-quote><p>This is a <italic>PLOS Computational Biology</italic> Methods paper.</p></disp-quote>
<sec id="sec001" sec-type="intro">
<title>Introduction</title>
<p>Metabolism and its regulation is an ensemble of intricate and tightly coordinated processes involving hundreds to thousands of enzymes, reactions, metabolites and genes, whose interactions define complex networks that are unique for each species. This complexity grants organisms the flexibility to adapt their energetic functions and growth requirements to a wide variety of conditions. Changes in nutrient availability, conditions of cellular stress, or any other change in the environment can induce a rapid metabolic reprogramming of cells, rewiring their metabolism to adjust to the requirements of the new situation. Dysfunction of these mechanisms plays a central role in the development of many diseases, but most notably in cancer, where cancer cells exploit metabolic reprogramming for their own benefit [<xref ref-type="bibr" rid="pcbi.1008730.ref001">1</xref>] to sustain a rapid proliferation rate and survive in conditions of hypoxia, nutrient depletion, or even develop therapy resistance [<xref ref-type="bibr" rid="pcbi.1008730.ref002">2</xref>]. Being able to accurately detect these changes or deregulations in metabolism would be beneficial not only for a better understanding of biological systems but also to develop more targeted therapies and treatments for many diseases [<xref ref-type="bibr" rid="pcbi.1008730.ref003">3</xref>–<xref ref-type="bibr" rid="pcbi.1008730.ref005">5</xref>].</p>
<p>One of the reasons why this task remains elusive is the complexity of the multiple processes that participate in the regulation of the metabolism [<xref ref-type="bibr" rid="pcbi.1008730.ref006">6</xref>]. More specifically, post-transcriptional control of mRNA, post-translational modifications of enzymes, as well as biochemical constraints —including for example the laws for mass and charge conservation, cell growth requirements, biomass composition and nutrient availability— make the identification of which pathways are altered between conditions very complicated by the mere observation of changes in gene expression or changes in metabolite concentrations. Instead, integrating and analyzing together all those different levels of information is key to improve the predictive models and to provide a more accurate mechanistic view of the system under study.</p>
<p>Genome-scale metabolic networks (GSMN) are suitable computational models for the integration of these multiple levels of knowledge. These models are automatically built and manually curated networks that encode all reactions with their stoichiometric coefficients, metabolites, enzymes, gene annotations and biochemical constraints that are known for an organism. GSMNs are generic models of an organism, independent of the type of tissue, cell or condition. In order to generate more accurate models for specific tissues or conditions, experimental data such as gene or protein expression can be integrated on top of GSMNs using context-specific network reconstruction methods. Taking into account the different levels of expression of genes between conditions, a sub-network from the GSMN is extracted by finding a steady-state flux most consistent with the experimental data. This process allows the generation of metabolic networks specifically tailored to the condition, to highlight for example differences in metabolism between tissues [<xref ref-type="bibr" rid="pcbi.1008730.ref007">7</xref>–<xref ref-type="bibr" rid="pcbi.1008730.ref009">9</xref>] or to discover novel drug targets or essential genes in cancer cells [<xref ref-type="bibr" rid="pcbi.1008730.ref010">10</xref>–<xref ref-type="bibr" rid="pcbi.1008730.ref012">12</xref>].</p>
<p>Several methods were proposed in the literature to automatically reconstruct context-specific metabolic networks from gene or protein expression, mostly based on Linear Programming (LP) or Mixed Integer Linear Programming (MILP) models [<xref ref-type="bibr" rid="pcbi.1008730.ref007">7</xref>–<xref ref-type="bibr" rid="pcbi.1008730.ref009">9</xref>, <xref ref-type="bibr" rid="pcbi.1008730.ref013">13</xref>–<xref ref-type="bibr" rid="pcbi.1008730.ref017">17</xref>], as well as benchmarks comparing their capabilities [<xref ref-type="bibr" rid="pcbi.1008730.ref018">18</xref>, <xref ref-type="bibr" rid="pcbi.1008730.ref019">19</xref>]. This process is done by solving an optimization problem to find the sub-network from the GSMN that maximizes the agreement with the experimental data. This agreement is defined in different ways: some methods such as [<xref ref-type="bibr" rid="pcbi.1008730.ref007">7</xref>, <xref ref-type="bibr" rid="pcbi.1008730.ref015">15</xref>] use data to classify reactions into reactions associated to highly expressed enzymes (or core reactions) or reactions associated to lowly expressed enzymes, whereas others [<xref ref-type="bibr" rid="pcbi.1008730.ref008">8</xref>, <xref ref-type="bibr" rid="pcbi.1008730.ref014">14</xref>] assign different scores (weights) to reactions based on data and other experimental evidence. The optimization problem is then defined as that of finding the sub-networks that can carry a steady-state flux through the reactions that maximize the overall score. However, a major limitation that is frequently neglected is that the available information is usually not sufficient to fully and unambiguously characterize the corresponding metabolic sub-network for a given condition. Instead, a range of different optimal metabolic sub-networks may exist, offering different hypotheses of the possible metabolic state. 
In other words, for a given experimental dataset, reconstruction method, and pre-processing method to score the importance of the reactions (e.g., threshold-based methods to classify reactions into active or non-active), there exists an unknown number of possible metabolic sub-networks (solutions) that are equally valid (optimal) in terms of agreement with experimental data, but only one of these solutions (without knowing which one a priori) is returned by the commonly used context-specific reconstruction methods. Ignoring this variability can not only lead to incorrect or incomplete explanations of the biological experiment, but also causes valuable information to be lost that could be used to improve predictions. Although this limitation is starting to be acknowledged [<xref ref-type="bibr" rid="pcbi.1008730.ref020">20</xref>–<xref ref-type="bibr" rid="pcbi.1008730.ref022">22</xref>], there is still a lack of studies that analyze the computational problem and that provide methods to sample or enumerate the optimal space of alternative networks.</p>
<p>The problem of exploring multiple solutions in the context of metabolic networks was already carried out for Flux Balance Analysis [<xref ref-type="bibr" rid="pcbi.1008730.ref023">23</xref>], but barely analyzed for context-specific network reconstruction, where both the type of the problem and purpose are different. One of the initial works that exploits the idea of multiple context-specific networks to improve predictions is <monospace>EXAMO</monospace> [<xref ref-type="bibr" rid="pcbi.1008730.ref021">21</xref>]. In this work, authors perform an enumeration of optimal metabolic networks using <monospace>iMAT</monospace> [<xref ref-type="bibr" rid="pcbi.1008730.ref007">7</xref>]. The enumeration is done using the same strategy proposed in <monospace>iMAT</monospace> for assigning confidence scores to reactions, followed by a post-processing step using the <monospace>MBA</monospace> [<xref ref-type="bibr" rid="pcbi.1008730.ref013">13</xref>] algorithm to generate a single consensus network including the reactions predicted to be active. A similar strategy was applied by Poupin et al. [<xref ref-type="bibr" rid="pcbi.1008730.ref020">20</xref>], but instead of generating a single consensus network, the whole set of networks derived by forcing fluxes through each reaction in the model is preserved as alternative hypotheses of the metabolic state. This enables a better characterization of the metabolic shifts that occur during hepatic differentiation.</p>
<p>The procedure of generating alternative networks by forcing or blocking flux through each reaction has however some limitations. First, it can generate many duplicated solutions. For example, if there exists only one optimal metabolic network with a linear pathway of 10 reactions, forcing the activation of each reaction in the linear pathway will always generate the same optimal solution, wasting computational resources. Second, it cannot recover the whole set of possible optimal metabolic networks, as not all possible combinations of reactions are tested. Third, there is no guarantee that the solution set is representative and diverse of the full space of possible networks. A simple brute force algorithm that could be used to prevent this would be to test every possible combination between variable reactions. However, this approach does not scale as the number of problems to solve grows exponentially with the number of variable reactions. As an alternative to this approach, the authors of [<xref ref-type="bibr" rid="pcbi.1008730.ref022">22</xref>] present a strategy to generate alternative metabolic networks. Of particular interest is their <monospace>CorEx</monospace> algorithm, which in a similar fashion to the <monospace>Fastcore</monospace> method [<xref ref-type="bibr" rid="pcbi.1008730.ref015">15</xref>], calculates the smallest flux-consistent sub-network that preserves the reactions in the core set, but solving the problem exactly instead of the LP-based fast approximations used in <monospace>Fastcore</monospace>. <monospace>CorEx</monospace> also incorporates a mechanism to enumerate optimal networks by maximizing the dissimilarity with the previously found solution, a process that can be repeated iteratively to discover new optimal networks. However, without a mechanism that prevents the generation of duplicated solutions, the enumeration process can get stuck in a small region in the space of optimal solutions. 
Some issues still remain with this enumeration strategy, mainly regarding its effectiveness in obtaining a representative set of the possible metabolic networks and also how to take advantage of the set of networks to improve predictions beyond merely observing the variability in terms of reactions that can appear or not in the different optimal sub-networks.</p>
<p>Regarding this last question, it was shown that the use of ensembles of draft networks reconstructed using Gap Filling methods with multiple media conditions and random perturbations can improve flux-based predictions [<xref ref-type="bibr" rid="pcbi.1008730.ref024">24</xref>]. Although the application is different, predictions using context-specific network reconstruction methods could be also improved using ensembles of optimal metabolic networks, and diversity can play an important role in the quality of the ensemble models.</p>
<p>In this work, we advocate for generating a diverse set of solutions, that is, given some experimental condition for which we cannot characterize the metabolic state with just one optimal network, we want to obtain a sample of this largely unknown set of possible networks in a way that covers well the range of possibilities. In other words: if large differences in metabolism can be explained by the same experimental data, we want to obtain a diverse set of these optimal networks that capture those different metabolic states. This usually means exploring distant solutions with changes that correspond also to distant pathways.</p>
<p>The concept of diversity of optimal solutions of a MILP problem is not well explored in metabolic network reconstruction, and only marginally analyzed in combinatorial optimization. Of special interest is the sequential MILP approach proposed by Danna et al. [<xref ref-type="bibr" rid="pcbi.1008730.ref025">25</xref>], in which they propose an enumeration strategy which incorporates the concept of diversity by maximizing the distance to previously found solutions at the same time that they discard visited solutions. The closest concept to this general strategy applied to the enumeration of optimal context-specific metabolic networks can be found in [<xref ref-type="bibr" rid="pcbi.1008730.ref022">22</xref>], where Robaina et al. incorporate the idea of maximizing the distance to the previous solution, but without a mechanism that would remove already explored solutions.</p>
<p>Although maximizing the distance may seem like a good idea a priori, in practice it can lead to oscillations in the search, in which the search process jumps between two distant clusters of possible networks, with large inter-cluster distance but very small intra-cluster distance. That is why the concept of diversity in metabolic networks must be carefully analyzed with synthetic and real data that allow observing the behavior and quality of the solutions.</p>
<p>As a response to the current limitations, here we formalize the problem of enumerating optimal context-specific metabolic networks from a computational perspective and we develop <monospace>DEXOM</monospace>, a collection of MILP-based methods for diversity-based enumeration of optimal metabolic networks. We implemented in total four different techniques in <monospace>DEXOM</monospace>, namely: <monospace>Reaction-enum</monospace>, <monospace>Icut-enum</monospace>, <monospace>Maxdist-enum</monospace> and <monospace>Diversity-enum</monospace>.</p>
<p>The objective of this paper is threefold. First, to analyze and formalize, from a computational perspective, the enumeration problem of optimal solutions and propose different practical techniques that can be used to obtain not one but many, equally good and diverse context-specific metabolic networks. Second, to analyze how the different methods behave under different simulated and real conditions and identify real examples where the discovery of a more diverse set of possible metabolic networks has practical implications. Third, to provide a unified open-source library with the different implementations that can be used in a general way to find diverse solutions.</p>
<p>In order to evaluate how well each method performs, we focused on two main aspects: 1) how well each technique is able to discover a diverse set of optimal networks, measured using different distance metrics, and 2) how the set of alternative optimal solutions is biologically meaningful by assessing the predictive capabilities with real data. We performed in total around 191,000 network reconstructions with simulated data, 329,000 using microarray data from <italic>Saccharomyces cerevisiae</italic> [<xref ref-type="bibr" rid="pcbi.1008730.ref018">18</xref>, <xref ref-type="bibr" rid="pcbi.1008730.ref026">26</xref>] and around 67,400 using RNA-seq data from different human cancer cell lines [<xref ref-type="bibr" rid="pcbi.1008730.ref019">19</xref>]. To analyze the extent to which these results are biologically meaningful, we used these reconstructions to measure: 1) the improvement of in silico predictions of essential genes in yeast using ensembles of metabolic networks; and 2) the detection of alternative enriched pathways in human cancer cells, as a way to measure the variability of different hypotheses about the metabolic state that are compatible with the experimental data (<xref ref-type="fig" rid="pcbi.1008730.g001">Fig 1</xref>).</p>
<fig id="pcbi.1008730.g001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1008730.g001</object-id>
<label>Fig 1</label>
<caption>
<title>Summary of the methods and validation.</title>
<p>Given an experiment (e.g., microarray data for different conditions), and a base genome-scale model, we use the four different methods included in <monospace>DEXOM</monospace> to enumerate the unknown set of multiple solutions (optimal context-specific metabolic networks, according to some objective function). Each method solves the same reconstruction problem but using a different strategy for discovering alternative solutions. Each set of optimal solutions is compared in terms of diversity, and projected into a 2D embedding to visualize which part of the space of optimal metabolic networks is explored by each method. In order to explore if a more diverse set of optimal solutions is biologically meaningful, we performed two evaluations with real data: 1) in-silico simulations of essential genes in yeast using ensembles of optimal networks; and 2) pathway enrichment in human cancer cells, using the whole set of discovered networks.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1008730.g001" xlink:type="simple"/>
</fig>
<p>To summarize, the main novelties of this work are:</p>
<list list-type="bullet">
<list-item>
<p>The analysis and identification of the computational problem involving the diversity-based enumeration of optimal context-specific metabolic networks.</p>
</list-item>
<list-item>
<p>The development of a library (<monospace>DEXOM</monospace>) including four different methods (<monospace>Reaction-enum</monospace>, <monospace>Icut-enum</monospace>, <monospace>Maxdist-enum</monospace> and <monospace>Diversity-enum</monospace>) for the enumeration of optimal context-specific metabolic networks.</p>
</list-item>
<list-item>
<p>An extensive comparison using the different methods under different experimental conditions, showing how variable the spacing of valid optimal solutions usually is, and comparing the methods in terms of ability to detect these solutions.</p>
</list-item>
<list-item>
<p>The development of an open-source library integrated with COBRA Toolbox 3.0.6 [<xref ref-type="bibr" rid="pcbi.1008730.ref027">27</xref>] with the different methods for the enumeration of solutions, available at <ext-link ext-link-type="uri" xlink:href="https://github.com/MetExplore/dexom" xlink:type="simple">https://github.com/MetExplore/dexom</ext-link></p>
</list-item>
</list>
</sec>
<sec id="sec002" sec-type="materials|methods">
<title>Methods</title>
<p>In this section we introduce the problem of context-specific metabolic network reconstruction and the enumeration problem, and we describe the four different strategies that we implemented in <monospace>DEXOM</monospace>, namely: <monospace>Reaction-enum</monospace>, <monospace>Icut-enum</monospace>, <monospace>Maxdist-enum</monospace> and <monospace>Diversity-enum</monospace>.</p>
<p>
<monospace>Reaction-enum</monospace> is based on the idea of generating alternative solutions by single reaction changes. At each step, a different reaction in the model is picked and forced to be active or inactive to generate an alternative solution, which is kept only if the new solution is still optimal. The <monospace>Icut-enum</monospace> method is based on the idea of using integer-cuts as constraints to discard previously found solutions: at each step, a new solution is found and a new constraint is added to the original problem to discard this solution, making this solution not valid anymore. By progressively adding new constraints, new optimal solutions are found. The idea behind <monospace>Maxdist-enum</monospace> is to find at each step the most distant optimal solution with respect to the previous optimal solution, and to use integer-cuts to avoid re-discovering the same distant solutions.</p>
<p>These three techniques, albeit simple and useful in many situations, also have limitations when it comes to discovering diverse sets of solutions. Based on the analysis of their limitations, we developed a fourth technique called <monospace>Diversity-enum</monospace>, a method that takes the best of the other three techniques without their disadvantages. Using experimental data for a particular condition and organism, <monospace>Diversity-enum</monospace> first constructs an initial set of sub-networks by testing single variations of reactions that may or may not be present in the networks without affecting the optimality. This set is then incrementally expanded to find new optimal solutions by progressively maximizing the differences with other solutions previously found, increasing the distance at each step.</p>
<sec id="sec003">
<title>Optimal context-specific metabolic network reconstruction</title>
<p>Here we consider the reconstruction of optimal context-specific metabolic networks as the selection of a subset of reactions from a global genome-scale metabolic network for a particular organism, in a way that maximizes the agreement with experimental data, i.e., reactions in the model with evidence of being active in a given context should be preserved, and reactions with evidence of being inactive should be removed from the model. The selection of this subset of reactions is also subject to flux-based constraints, which constrain the space of possible ways in which those reactions can be selected. More formally, given:</p>
<list list-type="bullet">
<list-item>
<p><italic>G</italic> = {<italic>R</italic>, <italic>M</italic>, <italic>S</italic>}, an initial genome-scale metabolic network <italic>G</italic> for a given model organism, where <italic>R</italic> = {<italic>R</italic><sub>1</sub>, …, <italic>R</italic><sub><italic>n</italic></sub>} is the set of reactions in the network, <italic>M</italic> = {<italic>M</italic><sub>1</sub>, …, <italic>M</italic><sub><italic>m</italic></sub>} is the set of metabolites, and <italic>S</italic> is the stoichiometry matrix of size <italic>m</italic> × <italic>n</italic></p>
</list-item>
<list-item>
<p>
<inline-formula id="pcbi.1008730.e001">
<alternatives>
<graphic id="pcbi.1008730.e001g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e001" xlink:type="simple"/>
<mml:math display="inline" id="M1">
<mml:mrow>
<mml:mi>f</mml:mi>
<mml:mrow>
<mml:mo>(</mml:mo>
<mml:mi>x</mml:mi>
<mml:mo>)</mml:mo>
</mml:mrow>
<mml:mo>:</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo>{</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo>,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>}</mml:mo>
</mml:mrow>
<mml:mi>n</mml:mi>
</mml:msup>
<mml:mo>→</mml:mo>
<mml:mi mathvariant="double-struck">R</mml:mi>
</mml:mrow>
</mml:math>
</alternatives>
</inline-formula>, a linear objective function of the form <bold><italic>c</italic><sup><italic>T</italic></sup> <italic>x</italic></bold> that returns a score for a candidate subset of reactions indexed by a binary vector <bold><italic>x</italic></bold> ∈ {0, 1}<sup><italic>n</italic></sup>, indicating whether reaction <italic>R</italic><sub><italic>i</italic></sub> is selected or not, so that the subset of selected reactions from <italic>R</italic> is defined as <italic>R</italic><sub><italic>c</italic></sub> = {<italic>R</italic><sub><italic>i</italic></sub> ∈ <italic>R</italic>∣<italic>x</italic><sub><italic>i</italic></sub> = 1, ∀<italic>i</italic> ∈ 1…<italic>n</italic>}</p>
</list-item>
</list>
<p>The goal is to find the binary vector <bold><italic>x</italic></bold> (or equivalently the subset <italic>R</italic><sub><italic>c</italic></sub>) such that <italic>f</italic>(<bold><italic>x</italic></bold>) is maximized. Reactions included in the <italic>R</italic><sub><italic>c</italic></sub> set have to carry a non-zero flux under steady state conditions. This problem can be stated as a Mixed Integer Linear Programming (MILP) problem with the following form: 
<disp-formula id="pcbi.1008730.e002"><alternatives><graphic id="pcbi.1008730.e002g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e002" xlink:type="simple"/><mml:math display="block" id="M2"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mtable><mml:mtr><mml:mtd columnalign="right"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mo form="prefix" movablelimits="true">max</mml:mo></mml:mstyle></mml:mtd> <mml:mtd columnalign="left" columnspan="3"><mml:mrow><mml:mi>f</mml:mi> <mml:mrow><mml:mo>(</mml:mo> <mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mo>)</mml:mo></mml:mrow> <mml:mo>=</mml:mo> <mml:mrow><mml:msup><mml:mi mathvariant="bold-italic">c</mml:mi> <mml:mi mathvariant="bold-italic">T</mml:mi></mml:msup> <mml:mi mathvariant="bold-italic">x</mml:mi></mml:mrow></mml:mrow></mml:mtd></mml:mtr> <mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:mi mathvariant="normal">s</mml:mi> <mml:mo>.</mml:mo> <mml:mi mathvariant="normal">t</mml:mi> <mml:mo>.</mml:mo></mml:mrow></mml:mtd> <mml:mtd columnalign="left"><mml:mrow><mml:mi mathvariant="bold-italic">S</mml:mi> <mml:mo>·</mml:mo> <mml:mi mathvariant="bold-italic">v</mml:mi></mml:mrow></mml:mtd> <mml:mtd><mml:mo>=</mml:mo></mml:mtd> <mml:mtd columnalign="left"><mml:mn mathvariant="bold">0</mml:mn></mml:mtd> <mml:mtd/></mml:mtr> <mml:mtr><mml:mtd/><mml:mtd columnalign="left"><mml:mrow><mml:msub><mml:mi>x</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>*</mml:mo> <mml:msub><mml:mi>v</mml:mi> <mml:mtext>min,i</mml:mtext></mml:msub></mml:mrow></mml:mtd> <mml:mtd><mml:mrow><mml:mo>≤</mml:mo> <mml:msub><mml:mi>v</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>≤</mml:mo></mml:mrow></mml:mtd> <mml:mtd columnalign="left"><mml:mrow><mml:msub><mml:mi>x</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>*</mml:mo> <mml:msub><mml:mi>v</mml:mi> <mml:mtext>max,i</mml:mtext></mml:msub></mml:mrow></mml:mtd> <mml:mtd/></mml:mtr> <mml:mtr><mml:mtd/><mml:mtd 
columnalign="left" columnspan="4"><mml:mrow><mml:mi mathvariant="bold-italic">v</mml:mi> <mml:mo>∈</mml:mo> <mml:msup><mml:mi mathvariant="double-struck">R</mml:mi> <mml:mi>n</mml:mi></mml:msup> <mml:mo>,</mml:mo> <mml:mspace width="1em"/><mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mo>∈</mml:mo> <mml:msup><mml:mrow><mml:mo>{</mml:mo> <mml:mn>0</mml:mn> <mml:mo>,</mml:mo> <mml:mn>1</mml:mn> <mml:mo>}</mml:mo></mml:mrow> <mml:mi>n</mml:mi></mml:msup></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(1)</label></disp-formula>
where <italic>x</italic><sub><italic>i</italic></sub> ∈ {<italic>x</italic><sub>1</sub>, …, <italic>x</italic><sub><italic>n</italic></sub>} are the binary variables representing if reaction <italic>R</italic><sub><italic>i</italic></sub> is present or not, <italic>v</italic><sub><italic>i</italic></sub> ∈ {<italic>v</italic><sub>1</sub>, …, <italic>v</italic><sub><italic>n</italic></sub>} the variables representing the flux through each reaction <italic>R</italic><sub><italic>i</italic></sub>, and <italic>v</italic><sub><italic>min</italic></sub> and <italic>v</italic><sub><italic>max</italic></sub> the lower and upper bounds for the flux through each reaction. Note that what is subject to optimization is the selection of the reactions but not the fluxes. Fluxes are constrained within some bounds <bold><italic>v</italic><sub><italic>min</italic></sub></bold> and <bold><italic>v</italic><sub><italic>max</italic></sub></bold>, and forced to be in steady state (<bold><italic>S</italic></bold> ⋅ <bold><italic>v</italic></bold> = 0). Reactions can be included (<italic>x</italic><sub><italic>i</italic></sub> = 1) only if they can carry some non-zero flux, and reactions not included (<italic>x</italic><sub><italic>i</italic></sub> = 0) are forced to carry a zero flux. In the following, we shall use this notation to introduce different MILP problems for context-specific reconstruction of metabolic networks.</p>
<p>The objective function <italic>f</italic>(<bold><italic>x</italic></bold>) calculates the agreement between the experimental data and the selected reactions. One common strategy is to divide reactions into two disjoint sets based on experimental evidence, namely reactions associated with highly expressed enzymes (<italic>R</italic><sub><italic>H</italic></sub> ⊆ <italic>R</italic>) and reactions associated with lowly expressed enzymes (<italic>R</italic><sub><italic>L</italic></sub> ⊆ <italic>R</italic>), and then defining <italic>f</italic>(<bold><italic>x</italic></bold>) as:
<disp-formula id="pcbi.1008730.e003"><alternatives><graphic id="pcbi.1008730.e003g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e003" xlink:type="simple"/><mml:math display="block" id="M3"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>f</mml:mi> <mml:mrow><mml:mo>(</mml:mo> <mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mo>)</mml:mo></mml:mrow> <mml:mo>=</mml:mo> <mml:munder><mml:mo>∑</mml:mo> <mml:mrow><mml:mi>i</mml:mi> <mml:mo>∣</mml:mo> <mml:msub><mml:mi>R</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>∈</mml:mo> <mml:msub><mml:mi>R</mml:mi> <mml:mi>H</mml:mi></mml:msub></mml:mrow></mml:munder> <mml:msub><mml:mi>x</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>+</mml:mo> <mml:munder><mml:mo>∑</mml:mo> <mml:mrow><mml:mi>i</mml:mi> <mml:mo>∣</mml:mo> <mml:msub><mml:mi>R</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>∈</mml:mo> <mml:msub><mml:mi>R</mml:mi> <mml:mi>L</mml:mi></mml:msub></mml:mrow></mml:munder> <mml:mn>1</mml:mn> <mml:mo>-</mml:mo> <mml:msub><mml:mi>x</mml:mi> <mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mstyle></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(2)</label></disp-formula></p>
<p>This is the strategy described in <monospace>iMAT</monospace>, in which the selection of one reaction in <italic>R</italic><sub><italic>H</italic></sub> or the removal of one reaction in <italic>R</italic><sub><italic>L</italic></sub> contributes in the same way to the score. Other strategies, such as <monospace>Fastcore</monospace>, enforce the inclusion of all the reactions in <italic>R</italic><sub><italic>H</italic></sub>, and so <italic>f</italic>(<bold><italic>x</italic></bold>) evaluates only the number of selected reactions in <italic>R</italic><sub><italic>L</italic></sub> to minimize it.</p>
<p>In practice, the binary vector <bold>x</bold> is extended to account also for reversible reactions in the <italic>R</italic><sub><italic>H</italic></sub> set that can be active carrying a negative flux. Also, a tunable parameter <italic>ϵ</italic> corresponding to the minimal amount of flux a reaction has to carry to be considered active is usually included in the optimization problem. In the original <monospace>iMAT</monospace> formulation, a reaction <italic>R</italic><sub><italic>i</italic></sub> ∈ <italic>R</italic><sub><italic>L</italic></sub> which is not selected (which carries no flux) has a value of <italic>x</italic><sub><italic>i</italic></sub> = 1 representing a match with the experimental data, and so <xref ref-type="disp-formula" rid="pcbi.1008730.e003">Eq 2</xref> simplifies to just <italic>f</italic>(<bold>x</bold>) = ∑<sub><italic>i</italic></sub> <italic>x</italic><sub><italic>i</italic></sub>. The full problem specification is described in <xref ref-type="disp-formula" rid="pcbi.1008730.e004">Eq 3</xref>:
<disp-formula id="pcbi.1008730.e004"><alternatives><graphic id="pcbi.1008730.e004g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e004" xlink:type="simple"/><mml:math display="block" id="M4"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mtable><mml:mtr><mml:mtd columnalign="right"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mo form="prefix" movablelimits="true">max</mml:mo></mml:mstyle></mml:mtd> <mml:mtd columnalign="left" columnspan="3"><mml:mrow><mml:munder><mml:mo>∑</mml:mo> <mml:mi>i</mml:mi></mml:munder> <mml:msub><mml:mi>x</mml:mi> <mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mtd></mml:mtr> <mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:mi mathvariant="normal">s</mml:mi> <mml:mo>.</mml:mo> <mml:mi mathvariant="normal">t</mml:mi> <mml:mo>.</mml:mo></mml:mrow></mml:mtd> <mml:mtd columnalign="left"><mml:mrow><mml:mi mathvariant="bold-italic">S</mml:mi> <mml:mo>·</mml:mo> <mml:mi mathvariant="bold-italic">v</mml:mi></mml:mrow></mml:mtd> <mml:mtd><mml:mo>=</mml:mo></mml:mtd> <mml:mtd columnalign="left"><mml:mn mathvariant="bold">0</mml:mn></mml:mtd> <mml:mtd/></mml:mtr> <mml:mtr><mml:mtd/><mml:mtd columnalign="left"><mml:mrow><mml:msub><mml:mi>v</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>+</mml:mo> <mml:msubsup><mml:mi>x</mml:mi> <mml:mi>i</mml:mi> <mml:mo>+</mml:mo></mml:msubsup> <mml:mrow><mml:mo>(</mml:mo> <mml:msub><mml:mi>v</mml:mi> <mml:mtext>min,i</mml:mtext></mml:msub> <mml:mo>-</mml:mo> <mml:mi>ϵ</mml:mi> <mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mtd> <mml:mtd><mml:mo>≥</mml:mo></mml:mtd> <mml:mtd columnalign="left"><mml:msub><mml:mi>v</mml:mi> <mml:mtext>min,i</mml:mtext></mml:msub></mml:mtd> <mml:mtd columnalign="left"><mml:mrow><mml:mo>∀</mml:mo> <mml:mi>i</mml:mi> <mml:mo>∣</mml:mo> <mml:msub><mml:mi>R</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>∈</mml:mo> <mml:msub><mml:mi>R</mml:mi> <mml:mi>H</mml:mi></mml:msub></mml:mrow></mml:mtd></mml:mtr> 
<mml:mtr><mml:mtd/><mml:mtd columnalign="left"><mml:mrow><mml:msub><mml:mi>v</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>+</mml:mo> <mml:msubsup><mml:mi>x</mml:mi> <mml:mi>i</mml:mi> <mml:mo>-</mml:mo></mml:msubsup> <mml:mrow><mml:mo>(</mml:mo> <mml:msub><mml:mi>v</mml:mi> <mml:mtext>max,i</mml:mtext></mml:msub> <mml:mo>+</mml:mo> <mml:mi>ϵ</mml:mi> <mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mtd> <mml:mtd><mml:mo>≤</mml:mo></mml:mtd> <mml:mtd columnalign="left"><mml:msub><mml:mi>v</mml:mi> <mml:mtext>max,i</mml:mtext></mml:msub></mml:mtd> <mml:mtd columnalign="left"><mml:mrow><mml:mo>∀</mml:mo> <mml:mi>i</mml:mi> <mml:mo>∣</mml:mo> <mml:msub><mml:mi>R</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>∈</mml:mo> <mml:msub><mml:mi>R</mml:mi> <mml:mi>H</mml:mi></mml:msub></mml:mrow></mml:mtd></mml:mtr> <mml:mtr><mml:mtd/><mml:mtd columnalign="left"><mml:mrow><mml:msub><mml:mi>v</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>+</mml:mo> <mml:msubsup><mml:mi>x</mml:mi> <mml:mi>i</mml:mi> <mml:mi>o</mml:mi></mml:msubsup> <mml:mo>·</mml:mo> <mml:msub><mml:mi>v</mml:mi> <mml:mtext>min,i</mml:mtext></mml:msub></mml:mrow></mml:mtd> <mml:mtd><mml:mo>≥</mml:mo></mml:mtd> <mml:mtd columnalign="left"><mml:msub><mml:mi>v</mml:mi> <mml:mtext>min,i</mml:mtext></mml:msub></mml:mtd> <mml:mtd columnalign="left"><mml:mrow><mml:mo>∀</mml:mo> <mml:mi>i</mml:mi> <mml:mo>∣</mml:mo> <mml:msub><mml:mi>R</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>∈</mml:mo> <mml:msub><mml:mi>R</mml:mi> <mml:mi>L</mml:mi></mml:msub></mml:mrow></mml:mtd></mml:mtr> <mml:mtr><mml:mtd/><mml:mtd columnalign="left"><mml:mrow><mml:msub><mml:mi>v</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>+</mml:mo> <mml:msubsup><mml:mi>x</mml:mi> <mml:mi>i</mml:mi> <mml:mi>o</mml:mi></mml:msubsup> <mml:mo>·</mml:mo> <mml:msub><mml:mi>v</mml:mi> <mml:mtext>max,i</mml:mtext></mml:msub></mml:mrow></mml:mtd> <mml:mtd><mml:mo>≤</mml:mo></mml:mtd> <mml:mtd columnalign="left"><mml:msub><mml:mi>v</mml:mi> 
<mml:mtext>max,i</mml:mtext></mml:msub></mml:mtd> <mml:mtd columnalign="left"><mml:mrow><mml:mo>∀</mml:mo> <mml:mi>i</mml:mi> <mml:mo>∣</mml:mo> <mml:msub><mml:mi>R</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>∈</mml:mo> <mml:msub><mml:mi>R</mml:mi> <mml:mi>L</mml:mi></mml:msub></mml:mrow></mml:mtd></mml:mtr> <mml:mtr><mml:mtd/><mml:mtd/><mml:mtd/><mml:mtd/><mml:mtd/></mml:mtr> <mml:mtr><mml:mtd/><mml:mtd columnalign="left" columnspan="4"><mml:mrow><mml:mi mathvariant="bold-italic">v</mml:mi> <mml:mo>∈</mml:mo> <mml:msup><mml:mi mathvariant="double-struck">R</mml:mi> <mml:mi>n</mml:mi></mml:msup> <mml:mo>,</mml:mo> <mml:mspace width="4pt"/></mml:mrow></mml:mtd></mml:mtr> <mml:mtr><mml:mtd/><mml:mtd columnalign="left" columnspan="4"><mml:mrow><mml:msup><mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mo>+</mml:mo></mml:msup> <mml:mo>,</mml:mo> <mml:msup><mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mo>-</mml:mo></mml:msup> <mml:mo>,</mml:mo> <mml:msup><mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mi>o</mml:mi></mml:msup> <mml:mo>∈</mml:mo> <mml:msup><mml:mrow><mml:mo>{</mml:mo> <mml:mn>0</mml:mn> <mml:mo>,</mml:mo> <mml:mn>1</mml:mn> <mml:mo>}</mml:mo></mml:mrow> <mml:mrow><mml:mo>|</mml:mo> <mml:mi>R</mml:mi> <mml:mo>|</mml:mo></mml:mrow></mml:msup></mml:mrow></mml:mtd></mml:mtr> <mml:mtr><mml:mtd/><mml:mtd columnalign="left" columnspan="4"><mml:mrow><mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mo>=</mml:mo> <mml:mrow><mml:mo>(</mml:mo> <mml:msup><mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mo>+</mml:mo></mml:msup> <mml:mo>,</mml:mo> <mml:msup><mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mo>-</mml:mo></mml:msup> <mml:mo>,</mml:mo> <mml:msup><mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mi>o</mml:mi></mml:msup> <mml:mo>)</mml:mo></mml:mrow> <mml:mo>∈</mml:mo> <mml:msup><mml:mrow><mml:mo>{</mml:mo> <mml:mn>0</mml:mn> <mml:mo>,</mml:mo> <mml:mn>1</mml:mn> <mml:mo>}</mml:mo></mml:mrow> <mml:mrow><mml:mn>3</mml:mn> <mml:mo>|</mml:mo> 
<mml:mi>R</mml:mi> <mml:mo>|</mml:mo></mml:mrow></mml:msup></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(3)</label></disp-formula></p>
<p>It is important to remark that the methods presented here are general strategies for enumerating optimal metabolic network reconstructions, and therefore can be used with different base algorithms for the reconstruction, as long as they are implemented as MILPs. This means that the methods serve to enumerate <monospace>iMAT</monospace>-like solutions [<xref ref-type="bibr" rid="pcbi.1008730.ref007">7</xref>], <monospace>Fastcore</monospace>-like solutions [<xref ref-type="bibr" rid="pcbi.1008730.ref015">15</xref>], <monospace>INIT</monospace>-like solutions [<xref ref-type="bibr" rid="pcbi.1008730.ref014">14</xref>], or any other type of MILP-based reconstruction.</p>
<p>In the following sections, for practical reasons and without loss of generality, we use the original set of <monospace>iMAT</monospace> constraints and objective function as the base MILP problem for network enumeration, since: 1) it relies on a MILP formulation, which can be easily adapted to optimally solve different optimization problems and objectives; and 2) the default objective function optimizes a trade-off between the coverage of reactions associated with highly expressed genes and reactions associated with lowly expressed genes, which has proven in practice to be a good general strategy that only requires gene expression data. This trade-off introduces flexibility in the optimization process, allowing us to predict that some reactions are not active even though they are associated with highly expressed genes, something important to account for post-transcriptional events.</p>
</sec>
<sec id="sec004">
<title>The problem of enumerating optimal metabolic networks</title>
<p>The enumeration problem arises naturally in context-specific reconstruction of metabolic networks due to the discrete nature of the selection of reactions and the imbalance between the available constraints and the complex topology of the networks, leading to an underdetermined problem.</p>
<p>In order to better analyze the enumeration problem from a computational point of view, we use a Directed Acyclic Graph (DAG) network model. Directed Acyclic Graphs are commonly used for the analysis of biological networks in general [<xref ref-type="bibr" rid="pcbi.1008730.ref028">28</xref>]. By representing metabolic networks as DAGs, we can calculate in advance how many optimal solutions we can expect, and thus compare the techniques with a ground truth (i.e., the full set of optimal solutions that exist in a specific DAG) in an objective manner focusing specifically on the computational problem of enumeration. This is important because although the scope of application is biological, the technique is basically computational and requires a proper computational analysis of the problem under study. <xref ref-type="fig" rid="pcbi.1008730.g002">Fig 2</xref> shows the generic DAG metabolic network with <italic>L</italic> layers of <italic>N</italic> metabolites. Each metabolite <italic>m</italic><sub><italic>i</italic>,<italic>k</italic></sub> in layer <italic>L</italic><sub><italic>k</italic></sub> is connected to each metabolite <italic>m</italic><sub><italic>j</italic>,<italic>k</italic>+1</sub> in <italic>L</italic><sub><italic>k</italic>+1</sub> by single reactions <italic>R</italic><sub><italic>ijk</italic></sub> = (<italic>m</italic><sub><italic>i</italic>,<italic>k</italic></sub>, <italic>m</italic><sub><italic>j</italic>,<italic>k</italic>+1</sub>) with only one substrate and one product. The model includes two extra metabolites <italic>m</italic><sub><italic>s</italic></sub> as a source and <italic>m</italic><sub><italic>t</italic></sub> as a sink node to centralize
the import and export reactions and simplify the model. The number of total metabolites, including <italic>m</italic><sub><italic>s</italic></sub> and <italic>m</italic><sub><italic>t</italic></sub> is 2 + <italic>N</italic> ⋅ <italic>L</italic>, and the number of total reactions is 2<italic>N</italic> + <italic>N</italic><sup>2</sup> ⋅ (<italic>L</italic> − 1).</p>
<fig id="pcbi.1008730.g002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1008730.g002</object-id>
<label>Fig 2</label>
<caption>
<title>Directed Acyclic Graph (DAG) metabolic network model.</title>
<p>This figure illustrates the DAG metabolic model that we use to analyze the computational issues related to the enumeration of optimal context-specific network reconstructions using MILP-based reconstruction methods. The metabolic network is divided into <italic>L</italic> layers, each layer containing <italic>N</italic> metabolites.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1008730.g002" xlink:type="simple"/>
</fig>
<p>In this example, we want to extract the context-specific metabolic network, given the following conditions:</p>
<list list-type="bullet">
<list-item>
<p>∑<sub><italic>i</italic></sub> |<italic>v</italic><sub><italic>i</italic></sub>| &gt; 0, i.e., there is a non-zero steady state flux from <italic>m</italic><sub><italic>s</italic></sub> to <italic>m</italic><sub><italic>t</italic></sub>. This is commonly assumed in order to avoid having an empty network.</p>
</list-item>
<list-item>
<p><italic>R</italic><sub><italic>H</italic></sub> = ∅, <italic>R</italic><sub><italic>L</italic></sub> = <italic>R</italic>, i.e., there are no reactions associated to highly expressed enzymes, and all the reactions are associated with lowly expressed enzymes.</p>
</list-item>
</list>
<p>It can be seen that a metabolic network with optimal <italic>f</italic>(<bold>x</bold>) in this case is the one that carries flux from <italic>m</italic><sub><italic>s</italic></sub> to <italic>m</italic><sub><italic>t</italic></sub> using the minimum number of reactions (since they are all in the <italic>R</italic><sub><italic>L</italic></sub> set), which corresponds to a shortest path from <italic>m</italic><sub><italic>s</italic></sub> to <italic>m</italic><sub><italic>t</italic></sub>. Since there are no loops in the network, the shortest length for the path is <italic>L</italic> + 2 (including the path from <italic>m</italic><sub><italic>s</italic></sub> to <italic>L</italic><sub>1</sub> and from <italic>L</italic><sub><italic>L</italic></sub> to <italic>m</italic><sub><italic>t</italic></sub>). This also implies that there is no single solution, but instead any path from <italic>m</italic><sub><italic>s</italic></sub> to <italic>m</italic><sub><italic>t</italic></sub> is an optimal solution, i.e., a context-specific reconstruction network with optimal <italic>f</italic>(<bold>x</bold>) given the previously defined conditions. Since there are <italic>N</italic> different paths to go from any metabolite in layer <italic>L</italic><sub><italic>j</italic></sub> to any metabolite in layer <italic>L</italic><sub><italic>j</italic>+1</sub>, that makes <italic>N</italic><sup><italic>L</italic></sup> possible optimal networks in this particular example, that is, the number of possible optimal solutions in this example grows exponentially with the number of layers. Note also that since the number of reactions for a fixed number of metabolites grows linearly with the number of layers, the number of possible solutions also grows exponentially with the number of reactions.</p>
<p>This example illustrates that there are instances of the enumeration problem for which the number of optimal solutions grows exponentially with the size of the network. Thus, in general, enumerating the full set of optimal metabolic networks can be impractical, especially considering the size of networks such as Recon 3D [<xref ref-type="bibr" rid="pcbi.1008730.ref029">29</xref>] with 13,543 reactions, or the recent Human1 network [<xref ref-type="bibr" rid="pcbi.1008730.ref030">30</xref>] with around 13,000 reactions.</p>
<p>More formally, it can be shown that the enumeration of all optimal metabolic networks is a type of <italic>vertex enumeration problem</italic>. Let <italic>M</italic><sub><italic>P</italic></sub> be the general MILP problem for context specific reconstruction using a GSMN with <italic>n</italic> reactions and with objective function <bold>c</bold><sup><bold>T</bold></sup> <bold>x</bold> that we want to maximize, as defined in <xref ref-type="disp-formula" rid="pcbi.1008730.e002">Eq 1</xref>. Let <italic>Ω</italic> be the set of all 0/1-vectors representing the feasible solutions for the MILP <italic>M</italic><sub><italic>P</italic></sub> that satisfy all the constraints defined in <xref ref-type="disp-formula" rid="pcbi.1008730.e002">Eq 1</xref>. From a geometric point of view, the space of possible networks can be viewed as vertices of the hypercube <italic>C</italic><sub><italic>n</italic></sub> = {0, 1}<sup><italic>n</italic></sup>, and the set of feasible solutions <italic>Ω</italic> as a subset of vertices of <italic>C</italic><sub><italic>n</italic></sub>, where its convex hull is a 0/1-polytope <italic>P</italic>, that is, <italic>P</italic> = <italic>conv</italic>(<italic>Ω</italic>). Let <italic>z</italic>* be the optimal value of <italic>M</italic><sub><italic>P</italic></sub>, i.e., ∀<bold>x</bold> ∈ <italic>Ω</italic>, <bold>c</bold><sup><bold>T</bold></sup> <bold>x</bold> ≤ <italic>z</italic>*. 
We are interested in the set of all optimal feasible solutions <italic>Ω</italic>* ≔ {<bold>x</bold>* ∣ <bold>x</bold>* ∈ <italic>Ω</italic>∧<bold>c</bold><sup><bold>T</bold></sup> <bold>x</bold>* = <italic>z</italic>*}, where <italic>P</italic>* = <italic>conv</italic>(<italic>Ω</italic>*) ⊆ <italic>P</italic> is the 0/1-subpolytope of interest in <inline-formula id="pcbi.1008730.e005"><alternatives><graphic id="pcbi.1008730.e005g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e005" xlink:type="simple"/><mml:math display="inline" id="M5"><mml:mi mathvariant="script">H</mml:mi></mml:math></alternatives></inline-formula>-representation (as the intersection of half spaces defined by all the constraints) from which we want to obtain the <inline-formula id="pcbi.1008730.e006"><alternatives><graphic id="pcbi.1008730.e006g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e006" xlink:type="simple"/><mml:math display="inline" id="M6"><mml:mi mathvariant="script">V</mml:mi></mml:math></alternatives></inline-formula>-representation, that is, the set of vertices as vectors of 0/1 coordinates (the optimal context-specific networks), which is the definition of the <italic>vertex enumeration problem</italic>.</p>
<p>Vertex enumeration [<xref ref-type="bibr" rid="pcbi.1008730.ref031">31</xref>] is a classical problem in the field of combinatorial optimization for which some specific techniques were proposed [<xref ref-type="bibr" rid="pcbi.1008730.ref032">32</xref>]. For the special case of 0/1-polytopes [<xref ref-type="bibr" rid="pcbi.1008730.ref033">33</xref>], some notable approaches are Binary Decision Diagrams [<xref ref-type="bibr" rid="pcbi.1008730.ref034">34</xref>–<xref ref-type="bibr" rid="pcbi.1008730.ref038">38</xref>], tree search-based methods [<xref ref-type="bibr" rid="pcbi.1008730.ref039">39</xref>, <xref ref-type="bibr" rid="pcbi.1008730.ref040">40</xref>] and techniques based on branch-and-bound and cutting planes, extensively exploited in academic/commercial solvers such as IBM CPLEX and Gurobi. In fact, as a general enumeration mechanism, these solvers incorporate the concept of a pool of optimal solutions, in which the tree of feasible solutions continues to be explored until a specific number of optimal feasible solutions have been found.</p>
<p>However, as discussed before, the number of optimal metabolic networks for a given problem can be extremely large, and so classical vertex enumeration techniques are not suitable for this task. One reason is that, given the potentially vast number of possible solutions and a fixed amount of time to generate a variety of optimal solutions, there is no guarantee that these methods will generate a diverse set of solutions. In fact, the opposite is more likely: similar solutions (e.g., small variations in reactions on the same pathway) will probably be closer in the search space. Also, due to symmetries introduced by loops and other patterns in metabolic networks, chances are that the search gets trapped enumerating solutions in small dense regions of the search space that can be more related to artifacts than to solutions with true biological meaning.</p>
<p>In the following sections we present four enumeration strategies and analyze their advantages and drawbacks. It should be noted that we limited ourselves to a set of generic techniques that can be implemented on top of general MILP solvers and can be easily integrated into existing pipelines for network reconstruction. One disadvantage of this choice is that each solution is obtained by constructing and solving a new MILP problem. Ad-hoc search strategies for the enumeration of MILP solutions based on custom branch-and-cut methods or more advanced tree search exploration, although they might be more efficient in some situations, are out of the scope of this work.</p>
</sec>
<sec id="sec005">
<title>Enumeration of optimal networks by inclusion or exclusion of reactions (<monospace>DEXOM Reaction-enum</monospace>)</title>
<p>A simple way to generate alternative optimal metabolic networks can be achieved by directly manipulating the flux bounds of each reaction to force it to carry some positive flux, some negative flux (if reversible), or no flux, as in [<xref ref-type="bibr" rid="pcbi.1008730.ref020">20</xref>, <xref ref-type="bibr" rid="pcbi.1008730.ref021">21</xref>]. The original method traverses all the reactions in the model testing forward flux (or backward flux if the reaction is reversible) or blocking flux in order to generate a new solution with a different activation for each reaction. Solutions that are still optimal after the modification are added to the set of optimal solutions. This method has, however, two major limitations: 1) it only explores variations in single reactions (if they can be active or inactive in an optimal solution), leaving the vast space of combinations between reactions completely unexplored; and 2) it generates many duplicated solutions, wasting computation time.</p>
<p>We included this basic idea in <monospace>DEXOM</monospace> as a simple mechanism to generate alternative solutions, under the name of <monospace>Reaction-enum</monospace>, with some modifications and further options that can be used to reduce some of the limitations in its basic form. One option that can be enabled to alleviate the problem of generating duplicated solutions consists in tracking the activation or inactivation of each reaction in the set of alternative optimal networks during the search process. If forcing the flux through a reaction <italic>R</italic><sub><italic>ijk</italic></sub> results in an optimal sub-network with another reaction <italic>R</italic><sub><italic>i</italic>,<italic>j</italic>+1,<italic>k</italic>+1</sub> active, then there is no need to force flux through <italic>R</italic><sub><italic>i</italic>,<italic>j</italic>+1,<italic>k</italic>+1</sub> as it is not guaranteed that this operation is going to generate a new sub-network (unless the solver is adjusted to increase randomness in the solutions returned).</p>
<p>One advantage of the <monospace>Reaction-enum</monospace> method is that it tests every reaction in the model to see if its presence or absence affects the quality of the solution. This generates alternative networks with modifications in every possible pathway of the metabolic network, which makes this technique a good starting point for more advanced enumeration methods (for example, to generate a set of initial candidate optimal solutions).</p>
</sec>
<sec id="sec006">
<title>Exhaustive enumeration of optimal networks (<monospace>DEXOM Icut-enum</monospace>)</title>
<p>One simple way to perform a full enumeration of the set of optimal networks is by adding integer-cuts, linear constraints that can be added to the original problem to remove already visited solutions from the set of feasible solutions. This method, which has been already used to enumerate solutions in general for MILP problems [<xref ref-type="bibr" rid="pcbi.1008730.ref041">41</xref>], can be used as well as a mechanism to enumerate alternative metabolic networks. We adapted this technique for enumeration of context-specific reconstructions under the name of <monospace>DEXOM Icut-enum</monospace>. Starting with a default optimal solution <bold>x</bold>* to the MILP problem defined in <xref ref-type="disp-formula" rid="pcbi.1008730.e004">Eq 3</xref>, a new solution is generated by adding a new constraint to the original problem to cut <bold>x</bold>* from the set of feasible optimal solutions. This process is repeated for each new solution, adding a new constraint per solution. A new solution is accepted if there is at least one different reaction in the candidate sub-network, that is:
<disp-formula id="pcbi.1008730.e007"><alternatives><graphic id="pcbi.1008730.e007g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e007" xlink:type="simple"/><mml:math display="block" id="M7"><mml:mrow><mml:mstyle displaystyle="true"><mml:munder><mml:mo>∑</mml:mo><mml:mi>i</mml:mi></mml:munder><mml:mrow><mml:mo>|</mml:mo><mml:mrow><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>−</mml:mo><mml:msubsup><mml:mi>x</mml:mi><mml:mi>i</mml:mi><mml:mo>*</mml:mo></mml:msubsup></mml:mrow><mml:mo>|</mml:mo></mml:mrow></mml:mstyle><mml:mo>≥</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:math></alternatives> <label>(4)</label></disp-formula></p>
<p>Although this constraint is not linear due to the absolute value, it can be linearized by considering the ones and the zeros separately. Two solutions are equal if they have the same set of active reactions and the same set of inactive reactions. Thus, for each <inline-formula id="pcbi.1008730.e008"><alternatives><graphic id="pcbi.1008730.e008g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e008" xlink:type="simple"/><mml:math display="inline" id="M8"><mml:mrow><mml:msubsup><mml:mi>x</mml:mi> <mml:mi>i</mml:mi> <mml:mo>*</mml:mo></mml:msubsup> <mml:mo>=</mml:mo> <mml:mn>1</mml:mn></mml:mrow></mml:math></alternatives></inline-formula>, we expect to have <italic>x</italic><sub><italic>i</italic></sub> = 1, and for each <inline-formula id="pcbi.1008730.e009"><alternatives><graphic id="pcbi.1008730.e009g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e009" xlink:type="simple"/><mml:math display="inline" id="M9"><mml:mrow><mml:msubsup><mml:mi>x</mml:mi> <mml:mi>i</mml:mi> <mml:mo>*</mml:mo></mml:msubsup> <mml:mo>=</mml:mo> <mml:mn>0</mml:mn></mml:mrow></mml:math></alternatives></inline-formula>, we expect <italic>x</italic><sub><italic>i</italic></sub> = 0 if both the previous solution and the candidate are equal. 
Under this situation, summing up all the ones from <italic>x</italic><sub><italic>i</italic></sub> for which <inline-formula id="pcbi.1008730.e010"><alternatives><graphic id="pcbi.1008730.e010g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e010" xlink:type="simple"/><mml:math display="inline" id="M10"><mml:mrow><mml:msubsup><mml:mi>x</mml:mi> <mml:mi>i</mml:mi> <mml:mo>*</mml:mo></mml:msubsup> <mml:mo>=</mml:mo> <mml:mn>1</mml:mn></mml:mrow></mml:math></alternatives></inline-formula> should be equal to <inline-formula id="pcbi.1008730.e011"><alternatives><graphic id="pcbi.1008730.e011g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e011" xlink:type="simple"/><mml:math display="inline" id="M11"><mml:mrow><mml:mo>∑</mml:mo> <mml:mo>(</mml:mo> <mml:msubsup><mml:mi>x</mml:mi> <mml:mi>i</mml:mi> <mml:mo>*</mml:mo></mml:msubsup> <mml:mo>)</mml:mo></mml:mrow></mml:math></alternatives></inline-formula> (except if there is one or more differences), and in the same way, summing up all the zeros from <italic>x</italic><sub><italic>i</italic></sub> for which <inline-formula id="pcbi.1008730.e012"><alternatives><graphic id="pcbi.1008730.e012g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e012" xlink:type="simple"/><mml:math display="inline" id="M12"><mml:mrow><mml:msubsup><mml:mi>x</mml:mi> <mml:mi>i</mml:mi> <mml:mo>*</mml:mo></mml:msubsup> <mml:mo>=</mml:mo> <mml:mn>0</mml:mn></mml:mrow></mml:math></alternatives></inline-formula> should be equal to zero. If this does not happen, then there is some difference between the candidate solution <italic>x</italic><sub><italic>i</italic></sub> and a previous optimal solution <italic>x</italic>*. More formally, the linearization of <xref ref-type="disp-formula" rid="pcbi.1008730.e007">Eq 4</xref> can be written as:
<disp-formula id="pcbi.1008730.e013"><alternatives><graphic id="pcbi.1008730.e013g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e013" xlink:type="simple"/><mml:math display="block" id="M13"><mml:mtable columnalign="left" equalcolumns="true" equalrows="true"><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mrow><mml:mstyle displaystyle="true"><mml:munder><mml:mo>∑</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>∈</mml:mo><mml:mi>A</mml:mi></mml:mrow></mml:munder></mml:mstyle><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:mo>−</mml:mo><mml:mstyle displaystyle="true"><mml:munder><mml:mo>∑</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>∈</mml:mo><mml:mi>B</mml:mi></mml:mrow></mml:munder></mml:mstyle><mml:msub><mml:mi>x</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mspace width="10pt"/><mml:mrow><mml:mo>≤</mml:mo><mml:mspace width="10pt"/><mml:mo>(</mml:mo><mml:mrow><mml:mstyle displaystyle="true"><mml:munder><mml:mo>∑</mml:mo><mml:mi>i</mml:mi></mml:munder></mml:mstyle><mml:msubsup><mml:mi>x</mml:mi><mml:mi>i</mml:mi><mml:mo>*</mml:mo></mml:msubsup></mml:mrow><mml:mo>)</mml:mo><mml:mo>−</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:mtd></mml:mtr><mml:mtr columnalign="left"><mml:mtd columnalign="left"><mml:mrow><mml:mi>A</mml:mi><mml:mo>=</mml:mo><mml:mo>{</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>∣</mml:mo><mml:msubsup><mml:mi>x</mml:mi><mml:mi>i</mml:mi><mml:mo>*</mml:mo></mml:msubsup><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mo>}</mml:mo><mml:mo>,</mml:mo></mml:mrow><mml:mrow><mml:mi>B</mml:mi><mml:mo>=</mml:mo><mml:mo>{</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>∣</mml:mo><mml:msubsup><mml:mi>x</mml:mi><mml:mi>i</mml:mi><mml:mo>*</mml:mo></mml:msubsup><mml:mo>=</mml:mo><mml:mn>0</mml:mn></mml:mrow><mml:mo>}</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(5)</label></disp-formula></p>
<p>By adding this constraint for each new <bold>x</bold>* returned by the solver, we exclude all the previous solutions that have been found so far. The generation of new solutions stops when the problem becomes infeasible, that is, there are no more feasible optimal solutions. Note that this cut can be modified to cut feasible optimal solutions that differ in more than one reaction, i.e., to cut solutions that are at some specific Hamming distance.</p>
<p>The advantage of this method is that it enumerates all possible solutions since it removes one by one every optimal feasible solution. It is straightforward to see that this method enumerates all the feasible optimal solutions by observing that: 1) each cut removes one optimal solution; 2) the number of constraints that are added grows monotonically with every new optimal solution; and 3) the number of solutions is finite. Let us assume that for a given problem, the set of optimal feasible solutions <italic>Ω</italic>* contains <italic>N</italic> different solutions, i.e., for every pair {<italic>x</italic>*, <italic>y</italic>*} ⊂ <italic>Ω</italic>*, <inline-formula id="pcbi.1008730.e014"><alternatives><graphic id="pcbi.1008730.e014g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e014" xlink:type="simple"/><mml:math display="inline" id="M14"><mml:mrow><mml:msub><mml:mo>∑</mml:mo> <mml:mi>i</mml:mi></mml:msub> <mml:mo>|</mml:mo> <mml:msubsup><mml:mi>x</mml:mi> <mml:mi>i</mml:mi> <mml:mo>*</mml:mo></mml:msubsup> <mml:mo>-</mml:mo> <mml:msubsup><mml:mi>y</mml:mi> <mml:mi>i</mml:mi> <mml:mo>*</mml:mo></mml:msubsup> <mml:mo>|</mml:mo> <mml:mo>≥</mml:mo> <mml:mn>1</mml:mn></mml:mrow></mml:math></alternatives></inline-formula> (there is at least one different reaction between any two optimal solutions). 
For the sake of the proof, let us assume that after <italic>N</italic> steps of the algorithm, and after adding <italic>N</italic> integer cuts, one per optimal solution, the last MILP problem is still feasible, i.e., solving it returns a solution <italic>z</italic>*. Then either: 1) <italic>z</italic>* differs from every previously found solution in at least one reaction, which means that there are at least <italic>N</italic> + 1 solutions, contradicting the initial assumption; or 2) <italic>z</italic>* is a duplicated solution, that is, there exists a solution <italic>x</italic>* ∈ <italic>Ω</italic>* such that <inline-formula id="pcbi.1008730.e015"><alternatives><graphic id="pcbi.1008730.e015g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e015" xlink:type="simple"/><mml:math display="inline" id="M15"><mml:mrow><mml:msub><mml:mo>∑</mml:mo> <mml:mi>i</mml:mi></mml:msub> <mml:mo>|</mml:mo> <mml:msubsup><mml:mi>z</mml:mi> <mml:mi>i</mml:mi> <mml:mo>*</mml:mo></mml:msubsup> <mml:mo>-</mml:mo> <mml:msubsup><mml:mi>x</mml:mi> <mml:mi>i</mml:mi> <mml:mo>*</mml:mo></mml:msubsup> <mml:mo>|</mml:mo> <mml:mo>=</mml:mo> <mml:mn>0</mml:mn></mml:mrow></mml:math></alternatives></inline-formula>, which contradicts the definition of the integer cut.</p>
<p>However, in practice, it is not possible to enumerate the entire space of solutions due to the potential number of possible optimal solutions. Although this technique can be also used to generate a sample of optimal solutions (stopping the search after a desired number of solutions was found), the method is not well suited for this task since: 1) the number of constraints grows linearly with the number of solutions, which increases the computational difficulty with each new solution; 2) the algorithm can get trapped enumerating solutions in a small region of the whole space of possible optimal solutions, and so diversity in the set of solutions is not guaranteed; 3) even if a new optimal solution exists, due to numerical instabilities or precision errors, the search process can prematurely stop at the first incorrectly detected infeasible problem.</p>
</sec>
<sec id="sec007">
<title>Enumeration of optimal networks with maximum dissimilarity (<monospace>DEXOM Maxdist-enum</monospace>)</title>
<p>Another strategy for the enumeration of optimal solutions is to search for the metabolic network that is most dissimilar to a previous optimal one. This idea, already explored in the context of Integer Programming problems [<xref ref-type="bibr" rid="pcbi.1008730.ref025">25</xref>, <xref ref-type="bibr" rid="pcbi.1008730.ref042">42</xref>], has also been proposed for metabolic networks [<xref ref-type="bibr" rid="pcbi.1008730.ref022">22</xref>]. The strategy requires solving a bilevel optimization problem in which the inner optimization problem solves the original problem and the outer optimization maximizes dissimilarity. This particular bilevel optimization can be implemented as a standard MILP problem, by introducing a constraint that corresponds to the original objective function. First, an optimal solution <bold>x</bold>* with optimal score <italic>f</italic>(<bold>x</bold>*) = <italic>z</italic>* is calculated using the problem defined in <xref ref-type="disp-formula" rid="pcbi.1008730.e004">Eq 3</xref>, and then the original objective function is replaced by the minimization of a function <italic>g</italic>(<bold>x</bold>, <bold>x</bold>*) which measures the similarity between the candidate solution <bold>x</bold> and a reference optimal solution <bold>x</bold>*. In order to guarantee that the new solution to this new problem is still optimal in the original problem, a new constraint <italic>f</italic>(<bold>x</bold>) = ∑<sub><italic>i</italic></sub>(<italic>x</italic><sub><italic>i</italic></sub>) = <italic>z</italic>* has to be added to preserve optimality.</p>
<p>Although the idea of returning the most dissimilar optimal network is interesting, one of the limitations is that it can easily oscillate between a small set of optimal networks that are the most distant to each other, since only the previous optimal solution is discarded. Consequently, the technique also does not guarantee to obtain all the possible solutions. We have introduced a modification to the original idea to break this pattern and allow the complete enumeration of solutions by adding integer cuts. This modification prevents trivial oscillations between already visited solutions and enumerates the space of solutions starting from the most extreme differences. We refer to this technique as <monospace>Maxdist-enum</monospace>.</p>
<p>The objective function <italic>g</italic> of the <monospace>Maxdist-enum</monospace> method can be defined as the minimization of the overlapping of ones between <bold>x</bold> and <bold>x</bold>*. Note that the optimality constraint guarantees that the solutions must have the same number of ones (same score), and so removing one overlap (for example by not including a reaction in <italic>R</italic><sub><italic>H</italic></sub> which is present in the reference solution) has to be compensated by including another reaction in the set of <italic>R</italic><sub><italic>H</italic></sub> not present in the reference solution, or by removing one reaction in the <italic>R</italic><sub><italic>L</italic></sub> set which is present in the reference solution, in order to preserve the original optimal score. Minimization of the overlapping of ones between <bold>x</bold> and <bold>x</bold>* with this constraint is essentially the same as finding the most extreme vertices of the 0/1-polytope of feasible optimal solutions using the Hamming distance.
<disp-formula id="pcbi.1008730.e016"><alternatives><graphic id="pcbi.1008730.e016g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e016" xlink:type="simple"/><mml:math display="block" id="M16"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mtable><mml:mtr><mml:mtd columnalign="right"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:munder><mml:mo form="prefix" movablelimits="true">min</mml:mo> <mml:mi mathvariant="bold-italic">x</mml:mi></mml:munder></mml:mstyle></mml:mtd> <mml:mtd columnalign="left" columnspan="3"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mi>g</mml:mi> <mml:mrow><mml:mo>(</mml:mo> <mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mo>,</mml:mo> <mml:msup><mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mo>*</mml:mo></mml:msup> <mml:mo>)</mml:mo></mml:mrow> <mml:mo>=</mml:mo> <mml:munder><mml:mo>∑</mml:mo> <mml:mrow><mml:mi>i</mml:mi> <mml:mo>∣</mml:mo> <mml:msubsup><mml:mi>x</mml:mi> <mml:mi>i</mml:mi> <mml:mo>*</mml:mo></mml:msubsup> <mml:mo>=</mml:mo> <mml:mn>1</mml:mn></mml:mrow></mml:munder> <mml:msub><mml:mi>x</mml:mi> <mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mstyle></mml:mtd></mml:mtr> <mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:mi mathvariant="normal">s</mml:mi> <mml:mo>.</mml:mo> <mml:mi mathvariant="normal">t</mml:mi> <mml:mo>.</mml:mo></mml:mrow></mml:mtd> <mml:mtd columnalign="left"><mml:mrow><mml:mi mathvariant="bold-italic">S</mml:mi> <mml:mo>·</mml:mo> <mml:mi mathvariant="bold-italic">v</mml:mi></mml:mrow></mml:mtd> <mml:mtd><mml:mo>=</mml:mo></mml:mtd> <mml:mtd columnalign="left"><mml:mn>0</mml:mn></mml:mtd> <mml:mtd/></mml:mtr> <mml:mtr><mml:mtd/><mml:mtd columnalign="left"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:munder><mml:mo>∑</mml:mo> <mml:mrow><mml:mi>i</mml:mi> <mml:mo>∈</mml:mo> <mml:mi>A</mml:mi></mml:mrow></mml:munder> <mml:msub><mml:mi>x</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>-</mml:mo> 
<mml:munder><mml:mo>∑</mml:mo> <mml:mrow><mml:mi>i</mml:mi> <mml:mo>∈</mml:mo> <mml:mi>B</mml:mi></mml:mrow></mml:munder> <mml:msub><mml:mi>x</mml:mi> <mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mstyle></mml:mtd> <mml:mtd><mml:mo>≤</mml:mo></mml:mtd> <mml:mtd columnalign="left"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:mo>(</mml:mo> <mml:munder><mml:mo>∑</mml:mo> <mml:mi>i</mml:mi></mml:munder> <mml:msubsup><mml:mi>x</mml:mi> <mml:mi>i</mml:mi> <mml:mo>*</mml:mo></mml:msubsup> <mml:mo>)</mml:mo> <mml:mo>-</mml:mo> <mml:mn>1</mml:mn></mml:mrow></mml:mstyle></mml:mtd> <mml:mtd/></mml:mtr> <mml:mtr><mml:mtd/><mml:mtd columnalign="left"><mml:mstyle displaystyle="true" scriptlevel="0"><mml:mrow><mml:munder><mml:mo>∑</mml:mo> <mml:mi>i</mml:mi></mml:munder> <mml:msub><mml:mi>x</mml:mi> <mml:mi>i</mml:mi></mml:msub></mml:mrow></mml:mstyle></mml:mtd> <mml:mtd><mml:mo>=</mml:mo></mml:mtd> <mml:mtd columnalign="left"><mml:msup><mml:mi>z</mml:mi> <mml:mo>*</mml:mo></mml:msup></mml:mtd> <mml:mtd/></mml:mtr> <mml:mtr><mml:mtd/><mml:mtd columnalign="left"><mml:mrow><mml:msub><mml:mi>v</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>+</mml:mo> <mml:msubsup><mml:mi>x</mml:mi> <mml:mi>i</mml:mi> <mml:mo>+</mml:mo></mml:msubsup> <mml:mrow><mml:mo>(</mml:mo> <mml:msub><mml:mi>v</mml:mi> <mml:mtext>min,i</mml:mtext></mml:msub> <mml:mo>-</mml:mo> <mml:mi>ϵ</mml:mi> <mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mtd> <mml:mtd><mml:mo>≥</mml:mo></mml:mtd> <mml:mtd columnalign="left"><mml:msub><mml:mi>v</mml:mi> <mml:mtext>min,i</mml:mtext></mml:msub></mml:mtd> <mml:mtd columnalign="left"><mml:mrow><mml:mo>∀</mml:mo> <mml:mi>i</mml:mi> <mml:mo>∣</mml:mo> <mml:msub><mml:mi>R</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>∈</mml:mo> <mml:msub><mml:mi>R</mml:mi> <mml:mi>H</mml:mi></mml:msub></mml:mrow></mml:mtd></mml:mtr> <mml:mtr><mml:mtd/><mml:mtd columnalign="left"><mml:mrow><mml:msub><mml:mi>v</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>+</mml:mo> 
<mml:msubsup><mml:mi>x</mml:mi> <mml:mi>i</mml:mi> <mml:mo>-</mml:mo></mml:msubsup> <mml:mrow><mml:mo>(</mml:mo> <mml:msub><mml:mi>v</mml:mi> <mml:mtext>max,i</mml:mtext></mml:msub> <mml:mo>+</mml:mo> <mml:mi>ϵ</mml:mi> <mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mtd> <mml:mtd><mml:mo>≤</mml:mo></mml:mtd> <mml:mtd columnalign="left"><mml:msub><mml:mi>v</mml:mi> <mml:mtext>max,i</mml:mtext></mml:msub></mml:mtd> <mml:mtd columnalign="left"><mml:mrow><mml:mo>∀</mml:mo> <mml:mi>i</mml:mi> <mml:mo>∣</mml:mo> <mml:msub><mml:mi>R</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>∈</mml:mo> <mml:msub><mml:mi>R</mml:mi> <mml:mi>H</mml:mi></mml:msub></mml:mrow></mml:mtd></mml:mtr> <mml:mtr><mml:mtd/><mml:mtd columnalign="left"><mml:mrow><mml:msub><mml:mi>v</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>+</mml:mo> <mml:msubsup><mml:mi>x</mml:mi> <mml:mi>i</mml:mi> <mml:mi>o</mml:mi></mml:msubsup> <mml:mo>·</mml:mo> <mml:msub><mml:mi>v</mml:mi> <mml:mtext>min,i</mml:mtext></mml:msub></mml:mrow></mml:mtd> <mml:mtd><mml:mo>≥</mml:mo></mml:mtd> <mml:mtd columnalign="left"><mml:msub><mml:mi>v</mml:mi> <mml:mtext>min,i</mml:mtext></mml:msub></mml:mtd> <mml:mtd columnalign="left"><mml:mrow><mml:mo>∀</mml:mo> <mml:mi>i</mml:mi> <mml:mo>∣</mml:mo> <mml:msub><mml:mi>R</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>∈</mml:mo> <mml:msub><mml:mi>R</mml:mi> <mml:mi>L</mml:mi></mml:msub></mml:mrow></mml:mtd></mml:mtr> <mml:mtr><mml:mtd/><mml:mtd columnalign="left"><mml:mrow><mml:msub><mml:mi>v</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>+</mml:mo> <mml:msubsup><mml:mi>x</mml:mi> <mml:mi>i</mml:mi> <mml:mi>o</mml:mi></mml:msubsup> <mml:mo>·</mml:mo> <mml:msub><mml:mi>v</mml:mi> <mml:mtext>max,i</mml:mtext></mml:msub></mml:mrow></mml:mtd> <mml:mtd><mml:mo>≤</mml:mo></mml:mtd> <mml:mtd columnalign="left"><mml:msub><mml:mi>v</mml:mi> <mml:mtext>max,i</mml:mtext></mml:msub></mml:mtd> <mml:mtd columnalign="left"><mml:mrow><mml:mo>∀</mml:mo> <mml:mi>i</mml:mi> <mml:mo>∣</mml:mo> 
<mml:msub><mml:mi>R</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>∈</mml:mo> <mml:msub><mml:mi>R</mml:mi> <mml:mi>L</mml:mi></mml:msub></mml:mrow></mml:mtd></mml:mtr> <mml:mtr><mml:mtd/><mml:mtd/><mml:mtd/><mml:mtd/><mml:mtd/></mml:mtr></mml:mtable></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(6)</label></disp-formula></p>
<p>The expected behavior of this algorithm is the following: starting from the default solution <bold>x</bold>*, the search process generates the most distant network with the same optimal score. This process is repeated, changing the <bold>x</bold>* in each iteration to the one previously found, pushing away the search to the boundaries of the space until the most distant networks in the space of optimal solutions are discovered. The <monospace>integer-cut</monospace> constraint prevents search loops, and so once the extremes are found, the distance of new solutions decreases progressively.</p>
<p>This method also has limitations that may prevent its use for generating a diverse sample of optimal solutions. Concretely, even though the integer-cut constraint prevents generating repeated solutions, the density of similar metabolic networks at the boundaries can be large enough to never explore other areas. This increases the risk of ending up oscillating between a small group of clusters of networks with a large inter-cluster distance but a very small intra-cluster distance. In addition to this, the method is computationally more expensive than the previous ones.</p>
</sec>
<sec id="sec008">
<title>Diversity based extraction of optimal metabolic networks (<monospace>DEXOM Diversity-enum</monospace>)</title>
<p>Based on the previously identified problems and improvements for each method, we also propose a novel algorithm to generate a set of diverse optimal metabolic networks, combining the advantages of the techniques described before. The basic steps of <monospace>Diversity-enum</monospace> are:</p>
<list list-type="order">
<list-item>
<p>Generate an initial set of optimal solutions using the <monospace>Reaction-enum</monospace> method with integer cuts to avoid duplicated solutions.</p>
</list-item>
<list-item>
<p>Pick an initial solution <bold>x</bold><sup>(<bold>0</bold>)</sup> from this set.</p>
</list-item>
<list-item>
<p>Find a new alternative solution maximizing the differences with respect to some <italic>n</italic> random reactions that are present in the initial solution (that is, find the maximum-dissimilarity optimal network with respect to only those <italic>n</italic> reactions). The number of reactions that are selected (<italic>n</italic>) increases over time, starting with only 1 reaction (alternative solutions should be different in at least one reaction), until <italic>n</italic> equals the number of reactions present in <bold>x</bold><sup>(<bold>0</bold>)</sup> (i.e., maximize differences with respect to all the selected reactions in the initial solution, which is essentially the same as in the <monospace>Maxdist-enum</monospace> method). In this way, <monospace>Diversity-enum</monospace> behaves at the beginning more like <monospace>Reaction-enum</monospace>, progressively increasing the distance, until it behaves like the <monospace>Maxdist-enum</monospace> method. The speed of this transition is controlled by the parameter <italic>d</italic><sub><italic>s</italic></sub> (see Alg. 1).</p>
</list-item>
<list-item>
<p>Set the new solution <bold>x</bold><sup><bold>(1)</bold></sup> as the new initial solution and repeat from 3 until the desired number of solutions has been reached or until there are no more solutions.</p>
</list-item>
</list>
<p><bold>Algorithm 1</bold> <monospace>Diversity-enum</monospace> algorithm</p>
<p specific-use="line">1: <bold>procedure</bold> <monospace>D<sc>iversity</sc>_<sc>enum</sc></monospace>(iters, <italic>d</italic><sub><italic>s</italic></sub>, <italic>f</italic>)</p>
<p specific-use="line">2: <inline-formula id="pcbi.1008730.e017"><alternatives><graphic id="pcbi.1008730.e017g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e017" xlink:type="simple"/><mml:math display="inline" id="M17"><mml:mrow><mml:msubsup><mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mi mathvariant="bold-italic">r</mml:mi> <mml:mrow><mml:mo>(</mml:mo> <mml:mn mathvariant="bold">0</mml:mn> <mml:mo>)</mml:mo></mml:mrow></mml:msubsup> <mml:mo>,</mml:mo> <mml:mo>…</mml:mo> <mml:mo>,</mml:mo> <mml:msubsup><mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mi mathvariant="bold-italic">r</mml:mi> <mml:mrow><mml:mo>(</mml:mo> <mml:mi mathvariant="bold-italic">k</mml:mi> <mml:mo>)</mml:mo></mml:mrow></mml:msubsup> <mml:mo>←</mml:mo> <mml:mtext>initial</mml:mtext><mml:mspace width="4pt"/><mml:mtext>solutions</mml:mtext><mml:mspace width="4pt"/><mml:mtext>with</mml:mtext><mml:mspace width="4pt"/><mml:mtext>the</mml:mtext><mml:mspace width="4pt"/><mml:mtext>reaction</mml:mtext> <mml:mo>-</mml:mo> <mml:mtext>enum</mml:mtext><mml:mspace width="4pt"/><mml:mtext>method</mml:mtext></mml:mrow></mml:math></alternatives></inline-formula></p>
<p specific-use="line">3: <italic>i</italic> ← 0</p>
<p specific-use="line">4: <inline-formula id="pcbi.1008730.e018"><alternatives><graphic id="pcbi.1008730.e018g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e018" xlink:type="simple"/><mml:math display="inline" id="M18"><mml:mrow><mml:msup><mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mrow><mml:mo>(</mml:mo> <mml:mi mathvariant="bold-italic">i</mml:mi> <mml:mo>)</mml:mo></mml:mrow></mml:msup> <mml:mo>←</mml:mo> <mml:msubsup><mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mi mathvariant="bold-italic">r</mml:mi> <mml:mrow><mml:mo>(</mml:mo> <mml:mi mathvariant="bold-italic">k</mml:mi> <mml:mo>)</mml:mo></mml:mrow></mml:msubsup></mml:mrow></mml:math></alternatives></inline-formula></p>
<p specific-use="line">5: <italic>z</italic>* ← <italic>f</italic>(<bold><italic>x</italic></bold><sup><bold>(<italic>i</italic>)</bold></sup>)</p>
<p specific-use="line">6: <bold>while</bold> <italic>i</italic> &lt; <italic>iters</italic> and <italic>f</italic>(<bold><italic>x</italic></bold><sup><bold>(<italic>i</italic>)</bold></sup>) = <italic>z</italic>* <bold>do</bold></p>
<p specific-use="line">7:  <bold><italic>y</italic></bold><sup><bold>(<italic>i</italic>)</bold></sup> ← vector of 0s of same size as <bold><italic>x</italic></bold><sup><bold>(<italic>i</italic>)</bold></sup></p>
<p specific-use="line">8:  <italic>pick</italic>_<italic>prob</italic> ← 1 − <italic>exp</italic>(<italic>d</italic><sub><italic>s</italic></sub>, <italic>i</italic>) # where <italic>exp</italic>(<italic>a</italic>, <italic>b</italic>) = <italic>a</italic><sup><italic>b</italic></sup></p>
<p specific-use="line">9:  <bold>for</bold> <inline-formula id="pcbi.1008730.e019"><alternatives><graphic id="pcbi.1008730.e019g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e019" xlink:type="simple"/><mml:math display="inline" id="M19"><mml:mrow><mml:mi>j</mml:mi> <mml:mo>∣</mml:mo> <mml:msubsup><mml:mi>x</mml:mi> <mml:mi>j</mml:mi> <mml:mrow><mml:mo>(</mml:mo> <mml:mi>i</mml:mi> <mml:mo>)</mml:mo></mml:mrow></mml:msubsup> <mml:mo>=</mml:mo> <mml:mn>1</mml:mn></mml:mrow></mml:math></alternatives></inline-formula> <bold>do</bold></p>
<p specific-use="line">10:   <monospace>sample</monospace> <inline-formula id="pcbi.1008730.e020"><alternatives><graphic id="pcbi.1008730.e020g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e020" xlink:type="simple"/><mml:math display="inline" id="M20"><mml:mrow><mml:mi>u</mml:mi> <mml:mo>∼</mml:mo> <mml:mi mathvariant="script">U</mml:mi> <mml:mo>(</mml:mo> <mml:mn>0</mml:mn> <mml:mo>,</mml:mo> <mml:mn>1</mml:mn> <mml:mo>)</mml:mo></mml:mrow></mml:math></alternatives></inline-formula></p>
<p specific-use="line">11:   <bold>if</bold> <italic>u</italic> ≤ <italic>pick</italic>_<italic>prob</italic> <bold>then</bold></p>
<p specific-use="line">12:    <inline-formula id="pcbi.1008730.e021"><alternatives><graphic id="pcbi.1008730.e021g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e021" xlink:type="simple"/><mml:math display="inline" id="M21"><mml:mrow><mml:msubsup><mml:mi>y</mml:mi> <mml:mi>j</mml:mi> <mml:mrow><mml:mo>(</mml:mo> <mml:mi>i</mml:mi> <mml:mo>)</mml:mo></mml:mrow></mml:msubsup> <mml:mo>←</mml:mo> <mml:mn>1</mml:mn></mml:mrow></mml:math></alternatives></inline-formula></p>
<p specific-use="line">13:  <bold><italic>s</italic></bold> ← solve maxdist MILP <inline-formula id="pcbi.1008730.e022"><alternatives><graphic id="pcbi.1008730.e022g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e022" xlink:type="simple"/><mml:math display="inline" id="M22"><mml:mrow><mml:munder><mml:mrow><mml:mtext>min</mml:mtext></mml:mrow> <mml:mstyle mathvariant="bold-italic"><mml:mi>s</mml:mi></mml:mstyle></mml:munder> <mml:mspace width="1pt"/> <mml:mi>g</mml:mi> <mml:mo>(</mml:mo> <mml:mrow><mml:mstyle mathvariant="bold-italic"><mml:mi>s</mml:mi></mml:mstyle> <mml:mo>,</mml:mo> <mml:msup><mml:mstyle mathvariant="bold-italic"><mml:mi>y</mml:mi></mml:mstyle> <mml:mrow><mml:mo stretchy="false">(</mml:mo> <mml:mstyle mathvariant="bold-italic"><mml:mi>i</mml:mi></mml:mstyle> <mml:mo stretchy="false">)</mml:mo></mml:mrow></mml:msup></mml:mrow> <mml:mo>)</mml:mo></mml:mrow></mml:math></alternatives></inline-formula> (<xref ref-type="disp-formula" rid="pcbi.1008730.e016">Eq 6</xref>)</p>
<p specific-use="line">14:  <italic>i</italic> ← <italic>i</italic> + 1</p>
<p specific-use="line">15:  <bold><italic>x</italic></bold><sup><bold>(<italic>i</italic>)</bold></sup> ← <bold><italic>s</italic></bold></p>
<p specific-use="line">16: <bold>return</bold> <inline-formula id="pcbi.1008730.e023"><alternatives><graphic id="pcbi.1008730.e023g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e023" xlink:type="simple"/><mml:math display="inline" id="M23"><mml:mrow><mml:msubsup><mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mi mathvariant="bold-italic">r</mml:mi> <mml:mrow><mml:mo>(</mml:mo> <mml:mn mathvariant="bold">0</mml:mn> <mml:mo>)</mml:mo></mml:mrow></mml:msubsup> <mml:mo>,</mml:mo> <mml:mo>…</mml:mo> <mml:mo>,</mml:mo> <mml:msubsup><mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mi mathvariant="bold-italic">r</mml:mi> <mml:mrow><mml:mo>(</mml:mo> <mml:mi mathvariant="bold-italic">n</mml:mi> <mml:mo>)</mml:mo></mml:mrow></mml:msubsup> <mml:mo>,</mml:mo> <mml:msup><mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mrow><mml:mo>(</mml:mo> <mml:mn mathvariant="bold">1</mml:mn> <mml:mo>)</mml:mo></mml:mrow></mml:msup> <mml:mo>,</mml:mo> <mml:mo>…</mml:mo> <mml:mo>,</mml:mo> <mml:msup><mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mrow><mml:mo>(</mml:mo> <mml:mi mathvariant="bold-italic">i</mml:mi> <mml:mo>)</mml:mo></mml:mrow></mml:msup></mml:mrow></mml:math></alternatives></inline-formula>.</p>
<p>A detailed version of the algorithm is described in Alg. 1. <monospace>Diversity-enum</monospace> combines the advantages of the previous techniques. It starts by computing an initial set of solutions using the <monospace>Reaction-enum</monospace> method, avoiding duplicated solutions. This guarantees that single variations of reactions across all pathways are explored, as long as this initial set of solutions is large enough (i.e., all reactions of the network are traversed to generate alternative solutions). Then, starting from any solution in this initial set, the algorithm explores solutions in the vicinity of the selected solution, using it as a <italic>template</italic>, for which only a subset of the reactions present in the selected solution are used to maximize the distance to the new solution. The more reactions that are selected to maximize the distance, the more different the new solution found will be from the selected one. The number of selected reactions from the template at each iteration (i.e., the distance of the next solution with respect to the previous one) is controlled by the parameter <italic>d</italic><sub><italic>s</italic></sub> ∈ [0, 1]. For example, using a <italic>d</italic><sub><italic>s</italic></sub> value close to one (e.g. <italic>d</italic><sub><italic>s</italic></sub> = 0.99), the distance of the solution obtained at iteration 70 with respect to the previous one (obtained at iteration 69) is going to be 1 − 0.99<sup>70</sup> ≈ 0.5, that is, the expected distance of the next optimal solution with respect to the previous one is half of the possible maximal distance. At iteration 1,000, this value is 1 − 0.99<sup>1000</sup> ≈ 1, and so the algorithm is now searching for the most distant solution, as <monospace>Maxdist</monospace> does. Using a higher value of <italic>d</italic><sub><italic>s</italic></sub> (e.g.
<italic>d</italic><sub><italic>s</italic></sub> = 0.999) makes this transition from near to far solutions slower, since this time the value at iteration 70 is only 1 − 0.999<sup>70</sup> ≈ 0.07. It should be noted that if <italic>d</italic><sub><italic>s</italic></sub> = 0, after computing the initial set of solutions, <monospace>Diversity-enum</monospace> behaves exactly as <monospace>Maxdist-enum</monospace>. By default, the value of this parameter has been set to <italic>d</italic><sub><italic>s</italic></sub> = 0.995, and all experiments performed, unless otherwise indicated, have been done with this value.</p>
<p>Some preliminary experiments that we carried out suggest that it is preferable to start with the initial set of solutions using the <monospace>Reaction-enum</monospace> method and expand it by progressively looking for more distant solutions, rather than the other way around. The reason is that if we start with the most extreme solutions, as we progressively decrease the distance, the effect we get is to explore solutions that are closer to each other but still located in the extremes of the space, and still far from the initial solutions.</p>
</sec>
<sec id="sec009">
<title>Measuring diversity</title>
<p>Given the unknown volume of the 0/1-polytope comprising the optimal solutions, it is not possible to directly estimate its size without sampling solutions from it. In order to measure how diverse the sets of solutions obtained with different methods are, we need to rely instead on indirect measures. Since solutions are indexed by 0/1 coordinates, one reasonable metric to use is the Hamming distance:
<disp-formula id="pcbi.1008730.e024"><alternatives><graphic id="pcbi.1008730.e024g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e024" xlink:type="simple"/><mml:math display="block" id="M24"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:msub><mml:mi>δ</mml:mi> <mml:mi>h</mml:mi></mml:msub> <mml:mrow><mml:mo>(</mml:mo> <mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mo>,</mml:mo> <mml:mi mathvariant="bold-italic">y</mml:mi> <mml:mo>)</mml:mo></mml:mrow> <mml:mo>=</mml:mo> <mml:mstyle displaystyle="true" scriptlevel="0"><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:mo>|</mml:mo> <mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mo>|</mml:mo></mml:mrow></mml:mfrac></mml:mstyle> <mml:munderover><mml:mo>∑</mml:mo> <mml:mrow><mml:mi>i</mml:mi> <mml:mo>=</mml:mo> <mml:mn>1</mml:mn></mml:mrow> <mml:mi>n</mml:mi></mml:munderover> <mml:mo>|</mml:mo> <mml:msub><mml:mi>x</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>-</mml:mo> <mml:msub><mml:mi>y</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mo>|</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(7)</label></disp-formula></p>
<p>For each pair of solution vectors <bold><italic>x</italic></bold>, <bold><italic>y</italic></bold> ∈ {0, 1}<sup><italic>n</italic></sup> obtained from the set of optimal solutions <italic>Ω</italic>*, we compute the Hamming distance (i.e., how many reactions are different between any two solutions) and we average across all the distances between any two solutions to obtain the <italic>average pairwise distance</italic> <inline-formula id="pcbi.1008730.e025"><alternatives><graphic id="pcbi.1008730.e025g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e025" xlink:type="simple"/><mml:math display="inline" id="M25"><mml:mover accent="true"><mml:msub><mml:mi>δ</mml:mi> <mml:mi>h</mml:mi></mml:msub> <mml:mo>¯</mml:mo></mml:mover></mml:math></alternatives></inline-formula>. One way to promote diversity is to maximize this measurement: between two different sets of optimal solutions (of a similar size), the set with a larger average pairwise distance contains solutions that are, on average, more diverse. However, relying only on the average pairwise distance might not be informative enough in some situations, since two groups of solutions that are very different between groups but very similar within groups can have a large average pairwise distance driven by the distance between groups, even though the diversity is low within groups. Under these circumstances, it is easy to have the false impression that the set of solutions is diverse, but instead it will contain only the two initial different solutions with very small variations.</p>
<p>To discriminate better between these situations, we use also the <italic>average nearest neighbor distance</italic> <inline-formula id="pcbi.1008730.e026"><alternatives><graphic id="pcbi.1008730.e026g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e026" xlink:type="simple"/><mml:math display="inline" id="M26"><mml:msubsup><mml:mover accent="true"><mml:mi>δ</mml:mi> <mml:mo>¯</mml:mo></mml:mover> <mml:mi>h</mml:mi> <mml:mrow><mml:mi>n</mml:mi> <mml:mi>n</mml:mi></mml:mrow></mml:msubsup></mml:math></alternatives></inline-formula> defined as:
<disp-formula id="pcbi.1008730.e027"><alternatives><graphic id="pcbi.1008730.e027g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e027" xlink:type="simple"/><mml:math display="block" id="M27"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:msubsup><mml:mover accent="true"><mml:mi>δ</mml:mi> <mml:mo>¯</mml:mo></mml:mover> <mml:mi>h</mml:mi> <mml:mrow><mml:mi>n</mml:mi> <mml:mi>n</mml:mi></mml:mrow></mml:msubsup> <mml:mrow><mml:mo>(</mml:mo> <mml:mi>O</mml:mi> <mml:mo>)</mml:mo></mml:mrow> <mml:mo>=</mml:mo> <mml:mstyle displaystyle="true" scriptlevel="0"><mml:mfrac><mml:mn>1</mml:mn><mml:mrow><mml:mo>|</mml:mo> <mml:mi>O</mml:mi> <mml:mo>|</mml:mo></mml:mrow></mml:mfrac></mml:mstyle> <mml:munder><mml:mo>∑</mml:mo> <mml:mrow><mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mo>∈</mml:mo> <mml:mi>O</mml:mi></mml:mrow></mml:munder> <mml:mspace width="4pt"/><mml:munder><mml:mo form="prefix" movablelimits="true">min</mml:mo> <mml:mrow><mml:mi mathvariant="bold-italic">y</mml:mi> <mml:mo>∈</mml:mo> <mml:mi>O</mml:mi> <mml:mo>\</mml:mo> <mml:mo>{</mml:mo> <mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mo>}</mml:mo></mml:mrow></mml:munder> <mml:msub><mml:mi>δ</mml:mi> <mml:mi>h</mml:mi></mml:msub> <mml:mrow><mml:mo>(</mml:mo> <mml:mi mathvariant="bold-italic">x</mml:mi> <mml:mo>,</mml:mo> <mml:mi mathvariant="bold-italic">y</mml:mi> <mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(8)</label></disp-formula></p>
<p>That is, for each optimal solution in the solution set <italic>O</italic> ⊆ <italic>Ω</italic>* obtained with some enumeration method, we measure the distance to all other solutions and we take the distance to its closest solution (nearest neighbor). Then, we average all those distances to have the average nearest neighbor distance.</p>
<p>The average nearest neighbor distance measures how <italic>spread</italic> the solutions are. We want solutions that are spread to cover a wider range of different solutions and avoid the enumeration of clusters of very similar solutions.</p>
<p>Considering these two metrics, we can devise four situations when comparing the solution sets obtained by different methods:</p>
<list list-type="bullet">
<list-item>
<p><bold>Lower</bold> <inline-formula id="pcbi.1008730.e028">
<alternatives>
<graphic id="pcbi.1008730.e028g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e028" xlink:type="simple"/>
<mml:math display="inline" id="M28">
<mml:msub>
<mml:mover accent="true">
<mml:mi>δ</mml:mi>
<mml:mo>¯</mml:mo>
</mml:mover>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:math>
</alternatives>
</inline-formula> and <bold>lower</bold> <inline-formula id="pcbi.1008730.e029"><alternatives><graphic id="pcbi.1008730.e029g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e029" xlink:type="simple"/><mml:math display="inline" id="M29"><mml:msubsup><mml:mover accent="true"><mml:mi>δ</mml:mi> <mml:mo>¯</mml:mo></mml:mover> <mml:mi>h</mml:mi> <mml:mrow><mml:mi>n</mml:mi> <mml:mi>n</mml:mi></mml:mrow></mml:msubsup></mml:math></alternatives></inline-formula>: this situation corresponds to a low diversity. Solutions are close together and sampled from a small region of the search space.</p>
</list-item>
<list-item>
<p><bold>Larger</bold> <inline-formula id="pcbi.1008730.e030">
<alternatives>
<graphic id="pcbi.1008730.e030g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e030" xlink:type="simple"/>
<mml:math display="inline" id="M30">
<mml:msub>
<mml:mover accent="true">
<mml:mi>δ</mml:mi>
<mml:mo>¯</mml:mo>
</mml:mover>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:math>
</alternatives>
</inline-formula> and <bold>lower</bold> <inline-formula id="pcbi.1008730.e031"><alternatives><graphic id="pcbi.1008730.e031g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e031" xlink:type="simple"/><mml:math display="inline" id="M31"><mml:msubsup><mml:mover accent="true"><mml:mi>δ</mml:mi> <mml:mo>¯</mml:mo></mml:mover> <mml:mi>h</mml:mi> <mml:mrow><mml:mi>n</mml:mi> <mml:mi>n</mml:mi></mml:mrow></mml:msubsup></mml:math></alternatives></inline-formula>: low dispersion of the solutions, even though solutions are distant from each other.</p>
</list-item>
<list-item>
<p><bold>Lower</bold> <inline-formula id="pcbi.1008730.e032">
<alternatives>
<graphic id="pcbi.1008730.e032g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e032" xlink:type="simple"/>
<mml:math display="inline" id="M32">
<mml:msub>
<mml:mover accent="true">
<mml:mi>δ</mml:mi>
<mml:mo>¯</mml:mo>
</mml:mover>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:math>
</alternatives>
</inline-formula> and <bold>larger</bold> <inline-formula id="pcbi.1008730.e033"><alternatives><graphic id="pcbi.1008730.e033g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e033" xlink:type="simple"/><mml:math display="inline" id="M33"><mml:msubsup><mml:mover accent="true"><mml:mi>δ</mml:mi> <mml:mo>¯</mml:mo></mml:mover> <mml:mi>h</mml:mi> <mml:mrow><mml:mi>n</mml:mi> <mml:mi>n</mml:mi></mml:mrow></mml:msubsup></mml:math></alternatives></inline-formula>: solutions are dispersed but only in a smaller region of the search space.</p>
</list-item>
<list-item>
<p><bold>Larger</bold> <inline-formula id="pcbi.1008730.e034">
<alternatives>
<graphic id="pcbi.1008730.e034g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e034" xlink:type="simple"/>
<mml:math display="inline" id="M34">
<mml:msub>
<mml:mover accent="true">
<mml:mi>δ</mml:mi>
<mml:mo>¯</mml:mo>
</mml:mover>
<mml:mi>h</mml:mi>
</mml:msub>
</mml:math>
</alternatives>
</inline-formula> and <bold>larger</bold> <inline-formula id="pcbi.1008730.e035"><alternatives><graphic id="pcbi.1008730.e035g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e035" xlink:type="simple"/><mml:math display="inline" id="M35"><mml:msubsup><mml:mover accent="true"><mml:mi>δ</mml:mi> <mml:mo>¯</mml:mo></mml:mover> <mml:mi>h</mml:mi> <mml:mrow><mml:mi>n</mml:mi> <mml:mi>n</mml:mi></mml:mrow></mml:msubsup></mml:math></alternatives></inline-formula>: a more diverse set of solutions, in which solutions are scattered across the space of optimal networks.</p>
</list-item>
</list>
<p>Although simple, these metrics provide an idea of how different the solutions enumerated by the methods are.</p>
</sec>
<sec id="sec010">
<title>Essential gene prediction and metabolic network ensembles</title>
<p>Context-specific metabolic networks can be used to make predictions about the metabolism of a cell or tissue in a specific experimental condition. Of particular interest is the prediction of essential genes. An essential gene is a gene whose total or partial inactivation prevents the organism from growing or surviving. Some genes are absolutely required for survival, whereas other genes are conditionally essential, meaning that they are essential depending on the environmental conditions. For example, the gene ARG2, which encodes glutamate N-acetyltransferase —a mitochondrial enzyme that catalyzes the first step in the biosynthesis of arginine— is annotated as an essential gene in <italic>Saccharomyces cerevisiae</italic> (<ext-link ext-link-type="uri" xlink:href="https://www.yeastgenome.org/locus/S000003607" xlink:type="simple">https://www.yeastgenome.org/locus/S000003607</ext-link>) only in the absence of arginine in the medium.</p>
<p>Many essential genes that are related to metabolism (those related to enzymes) can be predicted using metabolic networks. However, conditionally essential genes are particularly hard to predict since they cannot be predicted without integrating experimental data or knowledge related to the condition. Context-specific metabolic networks are able to predict them indirectly, by extracting first the sub-network which is most consistent with the experimental data. After removing all the reactions that are predicted to be inactive in a given context, conditionally essential genes that were not essential in the generic network might be now essential in the reconstructed network.</p>
<p>Predictions of essential genes using metabolic networks can be done by comparing the maximum flux through the biomass reaction —an artificial reaction that encodes the minimum requirements of the organism to sustain a basic metabolic activity— using Flux Balance Analysis (FBA) [<xref ref-type="bibr" rid="pcbi.1008730.ref043">43</xref>] before and after knocking out a gene in the metabolic network. If the flux through the biomass reaction is below a certain threshold after KO (e.g., below 1% with respect to the wild-type) then the gene is considered essential.</p>
<p>However, as explained before in this section, it is common to find more than one optimal context-specific metabolic network for a given condition, each one representing a different hypothesis of the metabolic state. Each network may predict different essential genes. Since all networks fit the experimental data equally well, there is no clear way to decide a priori which of these predictions may be true. In this situation, a reasonable strategy is to consider that if any network predicts a gene to be essential, then the ensemble decides that the gene is essential, in order to maximize the number of true essential genes (at the expense of increasing the false positives), similar to what has been done in [<xref ref-type="bibr" rid="pcbi.1008730.ref024">24</xref>] with Gap-Filling methods.</p>
<p><xref ref-type="fig" rid="pcbi.1008730.g003">Fig 3</xref> shows an example of how the procedure works. For each gene, a KO is simulated by maximizing the flux through the biomass reaction after knocking out the reaction or reactions associated with the gene (based on the Gene-Protein-Reaction rules), using the <monospace>singleGeneDeletion</monospace> method from the COBRA Toolbox [<xref ref-type="bibr" rid="pcbi.1008730.ref027">27</xref>]. If the ratio between the KO and the wild type is below 0.01 (flux after KO below 1%), the gene is classified as essential. This process is repeated for all genes and for all optimal networks, and then results are combined by performing a logical OR of the predictions across networks.</p>
<fig id="pcbi.1008730.g003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1008730.g003</object-id>
<label>Fig 3</label>
<caption>
<title>Example of a metabolic network ensemble.</title>
<p>Predictions of the essential genes produced by Net1, Net2 and Net3 are combined by performing a logical OR.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1008730.g003" xlink:type="simple"/>
</fig>
<p>After obtaining the predictions for each gene, the True Positive Rate (TPR, sensitivity) and the False Positive Rate (FPR, 1-specificity) are calculated by comparing the predictions against the true essential genes for <italic>Saccharomyces cerevisiae</italic> (included in the repository of the code), and applying the following formulas:
<disp-formula id="pcbi.1008730.e036"><alternatives><graphic id="pcbi.1008730.e036g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e036" xlink:type="simple"/><mml:math display="block" id="M36"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:mi>T</mml:mi> <mml:mi>P</mml:mi> <mml:mi>R</mml:mi> <mml:mo>=</mml:mo> <mml:mfrac><mml:mrow><mml:mi>T</mml:mi> <mml:mi>P</mml:mi></mml:mrow> <mml:mrow><mml:mi>T</mml:mi> <mml:mi>P</mml:mi> <mml:mo>+</mml:mo> <mml:mi>F</mml:mi> <mml:mi>N</mml:mi></mml:mrow></mml:mfrac> <mml:mspace width="1em"/><mml:mrow><mml:mo>(</mml:mo> <mml:mi>T</mml:mi> <mml:mi>P</mml:mi> <mml:mo>=</mml:mo> <mml:mtext>True</mml:mtext> <mml:mspace width="4pt"/><mml:mtext>Positives</mml:mtext> <mml:mo>,</mml:mo> <mml:mi>F</mml:mi> <mml:mi>N</mml:mi> <mml:mo>=</mml:mo> <mml:mtext>False</mml:mtext> <mml:mspace width="4pt"/><mml:mtext>Negatives</mml:mtext> <mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(9)</label></disp-formula> <disp-formula id="pcbi.1008730.e037"><alternatives><graphic id="pcbi.1008730.e037g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e037" xlink:type="simple"/><mml:math display="block" id="M37"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:mi>F</mml:mi> <mml:mi>P</mml:mi> <mml:mi>R</mml:mi> <mml:mo>=</mml:mo> <mml:mfrac><mml:mrow><mml:mi>F</mml:mi> <mml:mi>P</mml:mi></mml:mrow> <mml:mrow><mml:mi>F</mml:mi> <mml:mi>P</mml:mi> <mml:mo>+</mml:mo> <mml:mi>T</mml:mi> <mml:mi>N</mml:mi></mml:mrow></mml:mfrac> <mml:mspace width="1em"/><mml:mrow><mml:mo>(</mml:mo> <mml:mi>F</mml:mi> <mml:mi>P</mml:mi> <mml:mo>=</mml:mo> <mml:mtext>False</mml:mtext> <mml:mspace width="4pt"/><mml:mtext>Positives</mml:mtext> <mml:mo>,</mml:mo> <mml:mi>T</mml:mi> <mml:mi>N</mml:mi> <mml:mo>=</mml:mo> <mml:mtext>True</mml:mtext> <mml:mspace 
width="4pt"/><mml:mtext>Negatives</mml:mtext> <mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(10)</label></disp-formula></p>
</sec>
<sec id="sec011">
<title>Data pre-processing</title>
<p>One common step prior to any metabolic reconstruction is pre-processing the experimental data to map it onto the metabolic networks. The way in which this data is pre-processed also depends on the objective of the reconstruction and the type of data used (generally gene expression data). A common approach is to use gene expression data to classify reactions into two groups: reactions for which there is experimental evidence of being active for a given condition, and reactions for which there is not enough evidence.</p>
<p>A simple method that is frequently used for this purpose is based on a prior classification of genes using quantile thresholds on the normalized gene expression [<xref ref-type="bibr" rid="pcbi.1008730.ref019">19</xref>]. In this way, genes whose expression levels are above or below certain quantiles are classified as highly or lowly expressed genes. For example, a threshold specified as [0.25, 0.75] means that genes whose values are below the 25th percentile are classified as lowly expressed, and genes above the 75th percentile are classified as highly expressed. Afterwards, genes are mapped onto the metabolic network using the Gene-Protein-Reaction rules defined in the GSMN in order to get the reactions associated with highly expressed or lowly expressed genes.</p>
<p>
<xref ref-type="fig" rid="pcbi.1008730.g004">Fig 4</xref> shows an example for a threshold of [0.10, 0.90] on the normalized microarray gene expression of the <italic>Saccharomyces cerevisiae</italic> under aerobic conditions [<xref ref-type="bibr" rid="pcbi.1008730.ref018">18</xref>, <xref ref-type="bibr" rid="pcbi.1008730.ref026">26</xref>]. In this example, genes whose expression levels fall above the upper threshold (around a normalized gene expression of 11) are considered highly expressed, whereas genes below a normalized gene expression around 6 are classified as lowly expressed genes.</p>
<fig id="pcbi.1008730.g004" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1008730.g004</object-id>
<label>Fig 4</label>
<caption>
<title>Example of gene expression thresholds.</title>
<p>The example shows the quantile thresholds [0.10, 0.90] (indicated with dashed lines) on the normalized gene expression levels (microarray) from <italic>Saccharomyces cerevisiae</italic> under aerobic conditions (20.9% oxygen levels) from [<xref ref-type="bibr" rid="pcbi.1008730.ref018">18</xref>, <xref ref-type="bibr" rid="pcbi.1008730.ref026">26</xref>]. Genes above the upper threshold are classified as highly expressed genes, whereas genes below the lower threshold are classified as lowly expressed genes.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1008730.g004" xlink:type="simple"/>
</fig>
<p>As mentioned before in the section <italic>Methods</italic>, for practical reasons in this work we use the iMAT reconstruction objective as the base reconstruction problem for enumeration. This means that the results we enumerate are those that achieve the optimal trade-off between selection of reactions associated with highly expressed genes and removal of reactions associated with lowly expressed genes.</p>
<p>We use this threshold-based method for the classification of reactions based on gene expression levels because of its simplicity and widespread use, but other methods could be used instead, for example StanDep [<xref ref-type="bibr" rid="pcbi.1008730.ref044">44</xref>] or Barcode [<xref ref-type="bibr" rid="pcbi.1008730.ref045">45</xref>] (for Affymetrix microarray data). Changing the method changes the set of optimal solutions to the problem, but does not eliminate the problem associated with the enumeration. Analyzing the correctness of the pre-processing technique is beyond the scope of this work, since the problem of enumeration is independent of the pre-processing method (multiple optimal solutions can still exist regardless of the method used).</p>
</sec>
</sec>
<sec id="sec012" sec-type="results">
<title>Results</title>
<p>In Section <italic>Methods</italic> we show how the problem of context-specific metabolic network reconstruction is subject to significant variability due to the vast number of possible optimal metabolic networks that explain the same experimental data. This variability makes the interpretation of the metabolism using a single metabolic network not very reliable, since many equally valid alternative hypotheses are disregarded.</p>
<p>In this section, we analyze the performance of each of the methods implemented in <monospace>DEXOM</monospace> to generate a diverse sample of optimal metabolic networks, assuming that in practice it is not possible to fully enumerate the total unknown space of optimal solutions, as is generally the case. The evaluation is divided into three parts.</p>
<p>First, we evaluate the diversity of the set of solutions discovered with each method in two scenarios: 1) when the true number of possible solutions is known (known ground truth), using the simple Directed Acyclic Graph model introduced in Section <italic>Methods</italic>; and 2) when the number of possible solutions is not known, using the Yeast 6 GSMN [<xref ref-type="bibr" rid="pcbi.1008730.ref046">46</xref>] as a biologically realistic metabolic network. For the second case, we select random sets of highly expressed and lowly expressed genes from the Yeast 6 model to generate problems in which the total number of optimal solutions is not known a priori, and we compare the samples generated with each method in terms of diversity.</p>
<p>Second, we evaluate the predictive capabilities of each method for in-silico prediction of essential genes. Using real gene expression data for <italic>Saccharomyces cerevisiae</italic> under aerobic conditions [<xref ref-type="bibr" rid="pcbi.1008730.ref018">18</xref>, <xref ref-type="bibr" rid="pcbi.1008730.ref026">26</xref>] and the Yeast 6 model [<xref ref-type="bibr" rid="pcbi.1008730.ref046">46</xref>], we enumerate thousands of optimal networks with each method and we assess the performance by predicting which genes are essential using both the individual networks and ensembles of networks constructed by combining the predictions of the individual networks.</p>
<p>Finally, we use gene expression data from four different human cancer cell lines and we reconstruct many optimal networks per cell line using different combinations of gene thresholds and methods. We compare the ability of each technique to discover alternative hypotheses of the metabolic state of the cells by performing pathway enrichment on the set of optimal solutions.</p>
<sec id="sec013">
<title>Diversity-enum explores a wider region of the optimal network space</title>
<p>We measure how well each method performs to generate diverse samples of optimal solutions. To do so, we generate samples of fixed size with each method and we measure the diversity of the sample using the average Hamming distance and the average nearest neighbor distance that were introduced in Section <italic>Methods</italic>. We consider two different scenarios: 1) obtaining a sample of optimal metabolic networks in a simulated scenario where the number of total optimal solutions is known; and 2) obtaining a sample of optimal solutions in realistic scenarios where the total number of optimal solutions is unknown.</p>
<sec id="sec014">
<title>Evaluation in a simulated scenario with a known number of possible optimal solutions</title>
<p>One of the difficulties of measuring the diversity of the solutions obtained by different methods is the absence of a ground truth to compare with, as the full set of optimal solutions is in general not known. However, the DAG network model introduced before can be used as a simple ground truth generator, since the full set of optimal solutions is easy to determine.</p>
<p>In order to assess the coverage and diversity of a sample of optimal networks, we used the DAG network model with 5 layers and 4 metabolites per layer (74 reactions and 22 metabolites in total), which contains a total of 1,024 optimal metabolic networks. The different methods were used to sample from 1 to 250 optimal solutions (around 1/4 of the total set of possible optimal solutions).</p>
<p>
<xref ref-type="fig" rid="pcbi.1008730.g005">Fig 5</xref> shows a low-dimensional projection of the 250 optimal solutions obtained by each method, where each point is an optimal metabolic network encoded as a binary vector. The grey points correspond to the total of 1,024 optimal solutions that exist for this example.</p>
<fig id="pcbi.1008730.g005" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1008730.g005</object-id>
<label>Fig 5</label>
<caption>
<title>Space of optimal solutions of the DAG network problem explored by each method.</title>
<p>Low dimensional representation of the optimal networks enumerated with different methods. Each method was used to explore a maximum of 250 optimal solutions, out of the 1,024 existent solutions (grey points). Each point represents an optimal metabolic network as a binary vector projected in 2D using UMAP with Hamming distance and 30 neighbors. Both <monospace>Diversity-enum</monospace> and <monospace>Maxdist-enum</monospace> obtain a good diversity of solutions. (E) and (F) show the evolution of the distances in 30 independent runs.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1008730.g005" xlink:type="simple"/>
</fig>
<p>The <monospace>Reaction-enum</monospace> method shows a low coverage of the space of optimal solutions, enumerating only 7% of the full space of optimal networks. This is due to the fact that the <monospace>Reaction-enum</monospace> method changes the bounds of each reaction in the network independently from each other. Since each reaction participates in many optimal solutions, the <monospace>Reaction-enum</monospace> method can obtain only a subset of all possible optimal networks, missing a large fraction of optimal metabolic networks that cannot be recovered with this method.</p>
<p>Qualitatively speaking, the 250 solutions obtained with the <monospace>Icut-enum</monospace> method are not as spread out as the ones obtained with <monospace>Diversity-enum</monospace> and the <monospace>Maxdist-enum</monospace> method. Differences between <monospace>Diversity-enum</monospace> and <monospace>Maxdist-enum</monospace> are less obvious and hard to appreciate in a low dimensional embedding in this example.</p>
<p>In order to have a better picture of the diversity of the solutions, we calculated the evolution of the distances <inline-formula id="pcbi.1008730.e038"><alternatives><graphic id="pcbi.1008730.e038g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e038" xlink:type="simple"/><mml:math display="inline" id="M38"><mml:msub><mml:mover accent="true"><mml:mi>δ</mml:mi> <mml:mo>¯</mml:mo></mml:mover> <mml:mi>h</mml:mi></mml:msub></mml:math></alternatives></inline-formula> and <inline-formula id="pcbi.1008730.e039"><alternatives><graphic id="pcbi.1008730.e039g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e039" xlink:type="simple"/><mml:math display="inline" id="M39"><mml:msubsup><mml:mover accent="true"><mml:mi>δ</mml:mi> <mml:mo>¯</mml:mo></mml:mover> <mml:mi>h</mml:mi> <mml:mrow><mml:mi>n</mml:mi> <mml:mi>n</mml:mi></mml:mrow></mml:msubsup></mml:math></alternatives></inline-formula> for each method. We repeated the process 30 times to obtain different samples of 250 solutions. The results for the 30 independent runs are shown in <xref ref-type="fig" rid="pcbi.1008730.g005">Fig 5E and 5F</xref>. The average over the 30 runs is represented with a dashed line.</p>
<p>These figures show more clearly how <monospace>Diversity-enum</monospace> obtains the most diverse set with respect to the other methods after 150 optimal solutions were enumerated, surpassing the <monospace>Maxdist-enum</monospace> method. It can be seen how the behavior of the algorithm in terms of diversity changes dramatically after the initial solution set is calculated, around solution 50 (this effect is controlled by the <italic>d</italic><sub><italic>s</italic></sub> parameter described in the <italic>Methods</italic> section). At this point, <monospace>Diversity-enum</monospace> starts to increase the distance progressively, looking for more and more distant solutions, which is reflected in the increase of both <inline-formula id="pcbi.1008730.e040"><alternatives><graphic id="pcbi.1008730.e040g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e040" xlink:type="simple"/><mml:math display="inline" id="M40"><mml:msub><mml:mover accent="true"><mml:mi>δ</mml:mi> <mml:mo>¯</mml:mo></mml:mover> <mml:mi>h</mml:mi></mml:msub></mml:math></alternatives></inline-formula> and <inline-formula id="pcbi.1008730.e041"><alternatives><graphic id="pcbi.1008730.e041g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1008730.e041" xlink:type="simple"/><mml:math display="inline" id="M41"><mml:msubsup><mml:mover accent="true"><mml:mi>δ</mml:mi> <mml:mo>¯</mml:mo></mml:mover> <mml:mi>h</mml:mi> <mml:mrow><mml:mi>n</mml:mi> <mml:mi>n</mml:mi></mml:mrow></mml:msubsup></mml:math></alternatives></inline-formula>. In contrast, <monospace>Reaction-enum</monospace> obtains sets of solutions with a very poor diversity. After calculating 74 solutions, the method cannot generate new optimal networks (since there are only 74 non-reversible reactions in the network), and the solution set stops growing. 
Since the <monospace>Reaction-enum</monospace> method generates solutions by modifying the constraints of each reaction, one at a time and independently of each other, solutions are mostly concentrated in a concrete region of the space of possible solutions, which corresponds to solutions that are similar to each other. The <monospace>Maxdist-enum</monospace> method shows at the beginning of the search the largest distance, since the solutions are generated by finding extreme differences. After an initial set of 25 optimal solutions, the average distance stops increasing, but the average nearest neighbor distance continues to decrease. This means that the most distant solutions are discovered at the beginning of the search and then there is less and less distance between newly found solutions, something to expect given the reduced number of possible solutions in this example. Whether this small number of solutions (around 2% of the total number of equally valid solutions) is sufficient or not will depend on each particular case (for example, it can be enough to show an example of how extreme results can be in terms of different sets of reactions, but not enough to construct a good ensemble for the prediction of essential genes).</p>
</sec>
<sec id="sec015">
<title>Evaluation in realistic scenarios with an unknown number of optimal solutions</title>
<p>In order to evaluate the diversity in a more biological setting, we randomly select different sets of highly expressed and lowly expressed enzymes of varying size in the Yeast 6 metabolic model [<xref ref-type="bibr" rid="pcbi.1008730.ref046">46</xref>] and then we enumerate a maximum of 1,000 optimal metabolic networks with the different methods.</p>
<p><xref ref-type="fig" rid="pcbi.1008730.g006">Fig 6</xref> shows the results of the enumeration of up to 1,000 optimal sub-networks from a randomly selected set of 120 genes highly expressed and 120 genes lowly expressed on Yeast 6. Enumeration of optimal solutions was repeated 10 times for each method. Since in this case the true set of possible optimal solutions is not known, grey dots represent the union of all discovered optimal networks for all the methods.</p>
<fig id="pcbi.1008730.g006" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1008730.g006</object-id>
<label>Fig 6</label>
<caption>
<title>Diversity of solutions in simulated problems using Yeast 6.</title>
<p>Enumeration of a maximum of 1,000 optimal metabolic networks on Yeast 6 [<xref ref-type="bibr" rid="pcbi.1008730.ref046">46</xref>] model, selecting a random set of reactions (120 <italic>R</italic><sub><italic>H</italic></sub> and 120 <italic>R</italic><sub><italic>L</italic></sub>). Enumeration was repeated 10 times for each method, the average is represented with dashed lines in (E) and (F). The grey dots represent the union of all the solutions found by all the methods. <monospace>Diversity-enum</monospace> (A) shows a more homogeneous exploration of the space, exploring not only the distant solutions but also intermediate and close solutions.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1008730.g006" xlink:type="simple"/>
</fig>
<p>Again, a similar pattern of dispersion of the optimal solutions can be observed as with the DAG model. <monospace>Diversity-enum</monospace> (<xref ref-type="fig" rid="pcbi.1008730.g006">Fig 6A</xref>) obtains a set of solutions that look well spread across the space of enumerated solutions. The <monospace>Maxdist-enum</monospace> method misses most of the large set of similar solutions that are recovered by the other methods. Both the <monospace>Reaction-enum</monospace> and the <monospace>Icut-enum</monospace> method explore a similar and restricted region of the space, although <monospace>Icut-enum</monospace> can sample more densely from the same region.</p>
<p>Differences between the methods in this more realistic context are more obvious, and <monospace>Diversity-enum</monospace> performs comparatively better than the other methods. After <monospace>Diversity-enum</monospace> generates an initial set of around 600 solutions, both the average distance and the average nearest neighbor distance start to grow, surpassing the other methods. A similar pattern can be observed for different random sets of selected genes (<xref ref-type="supplementary-material" rid="pcbi.1008730.s001">S1</xref>, <xref ref-type="supplementary-material" rid="pcbi.1008730.s002">S2</xref> and <xref ref-type="supplementary-material" rid="pcbi.1008730.s003">S3</xref> Figs). The rate at which this distance grows depends on several factors, including: the distance between the initial solutions, the space of possible solutions, and the parameter <italic>d</italic><sub><italic>s</italic></sub>, which controls the rate at which the distance of new solutions increases (<xref ref-type="supplementary-material" rid="pcbi.1008730.s004">S4 Fig</xref>).</p>
</sec>
</sec>
<sec id="sec016">
<title>Prediction of essential genes using ensembles is highly dependent on the strategy used for enumeration</title>
<p>Next, we evaluate the predictive capabilities of the different methods for in-silico prediction of essential genes in the model organism <italic>Saccharomyces cerevisiae</italic>. We used gene expression measured from yeast in aerobic conditions [<xref ref-type="bibr" rid="pcbi.1008730.ref018">18</xref>, <xref ref-type="bibr" rid="pcbi.1008730.ref026">26</xref>]. Genes were classified into expressed and not expressed using different combinations of thresholds on the quantiles of the distribution, as is commonly done in context-specific network reconstruction. For instance, a threshold of [0.25, 0.75] indicates that genes whose normalized expression value is below the quantile 0.25 are classified as lowly expressed, whereas those above the quantile 0.75 are highly expressed. Reactions were split into <italic>R</italic><sub><italic>H</italic></sub> and <italic>R</italic><sub><italic>L</italic></sub> sets using the <monospace>mapExpressionToReactions</monospace> method from the COBRA Toolbox [<xref ref-type="bibr" rid="pcbi.1008730.ref027">27</xref>].</p>
<p>Essential genes in Yeast 6 [<xref ref-type="bibr" rid="pcbi.1008730.ref046">46</xref>] were curated using the most up-to-date information from the YDPM (<ext-link ext-link-type="uri" xlink:href="http://www-deletion.stanford.edu/YDPM" xlink:type="simple">http://www-deletion.stanford.edu/YDPM</ext-link>) database and the SGD (<ext-link ext-link-type="uri" xlink:href="https://www.yeastgenome.org" xlink:type="simple">https://www.yeastgenome.org</ext-link>) project [<xref ref-type="bibr" rid="pcbi.1008730.ref047">47</xref>] (<xref ref-type="supplementary-material" rid="pcbi.1008730.s008">S1 File</xref>). Genes that are essential due to mechanisms not directly related to metabolism were excluded from the set, as they cannot be predicted using FBA. In total, 188 genes out of the 900 in Yeast 6 are considered to be essential under aerobic conditions.</p>
<p>A maximum of 2,000 optimal networks were enumerated for each combination of threshold and method, using a time limit of 8h per threshold/method, and 5 min. timeout for each MILP problem. The lower bound of the biomass reaction was constrained to carry a small positive flux, to ensure that all initial sub-networks will allow biomass production and therefore could be used to simulate the effects of gene knockout on the biomass production using FBA. In-silico predictions of essential genes were carried out using COBRA Toolbox v3.0.6 [<xref ref-type="bibr" rid="pcbi.1008730.ref027">27</xref>], classifying each gene as essential if the flux through the biomass reaction was below 1% after KO.</p>
<p>Essential genes were predicted for each optimal network within the set of the optimal networks obtained by each method and threshold, but also for the ensemble of networks, by taking the union of the predictions as shown in <xref ref-type="fig" rid="pcbi.1008730.g003">Fig 3</xref>. That is, if a gene is predicted as essential by a single optimal network from a set of optimal networks enumerated using a given method and threshold, then the gene is classified as essential by the ensemble. Thus, in total, we generated 16 ensembles per method, one for each threshold.</p>
<p>
<xref ref-type="table" rid="pcbi.1008730.t001">Table 1</xref> shows the True Positive Rate (TPR, sensitivity) and False Positive Rate (FPR, 1-specificity) of these ensembles. <monospace>Diversity-enum</monospace> achieves the best TPR for all thresholds, with the best overall TPR of 0.7713 for the threshold [0.25, 0.90], which corresponds to the correct classification of 145 genes out of the 188 essential genes in the dataset. These results are followed by the <monospace>Reaction-enum</monospace> method, which achieves the same TPR as <monospace>Diversity-enum</monospace> in 8 out of 16 tests, with a slightly lower FPR in 6 out of those 8 tests. In contrast, <monospace>Maxdist-enum</monospace> and <monospace>Icut-enum</monospace> ensembles are not very well positioned in terms of TPR, although both methods achieve the lowest rates of false positives for some ensembles. Concretely, the <monospace>Icut-enum</monospace> method obtained the lowest FPR in 9 out of the 16 tests.</p>
<table-wrap id="pcbi.1008730.t001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1008730.t001</object-id>
<label>Table 1</label>
<caption>
<title>True Positive Rate (TPR) and False Positive Rate (FPR) of the ensembles for the prediction of essential genes in Yeast 6, for the different methods and thresholds.</title>
<p>Ensembles were generated by taking the union of the predictions of all enumerated networks per method and threshold.</p>
</caption>
<alternatives>
<graphic id="pcbi.1008730.t001g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1008730.t001" xlink:type="simple"/>
<table border="0" frame="box" rules="all">
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="center" style="background-color:#F3F3F3">Threshold</th>
<th align="center" style="background-color:#F3F3F3">Method</th>
<th align="center" style="background-color:#F3F3F3">TPR</th>
<th align="center" style="background-color:#F3F3F3">FPR</th>
<th align="center" style="background-color:#F3F3F3">Threshold</th>
<th align="center" style="background-color:#F3F3F3">Method</th>
<th align="center" style="background-color:#F3F3F3">TPR</th>
<th align="center" style="background-color:#F3F3F3">FPR</th>
</tr>
</thead>
<tbody>
<tr>
<td align="center" rowspan="4">0.10 0.90</td>
<td align="center">Diversity-enum</td>
<td align="char" char="."><bold>0.7234</bold></td>
<td align="char" char=".">0.1264</td>
<td align="center" rowspan="4">0.20 0.90</td>
<td align="center">Diversity-enum</td>
<td align="char" char="."><bold>0.7181</bold></td>
<td align="char" char=".">0.1194</td>
</tr>
<tr>
<td align="center">Reaction-enum</td>
<td align="char" char=".">0.7181</td>
<td align="char" char=".">0.1053</td>
<td align="center">Reaction-enum</td>
<td align="char" char=".">0.7128</td>
<td align="char" char=".">0.1025</td>
</tr>
<tr>
<td align="center">Maxdist-enum</td>
<td align="char" char=".">0.4255</td>
<td align="char" char="."><bold>0.0730</bold></td>
<td align="center">Maxdist-enum</td>
<td align="char" char=".">0.4734</td>
<td align="char" char=".">0.0969</td>
</tr>
<tr>
<td align="center">Icut-enum</td>
<td align="char" char=".">0.4681</td>
<td align="char" char=".">0.0815</td>
<td align="center">Icut-enum</td>
<td align="char" char=".">0.4628</td>
<td align="char" char="."><bold>0.0576</bold></td>
</tr>
<tr>
<td align="center" rowspan="4" style="background-color:#F3F3F3">0.10 0.85</td>
<td align="center" style="background-color:#F3F3F3">Diversity-enum</td>
<td align="char" char="." style="background-color:#F3F3F3"><bold>0.6755</bold></td>
<td align="char" char="." style="background-color:#F3F3F3">0.0871</td>
<td align="center" rowspan="4" style="background-color:#F3F3F3">0.20 0.85</td>
<td align="center" style="background-color:#F3F3F3">Diversity-enum</td>
<td align="char" char="." style="background-color:#F3F3F3"><bold>0.6649</bold></td>
<td align="char" char="." style="background-color:#F3F3F3">0.0927</td>
</tr>
<tr>
<td align="center" style="background-color:#F3F3F3">Reaction-enum</td>
<td align="char" char="." style="background-color:#F3F3F3">0.6649</td>
<td align="char" char="." style="background-color:#F3F3F3">0.0829</td>
<td align="center" style="background-color:#F3F3F3">Reaction-enum</td>
<td align="char" char="." style="background-color:#F3F3F3"><bold>0.6649</bold></td>
<td align="char" char="." style="background-color:#F3F3F3">0.0843</td>
</tr>
<tr>
<td align="center" style="background-color:#F3F3F3">Maxdist-enum</td>
<td align="char" char="." style="background-color:#F3F3F3">0.4521</td>
<td align="char" char="." style="background-color:#F3F3F3">0.0674</td>
<td align="center" style="background-color:#F3F3F3">Maxdist-enum</td>
<td align="char" char="." style="background-color:#F3F3F3">0.5053</td>
<td align="char" char="." style="background-color:#F3F3F3">0.0702</td>
</tr>
<tr>
<td align="center" style="background-color:#F3F3F3">Icut-enum</td>
<td align="char" char="." style="background-color:#F3F3F3">0.3617</td>
<td align="char" char="." style="background-color:#F3F3F3"><bold>0.0604</bold></td>
<td align="center" style="background-color:#F3F3F3">Icut-enum</td>
<td align="char" char="." style="background-color:#F3F3F3">0.4096</td>
<td align="char" char="." style="background-color:#F3F3F3"><bold>0.0365</bold></td>
</tr>
<tr>
<td align="center" rowspan="4">0.10 0.80</td>
<td align="center">Diversity-enum</td>
<td align="char" char="."><bold>0.7128</bold></td>
<td align="char" char=".">0.0688</td>
<td align="center" rowspan="4">0.20 0.80</td>
<td align="center">Diversity-enum</td>
<td align="char" char="."><bold>0.6755</bold></td>
<td align="char" char=".">0.0520</td>
</tr>
<tr>
<td align="center">Reaction-enum</td>
<td align="char" char="."><bold>0.7128</bold></td>
<td align="char" char=".">0.0716</td>
<td align="center">Reaction-enum</td>
<td align="char" char=".">0.6596</td>
<td align="char" char=".">0.0716</td>
</tr>
<tr>
<td align="center">Maxdist-enum</td>
<td align="char" char=".">0.4149</td>
<td align="char" char="."><bold>0.0379</bold></td>
<td align="center">Maxdist-enum</td>
<td align="char" char=".">0.4521</td>
<td align="char" char="."><bold>0.0337</bold></td>
</tr>
<tr>
<td align="center">Icut-enum</td>
<td align="char" char=".">0.4096</td>
<td align="char" char=".">0.0534</td>
<td align="center">Icut-enum</td>
<td align="char" char=".">0.3670</td>
<td align="char" char=".">0.0562</td>
</tr>
<tr><td align="center" rowspan="4" style="background-color:#F3F3F3">0.10 0.75</td>
<td align="center" style="background-color:#F3F3F3">Diversity-enum</td>
<td align="char" char="." style="background-color:#F3F3F3"><bold>0.6649</bold></td>
<td align="char" char="." style="background-color:#F3F3F3">0.0843</td>
<td align="center" rowspan="4" style="background-color:#F3F3F3">0.20 0.75</td>
<td align="center" style="background-color:#F3F3F3">Diversity-enum</td>
<td align="char" char="." style="background-color:#F3F3F3"><bold>0.6702</bold></td>
<td align="char" char="." style="background-color:#F3F3F3">0.0590</td>
</tr>
<tr>
<td align="center" style="background-color:#F3F3F3">Reaction-enum</td>
<td align="char" char="." style="background-color:#F3F3F3"><bold>0.6649</bold></td>
<td align="char" char="." style="background-color:#F3F3F3">0.0744</td>
<td align="center" style="background-color:#F3F3F3">Reaction-enum</td>
<td align="char" char="." style="background-color:#F3F3F3"><bold>0.6702</bold></td>
<td align="char" char="." style="background-color:#F3F3F3"><bold>0.0520</bold></td>
</tr>
<tr>
<td align="center" style="background-color:#F3F3F3">Maxdist-enum</td>
<td align="char" char="." style="background-color:#F3F3F3">0.4096</td>
<td align="char" char="." style="background-color:#F3F3F3"><bold>0.0548</bold></td>
<td align="center" style="background-color:#F3F3F3">Maxdist-enum</td>
<td align="char" char="." style="background-color:#F3F3F3">0.4096</td>
<td align="char" char="." style="background-color:#F3F3F3">0.0632</td>
</tr>
<tr>
<td align="center" style="background-color:#F3F3F3">Icut-enum</td>
<td align="char" char="." style="background-color:#F3F3F3">0.3723</td>
<td align="char" char="." style="background-color:#F3F3F3"><bold>0.0548</bold></td>
<td align="center" style="background-color:#F3F3F3">Icut-enum</td>
<td align="char" char="." style="background-color:#F3F3F3">0.3670</td>
<td align="char" char="." style="background-color:#F3F3F3">0.0534</td>
</tr>
<tr>
<td align="center" rowspan="4">0.15 0.90</td>
<td align="center">Diversity-enum</td>
<td align="char" char="."><bold>0.7287</bold></td>
<td align="char" char=".">0.1053</td>
<td align="center" rowspan="4">0.25 0.90</td>
<td align="center">Diversity-enum</td>
<td align="char" char="."><bold>0.7713</bold></td>
<td align="char" char=".">0.1334</td>
</tr>
<tr>
<td align="center">Reaction-enum</td>
<td align="char" char=".">0.7234</td>
<td align="char" char=".">0.1096</td>
<td align="center">Reaction-enum</td>
<td align="char" char=".">0.7340</td>
<td align="char" char=".">0.1194</td>
</tr>
<tr>
<td align="center">Maxdist-enum</td>
<td align="char" char=".">0.4681</td>
<td align="char" char="."><bold>0.0758</bold></td>
<td align="center">Maxdist-enum</td>
<td align="char" char=".">0.5213</td>
<td align="char" char=".">0.1067</td>
</tr>
<tr>
<td align="center">Icut-enum</td>
<td align="char" char=".">0.4628</td>
<td align="char" char=".">0.0983</td>
<td align="center">Icut-enum</td>
<td align="char" char=".">0.4787</td>
<td align="char" char="."><bold>0.0913</bold></td>
</tr>
<tr>
<td align="center" rowspan="4" style="background-color:#F3F3F3">0.15 0.85</td>
<td align="center" style="background-color:#F3F3F3">Diversity-enum</td>
<td align="char" char="." style="background-color:#F3F3F3"><bold>0.7128</bold></td>
<td align="char" char="." style="background-color:#F3F3F3">0.0829</td>
<td align="center" rowspan="4" style="background-color:#F3F3F3">0.25 0.85</td>
<td align="center" style="background-color:#F3F3F3">Diversity-enum</td>
<td align="char" char="." style="background-color:#F3F3F3"><bold>0.6862</bold></td>
<td align="char" char="." style="background-color:#F3F3F3">0.0885</td>
</tr>
<tr>
<td align="center" style="background-color:#F3F3F3">Reaction-enum</td>
<td align="char" char="." style="background-color:#F3F3F3"><bold>0.7128</bold></td>
<td align="char" char="." style="background-color:#F3F3F3"><bold>0.0576</bold></td>
<td align="center" style="background-color:#F3F3F3">Reaction-enum</td>
<td align="char" char="." style="background-color:#F3F3F3">0.6649</td>
<td align="char" char="." style="background-color:#F3F3F3">0.0843</td>
</tr>
<tr>
<td align="center" style="background-color:#F3F3F3">Maxdist-enum</td>
<td align="char" char="." style="background-color:#F3F3F3">0.5000</td>
<td align="char" char="." style="background-color:#F3F3F3">0.0927</td>
<td align="center" style="background-color:#F3F3F3">Maxdist-enum</td>
<td align="char" char="." style="background-color:#F3F3F3">0.4574</td>
<td align="char" char="." style="background-color:#F3F3F3">0.0730</td>
</tr>
<tr>
<td align="center" style="background-color:#F3F3F3">Icut-enum</td>
<td align="char" char="." style="background-color:#F3F3F3">0.3617</td>
<td align="char" char="." style="background-color:#F3F3F3"><bold>0.0576</bold></td>
<td align="center" style="background-color:#F3F3F3">Icut-enum</td>
<td align="char" char="." style="background-color:#F3F3F3">0.4202</td>
<td align="char" char="." style="background-color:#F3F3F3"><bold>0.0590</bold></td>
</tr>
<tr>
<td align="center" rowspan="4">0.15 0.80</td>
<td align="center">Diversity-enum</td>
<td align="char" char="."><bold>0.7021</bold></td>
<td align="char" char=".">0.0885</td>
<td align="center" rowspan="4">0.25 0.80</td>
<td align="center">Diversity-enum</td>
<td align="char" char="."><bold>0.6702</bold></td>
<td align="char" char=".">0.0871</td>
</tr>
<tr>
<td align="center">Reaction-enum</td>
<td align="char" char="."><bold>0.7021</bold></td>
<td align="char" char=".">0.0815</td>
<td align="center">Reaction-enum</td>
<td align="char" char="."><bold>0.6702</bold></td>
<td align="char" char=".">0.0576</td>
</tr>
<tr>
<td align="center">Maxdist-enum</td>
<td align="char" char=".">0.3989</td>
<td align="char" char="."><bold>0.0323</bold></td>
<td align="center">Maxdist-enum</td>
<td align="char" char=".">0.4096</td>
<td align="char" char=".">0.0590</td>
</tr>
<tr>
<td align="center">Icut-enum</td>
<td align="char" char=".">0.3670</td>
<td align="char" char=".">0.0548</td>
<td align="center">Icut-enum</td>
<td align="char" char=".">0.4521</td>
<td align="char" char="."><bold>0.0534</bold></td>
</tr>
<tr>
<td align="center" rowspan="4" style="background-color:#F3F3F3">0.15 0.75</td>
<td align="center" style="background-color:#F3F3F3">Diversity-enum</td>
<td align="char" char="." style="background-color:#F3F3F3"><bold>0.6649</bold></td>
<td align="char" char="." style="background-color:#F3F3F3">0.0506</td>
<td align="center" rowspan="4" style="background-color:#F3F3F3">0.25 0.75</td>
<td align="center" style="background-color:#F3F3F3">Diversity-enum</td>
<td align="char" char="." style="background-color:#F3F3F3"><bold>0.6755</bold></td>
<td align="char" char="." style="background-color:#F3F3F3">0.0843</td>
</tr>
<tr>
<td align="center" style="background-color:#F3F3F3">Reaction-enum</td>
<td align="char" char="." style="background-color:#F3F3F3"><bold>0.6649</bold></td>
<td align="char" char="." style="background-color:#F3F3F3">0.0801</td>
<td align="center" style="background-color:#F3F3F3">Reaction-enum</td>
<td align="char" char="." style="background-color:#F3F3F3">0.6702</td>
<td align="char" char="." style="background-color:#F3F3F3">0.0801</td>
</tr>
<tr>
<td align="center" style="background-color:#F3F3F3">Maxdist-enum</td>
<td align="char" char="." style="background-color:#F3F3F3">0.4309</td>
<td align="char" char="." style="background-color:#F3F3F3"><bold>0.0534</bold></td>
<td align="center" style="background-color:#F3F3F3">Maxdist-enum</td>
<td align="char" char="." style="background-color:#F3F3F3">0.4096</td>
<td align="char" char="." style="background-color:#F3F3F3">0.0604</td>
</tr>
<tr>
<td align="center" style="background-color:#F3F3F3">Icut-enum</td>
<td align="char" char="." style="background-color:#F3F3F3">0.4043</td>
<td align="char" char="." style="background-color:#F3F3F3">0.0562</td>
<td align="center" style="background-color:#F3F3F3">Icut-enum</td>
<td align="char" char="." style="background-color:#F3F3F3">0.4096</td>
<td align="char" char="." style="background-color:#F3F3F3"><bold>0.0548</bold></td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<p>Differences between ensembles can be better assessed by placing each ensemble in a ROC space (<xref ref-type="fig" rid="pcbi.1008730.g007">Fig 7</xref>), in which each point is an ensemble represented by its TPR and FPR. The upper part of the figure is dominated by <monospace>Diversity-enum</monospace> and the <monospace>Reaction-enum</monospace> method, whereas the <monospace>Maxdist-enum</monospace> and <monospace>Icut-enum</monospace> ensembles are characterized by lower rates of true and false positives.</p>
<fig id="pcbi.1008730.g007" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1008730.g007</object-id>
<label>Fig 7</label>
<caption>
<title>Performance of each ensemble (TPR/FPR) for each method and threshold.</title>
<p>Each point represents the score (FPR, TPR) achieved by each ensemble built using a concrete threshold and enumeration method (data from <xref ref-type="table" rid="pcbi.1008730.t001">Table 1</xref>).</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1008730.g007" xlink:type="simple"/>
</fig>
<p>One reason that explains these differences between the methods is the systematic generation of alternative solutions by testing every reaction in the model. If one reaction associated with a gene that is essential is not present in any of the optimal networks of the set, the gene is not predicted to be essential. However, if there exists at least one optimal solution in which this reaction is present and essential, both <monospace>Reaction-enum</monospace> and <monospace>Diversity-enum</monospace> have more chances to detect it as they are going to test if there exists an optimal network with that reaction being active. <monospace>Maxdist-enum</monospace> and <monospace>Icut-enum</monospace> methods leave many of these solutions unexplored. <monospace>Diversity-enum</monospace>, in contrast, uses the <monospace>Reaction-enum</monospace> strategy to have an initial set of solutions with variations in single reactions, from which it expands the search incrementally, increasing the chances of detecting even more essential genes.</p>
<p>Differences in TPR (<xref ref-type="supplementary-material" rid="pcbi.1008730.s006">S6 Fig</xref>) and FPR (<xref ref-type="supplementary-material" rid="pcbi.1008730.s007">S7 Fig</xref>) for the ensembles show that the individual networks generated by the different methods achieve a similar rate of true positives and false positives, and so the higher rates scored by the ensembles using <monospace>Diversity-enum</monospace> and <monospace>Reaction-enum</monospace> are driven by a more diverse set of predicted essential genes. That is, individual networks enumerated by these methods are able to correctly predict different sets of true essential genes, and so the union of those predictions includes a more diverse set of detected essential genes. Concretely, the median TPR for the ensembles generated with <monospace>Diversity-enum</monospace> and <monospace>Reaction-enum</monospace> increases by 142% with respect to the median TPR of their individual networks, whereas the TPR of the ensembles built with <monospace>Maxdist-enum</monospace> and <monospace>Icut-enum</monospace> increases by only 54% and 51%, respectively.</p>
<p>In order to test whether the distance parameter <italic>d</italic><sub><italic>s</italic></sub> has a strong impact on the results obtained with the <monospace>Diversity-enum</monospace> method, we repeated the same experiment with parameter values <italic>d</italic><sub><italic>s</italic></sub> = 0.990 and <italic>d</italic><sub><italic>s</italic></sub> = 0.999 (<xref ref-type="supplementary-material" rid="pcbi.1008730.s005">S5 Fig</xref>). In both cases, the results obtained are very similar to those obtained with the default parameter <italic>d</italic><sub><italic>s</italic></sub> = 0.995. This result suggests that the most relevant solutions to the problem of prediction of essential genes are concentrated in the same region of the space of optimal solutions that is explored by both <monospace>Reaction-enum</monospace> and <monospace>Diversity-enum</monospace>. This space corresponds to the alternative solutions generated by modifying the constraints of single reactions in the networks (forcing the inclusion of the reaction or knocking it out). Since the simulation of essential genes is based on simulating knockouts in the reactions associated with the genes, it is likely that most of the essential genes can be predicted in some of the optimal networks resulting from those variations in single reactions. However, there is still an advantage in using <monospace>Diversity-enum</monospace>, since it expands the initial set of solutions and is able to find many more solutions than the other technique is capable of finding, increasing the probability of detecting more relevant reconstructions.</p>
<p>The computational time of each of the techniques is also different, although it depends on the size of the network and the number of variables (reactions associated with highly and lowly expressed genes). In general, <monospace>Reaction-enum</monospace> is the fastest method, while <monospace>Maxdist-enum</monospace> is the slowest technique, since the optimization problem of looking for the farthest solution at every step has a higher computational cost (<xref ref-type="supplementary-material" rid="pcbi.1008730.s009">S1 Appendix</xref>).</p>
</sec>
<sec id="sec017">
<title>Diversity-enum detects more alternative hypotheses about the possible metabolic state of different human cancer cells</title>
<p>Next, we evaluate the ability to characterize the variability of predictions about which metabolic pathways are most active in different cancer cell lines. To do so, we reconstruct many optimal networks for each cancer cell line and we perform pathway enrichment on each network to see which pathways are more represented in the reconstructed networks than would be expected by chance. Given that there are multiple possible optimal reconstructions per cancer cell line, performing pathway enrichment on each optimal network will give different p-values for each pathway. This variability due to the method can have important implications. For example, by performing pathway enrichment on a single metabolic network, for a given significance level (e.g. <italic>α</italic> = 0.05) we can detect that pathway A is enriched whereas pathway B is not. However, if we enumerate the space of optimal solutions, we can find an alternative solution in which pathway B is enriched but pathway A is not. Reporting pathway enrichment p-values of a single context-specific metabolic network without characterizing the variability should be in general avoided, as these values are misleading.</p>
<p>In order to test the variability in pathway enrichment scores due to the alternative set of optimal solutions, we used data from [<xref ref-type="bibr" rid="pcbi.1008730.ref019">19</xref>] for melanoma cells (cell line A375) and leukemia cells (HL60, K562, and KBM7 cell lines) and the human Recon 1 model [<xref ref-type="bibr" rid="pcbi.1008730.ref048">48</xref>]. <xref ref-type="table" rid="pcbi.1008730.t002">Table 2</xref> shows the enrichment results for the reconstructions using two different gene expression thresholds. Column <italic>#Nets</italic> shows the number of optimal networks that each method was able to enumerate (in a time limit of 8 hours). Column <italic>#Enr.</italic> shows the number of different enriched pathways (adjusted p-value &lt; 0.05) that were detected by each method. It is important to remark that here, detecting more enriched pathways is better, since all the methods explore the same optimal solutions (all methods for enumeration maximize the same objective function and use the same experimental data). Detecting fewer enriched pathways means that there exist some other alternative metabolic networks that are enriched for other pathways but the enumeration method missed them, reporting that no enrichment was detected in any of the enumerated metabolic networks.</p>
<table-wrap id="pcbi.1008730.t002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1008730.t002</object-id>
<label>Table 2</label>
<caption>
<title>Number of optimal networks (#Nets) in a time limit of 8 hours, and number of different enriched metabolic pathways (#Enr. i.e., pathways with p-value &lt; 0.05 using the one-sided Fisher’s exact test for over-representation, corrected for multiple hypothesis comparisons using the Benjamini-Hochberg procedure) for each cell line and gene threshold.</title>
</caption>
<alternatives>
<graphic id="pcbi.1008730.t002g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1008730.t002" xlink:type="simple"/>
<table border="0" frame="box" rules="all">
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left" rowspan="2" style="background-color:#EFEFEF">Cell line</th>
<th align="left" rowspan="2" style="background-color:#EFEFEF">Threshold</th>
<th align="center" colspan="2" style="background-color:#EFEFEF">Diversity-enum</th>
<th align="center" colspan="2" style="background-color:#EFEFEF">Reaction-enum</th>
<th align="center" colspan="2" style="background-color:#EFEFEF">Icut-enum</th>
<th align="center" colspan="2" style="background-color:#EFEFEF">Maxdist-enum</th>
</tr>
<tr>
<th align="center" style="background-color:#EFEFEF">#Nets</th>
<th align="center" style="background-color:#EFEFEF">#Enr.</th>
<th align="center" style="background-color:#EFEFEF">#Nets</th>
<th align="center" style="background-color:#EFEFEF">#Enr.</th>
<th align="center" style="background-color:#EFEFEF">#Nets</th>
<th align="center" style="background-color:#EFEFEF">#Enr.</th>
<th align="center" style="background-color:#EFEFEF">#Nets</th>
<th align="center" style="background-color:#EFEFEF">#Enr.</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">A375</td>
<td align="left">[0.10, 0.90]</td>
<td align="right">2933</td>
<td align="right"><bold>31</bold></td>
<td align="right">2230</td>
<td align="right">28</td>
<td align="right">2804</td>
<td align="right">23</td>
<td align="right">3002</td>
<td align="right">27</td>
</tr>
<tr>
<td align="left">A375</td>
<td align="left">[0.25, 0.75]</td>
<td align="right">1439</td>
<td align="right"><bold>12</bold></td>
<td align="right">2278</td>
<td align="right">11</td>
<td align="right">892</td>
<td align="right">11</td>
<td align="right">1364</td>
<td align="right">11</td>
</tr>
<tr>
<td align="left" style="background-color:#EFEFEF">HL60</td>
<td align="left" style="background-color:#EFEFEF">[0.10, 0.90]</td>
<td align="right" style="background-color:#EFEFEF">2855</td>
<td align="right" style="background-color:#EFEFEF"><bold>28</bold></td>
<td align="right" style="background-color:#EFEFEF">2200</td>
<td align="right" style="background-color:#EFEFEF"><bold>28</bold></td>
<td align="right" style="background-color:#EFEFEF">3004</td>
<td align="right" style="background-color:#EFEFEF">23</td>
<td align="right" style="background-color:#EFEFEF">3001</td>
<td align="right" style="background-color:#EFEFEF">27</td>
</tr>
<tr>
<td align="left" style="background-color:#EFEFEF">HL60</td>
<td align="left" style="background-color:#EFEFEF">[0.25, 0.75]</td>
<td align="right" style="background-color:#EFEFEF">1450</td>
<td align="right" style="background-color:#EFEFEF"><bold>13</bold></td>
<td align="right" style="background-color:#EFEFEF">2290</td>
<td align="right" style="background-color:#EFEFEF"><bold>13</bold></td>
<td align="right" style="background-color:#EFEFEF">752</td>
<td align="right" style="background-color:#EFEFEF">9</td>
<td align="right" style="background-color:#EFEFEF">1223</td>
<td align="right" style="background-color:#EFEFEF">12</td>
</tr>
<tr>
<td align="left">K562</td>
<td align="left">[0.10, 0.90]</td>
<td align="right">2934</td>
<td align="right"><bold>29</bold></td>
<td align="right">2208</td>
<td align="right">25</td>
<td align="right">3006</td>
<td align="right">25</td>
<td align="right">1835</td>
<td align="right">28</td>
</tr>
<tr>
<td align="left">K562</td>
<td align="left">[0.25, 0.75]</td>
<td align="right">1406</td>
<td align="right">12</td>
<td align="right">2283</td>
<td align="right"><bold>16</bold></td>
<td align="right">532</td>
<td align="right">11</td>
<td align="right">1274</td>
<td align="right">13</td>
</tr>
<tr>
<td align="left" style="background-color:#EFEFEF">KBM7</td>
<td align="left" style="background-color:#EFEFEF">[0.10, 0.90]</td>
<td align="right" style="background-color:#EFEFEF">2876</td>
<td align="right" style="background-color:#EFEFEF"><bold>27</bold></td>
<td align="right" style="background-color:#EFEFEF">2150</td>
<td align="right" style="background-color:#EFEFEF">25</td>
<td align="right" style="background-color:#EFEFEF">2914</td>
<td align="right" style="background-color:#EFEFEF">22</td>
<td align="right" style="background-color:#EFEFEF">3000</td>
<td align="right" style="background-color:#EFEFEF">26</td>
</tr>
<tr>
<td align="left" style="background-color:#EFEFEF">KBM7</td>
<td align="left" style="background-color:#EFEFEF">[0.25, 0.75]</td>
<td align="right" style="background-color:#EFEFEF">1793</td>
<td align="right" style="background-color:#EFEFEF"><bold>13</bold></td>
<td align="right" style="background-color:#EFEFEF">2206</td>
<td align="right" style="background-color:#EFEFEF"><bold>13</bold></td>
<td align="right" style="background-color:#EFEFEF">1384</td>
<td align="right" style="background-color:#EFEFEF">12</td>
<td align="right" style="background-color:#EFEFEF">1892</td>
<td align="right" style="background-color:#EFEFEF">13</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<p>Overall, the method <monospace>Diversity-enum</monospace> is able to discover more alternative hypotheses about the pathways that are most active in each cell line, especially for the threshold [0.10, 0.90]. One of the reasons why there are more pathways that can be enriched is that, with this threshold, far fewer genes are classified as expressed and not expressed, and therefore many reactions of the metabolic network remain unscored (they may or may not be active without affecting the optimality of the solution). This makes it much more likely to find alternative sub-networks whose flux is consistent for the selected genes. In other words, the fewer genes that are identified as expressed or not expressed (fewer constraints), the more possible hypotheses about the metabolic state are consistent with the data. This is especially relevant in studies using proteomic or exometabolomic data, where the number of identified proteins or metabolites is lower than in gene expression assays.</p>
<p>Among some of the differences, <monospace>Diversity-enum</monospace> detected the metabolic pathways <italic>Fatty acid activation</italic>, <italic>Fatty acid elongation</italic>, <italic>Fatty acid oxidation</italic> and <italic>Carnitine shuttle</italic> enriched in the reconstructions for the A375 cell line both for the
thresholds [0.10, 0.90] (<xref ref-type="fig" rid="pcbi.1008730.g008">Fig 8</xref>) and [0.25, 0.75] (<xref ref-type="fig" rid="pcbi.1008730.g009">Fig 9</xref>), whereas <monospace>Reaction-enum</monospace> detected them enriched only for the threshold [0.10, 0.90]. The single case where <monospace>Reaction-enum</monospace> discovered more existing alternative solutions with enrichment in other pathways not detected by <monospace>Diversity-enum</monospace> was for the cell line <italic>K562</italic> for the threshold [0.25, 0.75], where it was able to discover the alternative optimal solutions where pathways <italic>Ubiquinone Biosynthesis, Cysteine Metabolism, Aminosugar Metabolism, and Urea cycle/amino group metabolism</italic> were enriched in at least one of the optimal solutions enumerated with this method. <monospace>Icut-enum</monospace> was the strategy that obtained the worst results, not being able to find many of the optimal solutions with enrichment in other pathways that were discovered by the other methods. The <monospace>Maxdist-enum</monospace> method, although it detects in general less variation, finds in some cases enrichment in pathways that are not detected by any of the other techniques (e.g. Pentose and Glucuronate Interconversions in A375 for the threshold [0.10, 0.90], <xref ref-type="fig" rid="pcbi.1008730.g008">Fig 8</xref>). This could indicate that, although all solutions are equally valid, due to the topology of the network, the pattern of the distribution of highly and lowly expressed genes across the network, and other factors such as the type of algorithms used by the solvers, there are certain types of solutions that are more frequently discovered than others, and therefore the methods are biased towards the discovery of these types of solutions. In these cases, the <monospace>Diversity-enum</monospace> <italic>d</italic><sub><italic>s</italic></sub> parameter could be decreased to make the search spend more time exploring distant solutions.</p>
<fig id="pcbi.1008730.g008" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1008730.g008</object-id>
<label>Fig 8</label>
<caption>
<title>Pathway enrichment results for the cell line A375 (human melanoma), threshold [0.10, 0.90].</title>
<p>Dark purple indicates that the method detected at least one optimal metabolic network for which the corresponding pathway was overrepresented (p-value &lt; 0.05, B-H adjusted one-sided Fisher’s exact test). Only pathways from Recon 1 that were enriched in some optimal solution (by any of the methods) are shown.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1008730.g008" xlink:type="simple"/>
</fig>
<fig id="pcbi.1008730.g009" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1008730.g009</object-id>
<label>Fig 9</label>
<caption>
<title>Pathway enrichment results for the cell line A375 (human melanoma), threshold [0.25, 0.75].</title>
<p>Dark purple indicates that the method detected at least one optimal metabolic network for which the corresponding pathway was overrepresented (p-value &lt; 0.05, B-H adjusted one-sided Fisher’s exact test). Only pathways from Recon 1 that were enriched in some optimal solution (by any of the methods) are shown.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1008730.g009" xlink:type="simple"/>
</fig>
<p>We also analyzed how often each pathway was detected as enriched by any optimal metabolic network for each enumeration method (<xref ref-type="supplementary-material" rid="pcbi.1008730.s009">S1 Appendix</xref>). Among some of the possible causes that may affect this variability, the number of selected highly and lowly expressed genes has a clear impact on the results, for the reasons discussed before. Using a threshold of [0.10, 0.90], it can be seen that this variability is greater than for the threshold [0.25, 0.75], simply due to the fact that in the first case only 20% of the genes are used, and more hypotheses are consistent with the data. The enumeration of alternative reconstructions helps to characterize this variability and reduces the risk of incorrectly discarding hypotheses that are equally valid for the same reconstruction method and the same data.</p>
</sec>
</sec>
<sec id="sec018" sec-type="conclusions">
<title>Discussion</title>
<p>Context-specific metabolic network reconstruction is a widely used approach to integrate different layers of experimental data into metabolic networks. This process makes it possible to capture the metabolic sub-network that corresponds to the active part of the metabolism of an organism in a given condition. Using this reconstructed model, more advanced techniques such as Flux Balance Analysis (FBA), pathway enrichment, network visualization or gene essentiality prediction can be used to get an integrated view of the metabolic behavior.</p>
<p>One important limitation with this methodology is that context-specific metabolic network reconstruction is subject to significant variability due to the large number of optimal metabolic networks that can be reconstructed for the same experimental data, among other factors. This variability, which is commonly neglected, can contain relevant information and can offer alternative hypotheses of the metabolic state in terms of different combinations of reactions that are predicted to be active or inactive. Thus, the report of results using only a single optimal context-specific metabolic network can be highly biased and can overlook information relevant to the experiment. While this is an important issue, the analysis of the set of alternative metabolic networks is a topic not well explored.</p>
<p>In this study we analyze the problem of enumeration of multiple optimal context-specific metabolic networks both from a theoretical and practical perspective. We show how it is common to have multiple different context-specific metabolic networks that optimally explain the same observed experimental data. The set of optimal solutions constitutes different hypotheses of the metabolic state and therefore must be taken into account to reduce bias in the interpretation of results.</p>
<p>We propose four different methods for enumeration of context-specific reconstructions (<monospace>Reaction-enum</monospace>, <monospace>Icut-enum</monospace>, <monospace>Maxdist-enum</monospace> and <monospace>Diversity-enum</monospace>) that we developed and integrated in a unified open-source library called <monospace>DEXOM</monospace>. The first three methods are improvements of previous ideas that we have adapted and improved for the particular case of the enumeration of context-specific metabolic networks, whereas <monospace>Diversity-enum</monospace> is a novel method for enumeration of optimal solutions that incrementally maximizes the diversity.</p>
<p>We evaluate the methods focusing on two main aspects: 1) diversity of the optimal solutions obtained with each method, using two different distance metrics and UMAP plots to evaluate the spreading of the solutions; and 2) the biological relevance of alternative optimal solutions by assessing the predictive capabilities with real data. For this second aspect, we evaluate, on the one hand, the improvement in in-silico predictions of essential genes in <italic>Saccharomyces cerevisiae</italic> using ensembles of diverse metabolic networks, and on the other hand, the detection of alternative enriched pathways in human cancer cells, as a way to measure the variability of different hypotheses about the metabolic state that are compatible with the experimental data.</p>
<p>In terms of distance metrics and the spread of solutions, both <monospace>Diversity-enum</monospace> and <monospace>Maxdist-enum</monospace> achieve good results, although <monospace>Diversity-enum</monospace> explores the solution space in a more homogeneous way than <monospace>Maxdist-enum</monospace>, which looks for more solutions in the extremes. <monospace>Reaction-enum</monospace> has a limited exploration capacity, focusing on similar solutions that represent a small part of the total solution space. <monospace>Icut-enum</monospace>, although capable of enumerating more solutions than <monospace>Reaction-enum</monospace>, does so in a much less diverse manner than <monospace>Diversity-enum</monospace> and <monospace>Maxdist-enum</monospace>, and sometimes even with less diversity than <monospace>Reaction-enum</monospace>, as reflected in the simulations using the Yeast 6 model [<xref ref-type="bibr" rid="pcbi.1008730.ref046">46</xref>].</p>
<p>With respect to predictive capabilities of essential genes using the Yeast 6 model, on an individual basis there are not large differences in terms of True Positive Rate (TPR) and False Positive Rate (FPR) between the individual optimal metabolic networks enumerated by each method. However, when the results are combined using ensembles of optimal metabolic networks, the TPR of the ensemble obtained with <monospace>Diversity-enum</monospace> increases by 140% compared to the median TPR of the individual networks, whereas ensembles generated with the methods that generate less diverse sets of solutions achieved only an increment of 50%. <monospace>Diversity-enum</monospace> was also the method with the best overall TPR of 0.7713, which corresponds to 145 out of 188 correctly classified essential genes, for a FPR of 0.1334 (95 false positives out of 712 non essential genes). These differences are explained by a more diverse set of essential genes captured by the individual optimal networks enumerated with <monospace>Diversity-enum</monospace>. This suggests that <monospace>Diversity-enum</monospace> allows to retrieve sub-networks that are more diverse in terms of metabolic pathways that can be used to reach the metabolic state that conforms to the gene expression data, and allows to explore a more diverse metabolic activity that is consistent with the same experimental data.</p>
<p>The technique <monospace>Reaction-enum</monospace> is also able to generate good ensembles, achieving a similar FPR and TPR as <monospace>Diversity-enum</monospace>, while techniques <monospace>Icut-enum</monospace> and <monospace>Maxdist-enum</monospace> obtain much worse results, regardless of the diversity or total number of solutions discovered. This might be explained in part by the fact that the relevant set of solutions for this problem is mostly confined to a small region in the space of optimal solutions, which corresponds to the space that both <monospace>Diversity-enum</monospace> and <monospace>Reaction-enum</monospace> are able to explore.</p>
<p>In terms of alternative hypotheses of the metabolic state of different human cancer cells, results obtained using pathway enrichment on the set of the optimal networks discovered by each method show that <monospace>Diversity-enum</monospace> is able to discover, in almost all cases, more diverse alternative solutions in terms of networks that are enriched for other pathways. These results are again followed by <monospace>Reaction-enum</monospace>, <monospace>Maxdist-enum</monospace> and finally <monospace>Icut-enum</monospace>.</p>
<p>One important limitation of enumerating optimal solutions is the heavy computational cost involved in the search process. If the number of highly expressed genes and lowly expressed genes is very large, obtaining a single optimal metabolic network can be computationally demanding or even not feasible in reasonable time, since obtaining an optimal solution involves solving a MILP problem, which is in general NP-Hard. In this context, enumerating multiple optimal solutions can be prohibitively expensive in some cases, especially with techniques like <monospace>Maxdist-enum</monospace> or <monospace>Diversity-enum</monospace>. One thing that can be done in these situations to alleviate the computational burden is to relax the integer optimality tolerance of the solver to stop looking for better solutions once the solver has found a feasible integer solution proved to be, for example, within 1% of optimal.</p>
<p>Overall, this work provides different methods to explore the space of alternative context-specific metabolic network reconstructions, and an extensive comparison under different settings. We generated in total around 191,000 network reconstructions with simulated data, around 329,000 reconstructions using microarray data from <italic>Saccharomyces cerevisiae</italic> and around 67,400 using RNA-seq data from different human cancer cell lines. Results of this evaluation show the importance of using an enumeration technique that finds a diverse set of solutions for different biological contexts. These results also provide important information for deciding which technique to use in each case. In general, <monospace>Diversity-enum</monospace> is the one that detects the most varied and relevant solutions for different biological contexts, followed by <monospace>Reaction-enum</monospace>, <monospace>Maxdist-enum</monospace> and <monospace>Icut-enum</monospace>. Our study also highlights that, given the variability of the space of possible solutions that exists for a context-specific reconstruction problem, the analysis of a single solution, as is usually done, is not recommended, and downstream analysis made on a single metabolic network should always be taken with caution.</p>
</sec>
<sec id="sec019" sec-type="supplementary-material">
<title>Supporting information</title>
<supplementary-material id="pcbi.1008730.s001" mimetype="image/tiff" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1008730.s001" xlink:type="simple">
<label>S1 Fig</label>
<caption>
<title>Enumeration in Yeast 6 with 100 random <italic>R</italic><sub><italic>H</italic></sub> and 100 random <italic>R</italic><sub><italic>L</italic></sub> reactions.</title>
<p>(TIF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1008730.s002" mimetype="image/tiff" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1008730.s002" xlink:type="simple">
<label>S2 Fig</label>
<caption>
<title>Enumeration in Yeast 6 with 80 random <italic>R</italic><sub><italic>H</italic></sub> and 80 random <italic>R</italic><sub><italic>L</italic></sub> reactions.</title>
<p>(TIF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1008730.s003" mimetype="image/tiff" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1008730.s003" xlink:type="simple">
<label>S3 Fig</label>
<caption>
<title>Enumeration in Yeast 6 with 60 random <italic>R</italic><sub><italic>H</italic></sub> and 60 random <italic>R</italic><sub><italic>L</italic></sub> reactions.</title>
<p>(TIF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1008730.s004" mimetype="image/tiff" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1008730.s004" xlink:type="simple">
<label>S4 Fig</label>
<caption>
<title>Example of the effect of the parameter <italic>d</italic><sub><italic>s</italic></sub> used in the Diversity-enum method.</title>
<p>Values closer to 1 make the enumeration progress more slowly from closer to distant optimal solutions, discovering more proximate and intermediate solutions. When the value is lower (e.g. 0.990), the enumeration reaches the distant solutions more quickly, enumerating more solutions at the extremes.</p>
<p>(TIF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1008730.s005" mimetype="image/tiff" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1008730.s005" xlink:type="simple">
<label>S5 Fig</label>
<caption>
<title>Analysis of the effect of the parameter <italic>d</italic><sub><italic>s</italic></sub> used in the Diversity-enum method for the prediction of essential genes.</title>
<p>Analysis was repeated with parameter values <italic>d</italic><sub><italic>s</italic></sub> = 0.990 and <italic>d</italic><sub><italic>s</italic></sub> = 0.999 instead of the default value (<italic>d</italic><sub><italic>s</italic></sub> = 0.995). The results show almost no variation in terms of the TPR and FPR of the ensembles.</p>
<p>(TIF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1008730.s006" mimetype="image/tiff" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1008730.s006" xlink:type="simple">
<label>S6 Fig</label>
<caption>
<title>Distribution of the True Positive Rates (TPR).</title>
<p>Results show 1) the variability in the predictions of true essential genes by individual networks, and 2) the result of the ensemble for each method. Although the variation of results of the individual solutions enumerated with each method is similar, results of the ensemble greatly differ between methods. This indicates that although the individual networks predict a similar number of true positive essential genes, these sets overlap less in networks enumerated with <monospace>Diversity-enum</monospace> and <monospace>Reaction-enum</monospace>, and therefore the overall TPR of the ensembles generated by these methods is better.</p>
<p>(TIF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1008730.s007" mimetype="image/tiff" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1008730.s007" xlink:type="simple">
<label>S7 Fig</label>
<caption>
<title>Distribution of the False Positive Rates (FPR).</title>
<p>Results show, as for the TPR, an increase of the FPR of the ensembles, more pronounced for <monospace>Diversity-enum</monospace> and <monospace>Reaction-enum</monospace>, since there is always a trade-off between both measurements: increasing the predictions of true positives comes with a higher rate of false positives.</p>
<p>(TIF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1008730.s008" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1008730.s008" xlink:type="simple">
<label>S1 File</label>
<caption>
<title>Essential genes in Yeast.</title>
<p>Dataset used for the evaluation of in silico prediction of essential genes in Yeast. We used annotations from SGD [<xref ref-type="bibr" rid="pcbi.1008730.ref047">47</xref>], YDP (<ext-link ext-link-type="uri" xlink:href="https://www.yeastgenome.org/" xlink:type="simple">https://www.yeastgenome.org/</ext-link>) and Yeast 6 [<xref ref-type="bibr" rid="pcbi.1008730.ref046">46</xref>] to classify each gene as essential or not under aerobic conditions.</p>
<p>(XLSX)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1008730.s009" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1008730.s009" xlink:type="simple">
<label>S1 Appendix</label>
<caption>
<title>Supplementary information.</title>
<p>PDF including: 1) computational time of the different enumeration techniques; 2) distribution of predicted essential genes among methods; and 3) variability in the detection of enriched pathways between the different methods.</p>
<p>(PDF)</p>
</caption>
</supplementary-material>
</sec>
</body>
<back>
<ref-list>
<title>References</title>
<ref id="pcbi.1008730.ref001">
<label>1</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Ward</surname> <given-names>PS</given-names></name>, <name name-style="western"><surname>Thompson</surname> <given-names>CB</given-names></name>. <article-title>Metabolic reprogramming: a cancer hallmark even warburg did not anticipate</article-title>. <source>Cancer cell</source>. <year>2012</year>;<volume>21</volume>(<issue>3</issue>):<fpage>297</fpage>–<lpage>308</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.ccr.2012.02.014" xlink:type="simple">10.1016/j.ccr.2012.02.014</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref002">
<label>2</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>DeBerardinis</surname> <given-names>RJ</given-names></name>, <name name-style="western"><surname>Chandel</surname> <given-names>NS</given-names></name>. <article-title>Fundamentals of cancer metabolism</article-title>. <source>Science advances</source>. <year>2016</year>;<volume>2</volume>(<issue>5</issue>):<fpage>e1600200</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1126/sciadv.1600200" xlink:type="simple">10.1126/sciadv.1600200</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref003">
<label>3</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Zhao</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Butler</surname> <given-names>EB</given-names></name>, <name name-style="western"><surname>Tan</surname> <given-names>M</given-names></name>. <article-title>Targeting cellular metabolism to improve cancer therapeutics</article-title>. <source>Cell death &amp; disease</source>. <year>2013</year>;<volume>4</volume>(<issue>3</issue>):<fpage>e532</fpage>–<lpage>e532</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/cddis.2013.60" xlink:type="simple">10.1038/cddis.2013.60</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref004">
<label>4</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>De Mas</surname> <given-names>IM</given-names></name>, <name name-style="western"><surname>Aguilar</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Jayaraman</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Polat</surname> <given-names>IH</given-names></name>, <name name-style="western"><surname>Martín-Bernabé</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Bharat</surname> <given-names>R</given-names></name>, <etal>et al</etal>. <article-title>Cancer cell metabolism as new targets for novel designed therapies</article-title>. <source>Future medicinal chemistry</source>. <year>2014</year>;<volume>6</volume>(<issue>16</issue>):<fpage>1791</fpage>–<lpage>1810</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.4155/fmc.14.119" xlink:type="simple">10.4155/fmc.14.119</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref005">
<label>5</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Cascante</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Boros</surname> <given-names>LG</given-names></name>, <name name-style="western"><surname>Comin-Anduix</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>de Atauri</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Centelles</surname> <given-names>JJ</given-names></name>, <name name-style="western"><surname>Lee</surname> <given-names>PWN</given-names></name>. <article-title>Metabolic control analysis in drug discovery and disease</article-title>. <source>Nature biotechnology</source>. <year>2002</year>;<volume>20</volume>(<issue>3</issue>):<fpage>243</fpage>–<lpage>249</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nbt0302-243" xlink:type="simple">10.1038/nbt0302-243</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref006">
<label>6</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Liu</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Beyer</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Aebersold</surname> <given-names>R</given-names></name>. <article-title>On the dependency of cellular protein levels on mRNA abundance</article-title>. <source>Cell</source>. <year>2016</year>;<volume>165</volume>(<issue>3</issue>):<fpage>535</fpage>–<lpage>550</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.cell.2016.03.014" xlink:type="simple">10.1016/j.cell.2016.03.014</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref007">
<label>7</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Shlomi</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Cabili</surname> <given-names>MN</given-names></name>, <name name-style="western"><surname>Herrgård</surname> <given-names>MJ</given-names></name>, <name name-style="western"><surname>Palsson</surname> <given-names>BØ</given-names></name>, <name name-style="western"><surname>Ruppin</surname> <given-names>E</given-names></name>. <article-title>Network-based prediction of human tissue-specific metabolism</article-title>. <source>Nature biotechnology</source>. <year>2008</year>;<volume>26</volume>(<issue>9</issue>):<fpage>1003</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nbt.1487" xlink:type="simple">10.1038/nbt.1487</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref008">
<label>8</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Wang</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Eddy</surname> <given-names>JA</given-names></name>, <name name-style="western"><surname>Price</surname> <given-names>ND</given-names></name>. <article-title>Reconstruction of genome-scale metabolic models for 126 human tissues using mCADRE</article-title>. <source>BMC systems biology</source>. <year>2012</year>;<volume>6</volume>(<issue>1</issue>):<fpage>153</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/1752-0509-6-153" xlink:type="simple">10.1186/1752-0509-6-153</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref009">
<label>9</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Schultz</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Qutub</surname> <given-names>AA</given-names></name>. <article-title>Reconstruction of tissue-specific metabolic networks using CORDA</article-title>. <source>PLoS computational biology</source>. <year>2016</year>;<volume>12</volume>(<issue>3</issue>). <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pcbi.1004808" xlink:type="simple">10.1371/journal.pcbi.1004808</ext-link></comment> <object-id pub-id-type="pmid">26942765</object-id></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref010">
<label>10</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Folger</surname> <given-names>O</given-names></name>, <name name-style="western"><surname>Jerby</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Frezza</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Gottlieb</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Ruppin</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Shlomi</surname> <given-names>T</given-names></name>. <article-title>Predicting selective drug targets in cancer through metabolic networks</article-title>. <source>Molecular systems biology</source>. <year>2011</year>;<volume>7</volume>(<issue>1</issue>). <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/msb.2011.35" xlink:type="simple">10.1038/msb.2011.35</ext-link></comment> <object-id pub-id-type="pmid">21694718</object-id></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref011">
<label>11</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Nam</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Campodonico</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Bordbar</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Hyduke</surname> <given-names>DR</given-names></name>, <name name-style="western"><surname>Kim</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Zielinski</surname> <given-names>DC</given-names></name>, <etal>et al</etal>. <article-title>A systems approach to predict oncometabolites via context-specific genome-scale metabolic networks</article-title>. <source>PLoS computational biology</source>. <year>2014</year>;<volume>10</volume>(<issue>9</issue>). <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pcbi.1003837" xlink:type="simple">10.1371/journal.pcbi.1003837</ext-link></comment> <object-id pub-id-type="pmid">25232952</object-id></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref012">
<label>12</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Cascante</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Benito</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Zanuy</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Vizán</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Marín</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>de Atauri</surname> <given-names>P</given-names></name>. <article-title>Metabolic network adaptations in cancer as targets for novel therapies</article-title>. <source>Biochemical Society Transactions</source>. <year>2010</year>;<volume>38</volume>(<issue>5</issue>):<fpage>1302</fpage>–<lpage>1306</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1042/BST0381302" xlink:type="simple">10.1042/BST0381302</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref013">
<label>13</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Jerby</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Shlomi</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Ruppin</surname> <given-names>E</given-names></name>. <article-title>Computational reconstruction of tissue-specific metabolic models: application to human liver metabolism</article-title>. <source>Molecular systems biology</source>. <year>2010</year>;<volume>6</volume>(<issue>1</issue>). <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/msb.2010.56" xlink:type="simple">10.1038/msb.2010.56</ext-link></comment> <object-id pub-id-type="pmid">20823844</object-id></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref014">
<label>14</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Agren</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Bordel</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Mardinoglu</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Pornputtapong</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Nookaew</surname> <given-names>I</given-names></name>, <name name-style="western"><surname>Nielsen</surname> <given-names>J</given-names></name>. <article-title>Reconstruction of genome-scale active metabolic networks for 69 human cell types and 16 cancer types using INIT</article-title>. <source>PLoS computational biology</source>. <year>2012</year>;<volume>8</volume>(<issue>5</issue>). <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pcbi.1002518" xlink:type="simple">10.1371/journal.pcbi.1002518</ext-link></comment> <object-id pub-id-type="pmid">22615553</object-id></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref015">
<label>15</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Vlassis</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Pacheco</surname> <given-names>MP</given-names></name>, <name name-style="western"><surname>Sauter</surname> <given-names>T</given-names></name>. <article-title>Fast reconstruction of compact context-specific metabolic network models</article-title>. <source>PLoS computational biology</source>. <year>2014</year>;<volume>10</volume>(<issue>1</issue>). <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pcbi.1003424" xlink:type="simple">10.1371/journal.pcbi.1003424</ext-link></comment> <object-id pub-id-type="pmid">24453953</object-id></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref016">
<label>16</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Pacheco</surname> <given-names>MP</given-names></name>, <name name-style="western"><surname>John</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Kaoma</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Heinäniemi</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Nicot</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Vallar</surname> <given-names>L</given-names></name>, <etal>et al</etal>. <article-title>Integrated metabolic modelling reveals cell-type specific epigenetic control points of the macrophage metabolic network</article-title>. <source>BMC genomics</source>. <year>2015</year>;<volume>16</volume>(<issue>1</issue>):<fpage>809</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/s12864-015-1984-4" xlink:type="simple">10.1186/s12864-015-1984-4</ext-link></comment> <object-id pub-id-type="pmid">26480823</object-id></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref017">
<label>17</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Agren</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Mardinoglu</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Asplund</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Kampf</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Uhlen</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Nielsen</surname> <given-names>J</given-names></name>. <article-title>Identification of anticancer drugs for hepatocellular carcinoma through personalized genome-scale metabolic modeling</article-title>. <source>Molecular systems biology</source>. <year>2014</year>;<volume>10</volume>(<issue>3</issue>). <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1002/msb.145122" xlink:type="simple">10.1002/msb.145122</ext-link></comment> <object-id pub-id-type="pmid">24646661</object-id></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref018">
<label>18</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Machado</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Herrgård</surname> <given-names>M</given-names></name>. <article-title>Systematic evaluation of methods for integration of transcriptomic data into constraint-based models of metabolism</article-title>. <source>PLoS Comput Biol</source>. <year>2014</year>;<volume>10</volume>(<issue>4</issue>):<fpage>e1003580</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pcbi.1003580" xlink:type="simple">10.1371/journal.pcbi.1003580</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref019">
<label>19</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Opdam</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Richelle</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Kellman</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Li</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Zielinski</surname> <given-names>DC</given-names></name>, <name name-style="western"><surname>Lewis</surname> <given-names>NE</given-names></name>. <article-title>A systematic evaluation of methods for tailoring genome-scale metabolic models</article-title>. <source>Cell systems</source>. <year>2017</year>;<volume>4</volume>(<issue>3</issue>):<fpage>318</fpage>–<lpage>329</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.cels.2017.01.010" xlink:type="simple">10.1016/j.cels.2017.01.010</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref020">
<label>20</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Poupin</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Corlu</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Cabaton</surname> <given-names>NJ</given-names></name>, <name name-style="western"><surname>Dubois-Pot-Schneider</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Canlet</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Person</surname> <given-names>E</given-names></name>, <etal>et al</etal>. <article-title>Large-Scale Modeling Approach Reveals Functional Metabolic Shifts during Hepatic Differentiation</article-title>. <source>Journal of proteome research</source>. <year>2018</year>;<volume>18</volume>(<issue>1</issue>):<fpage>204</fpage>–<lpage>216</lpage>. <object-id pub-id-type="pmid">30394098</object-id></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref021">
<label>21</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Rossell</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Huynen</surname> <given-names>MA</given-names></name>, <name name-style="western"><surname>Notebaart</surname> <given-names>RA</given-names></name>. <article-title>Inferring metabolic states in uncharacterized environments using gene-expression measurements</article-title>. <source>PLoS computational biology</source>. <year>2013</year>;<volume>9</volume>(<issue>3</issue>). <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pcbi.1002988" xlink:type="simple">10.1371/journal.pcbi.1002988</ext-link></comment> <object-id pub-id-type="pmid">23555222</object-id></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref022">
<label>22</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Robaina-Estévez</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Nikoloski</surname> <given-names>Z</given-names></name>. <article-title>On the effects of alternative optima in context-specific metabolic model predictions</article-title>. <source>PLoS computational biology</source>. <year>2017</year>;<volume>13</volume>(<issue>5</issue>):<fpage>e1005568</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pcbi.1005568" xlink:type="simple">10.1371/journal.pcbi.1005568</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref023">
<label>23</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Schellenberger</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Palsson</surname> <given-names>BØ</given-names></name>. <article-title>Use of randomized sampling for analysis of metabolic networks</article-title>. <source>Journal of biological chemistry</source>. <year>2009</year>;<volume>284</volume>(<issue>9</issue>):<fpage>5457</fpage>–<lpage>5461</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1074/jbc.R800048200" xlink:type="simple">10.1074/jbc.R800048200</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref024">
<label>24</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Biggs</surname> <given-names>MB</given-names></name>, <name name-style="western"><surname>Papin</surname> <given-names>JA</given-names></name>. <article-title>Managing uncertainty in metabolic network structure and improving predictions using EnsembleFBA</article-title>. <source>PLoS computational biology</source>. <year>2017</year>;<volume>13</volume>(<issue>3</issue>):<fpage>e1005413</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pcbi.1005413" xlink:type="simple">10.1371/journal.pcbi.1005413</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref025">
<label>25</label>
<mixed-citation publication-type="other" xlink:type="simple">Danna E, Fenelon M, Gu Z, Wunderling R. Generating multiple solutions for mixed integer programming problems. In: International Conference on Integer Programming and Combinatorial Optimization. Springer; 2007. p. 280–294.</mixed-citation>
</ref>
<ref id="pcbi.1008730.ref026">
<label>26</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Rintala</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Toivari</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Pitkänen</surname> <given-names>JP</given-names></name>, <name name-style="western"><surname>Wiebe</surname> <given-names>MG</given-names></name>, <name name-style="western"><surname>Ruohonen</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Penttilä</surname> <given-names>M</given-names></name>. <article-title>Low oxygen levels as a trigger for enhancement of respiratory metabolism in Saccharomyces cerevisiae</article-title>. <source>BMC genomics</source>. <year>2009</year>;<volume>10</volume>(<issue>1</issue>):<fpage>461</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/1471-2164-10-461" xlink:type="simple">10.1186/1471-2164-10-461</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref027">
<label>27</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Heirendt</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Arreckx</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Pfau</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Mendoza</surname> <given-names>SN</given-names></name>, <name name-style="western"><surname>Richelle</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Heinken</surname> <given-names>A</given-names></name>, <etal>et al</etal>. <article-title>Creation and analysis of biochemical constraint-based models using the COBRA Toolbox v. 3.0</article-title>. <source>Nature protocols</source>. <year>2019</year>;<volume>14</volume>(<issue>3</issue>):<fpage>639</fpage>–<lpage>702</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/s41596-018-0098-2" xlink:type="simple">10.1038/s41596-018-0098-2</ext-link></comment> <object-id pub-id-type="pmid">30787451</object-id></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref028">
<label>28</label>
<mixed-citation publication-type="book" xlink:type="simple">
<name name-style="western"><surname>Junker</surname> <given-names>BH</given-names></name>, <name name-style="western"><surname>Schreiber</surname> <given-names>F</given-names></name>. <source>Analysis of biological networks</source>. <volume>vol. 2</volume>. <publisher-name>John Wiley &amp; Sons</publisher-name>; <year>2011</year>.</mixed-citation>
</ref>
<ref id="pcbi.1008730.ref029">
<label>29</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Brunk</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Sahoo</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Zielinski</surname> <given-names>DC</given-names></name>, <name name-style="western"><surname>Altunkaya</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Dräger</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Mih</surname> <given-names>N</given-names></name>, <etal>et al</etal>. <article-title>Recon3D enables a three-dimensional view of gene variation in human metabolism</article-title>. <source>Nature biotechnology</source>. <year>2018</year>;<volume>36</volume>(<issue>3</issue>):<fpage>272</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nbt.4072" xlink:type="simple">10.1038/nbt.4072</ext-link></comment> <object-id pub-id-type="pmid">29457794</object-id></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref030">
<label>30</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Robinson</surname> <given-names>JL</given-names></name>, <name name-style="western"><surname>Kocabaş</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Wang</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Cholley</surname> <given-names>PE</given-names></name>, <name name-style="western"><surname>Cook</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Nilsson</surname> <given-names>A</given-names></name>, <etal>et al</etal>. <article-title>An atlas of human metabolism</article-title>. <source>Science Signaling</source>. <year>2020</year>;<volume>13</volume>(<issue>624</issue>). <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1126/scisignal.aaz1482" xlink:type="simple">10.1126/scisignal.aaz1482</ext-link></comment> <object-id pub-id-type="pmid">32209698</object-id></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref031">
<label>31</label>
<mixed-citation publication-type="book" xlink:type="simple">
<name name-style="western"><surname>Conforti</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Cornuéjols</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Zambelli</surname> <given-names>G</given-names></name>, <etal>et al</etal>. <source>Integer programming</source>. <volume>vol. 271</volume>. <publisher-name>Springer</publisher-name>; <year>2014</year>.</mixed-citation>
</ref>
<ref id="pcbi.1008730.ref032">
<label>32</label>
<mixed-citation publication-type="book" xlink:type="simple">
<name name-style="western"><surname>Bremner</surname> <given-names>DD</given-names></name>. <source>On the complexity of vertex and facet enumeration for convex polytopes</source>. <publisher-name>McGill University</publisher-name>; <year>1997</year>.</mixed-citation>
</ref>
<ref id="pcbi.1008730.ref033">
<label>33</label>
<mixed-citation publication-type="book" xlink:type="simple">
<name name-style="western"><surname>Ziegler</surname> <given-names>GM</given-names></name>. <chapter-title>Lectures on 0/1-polytopes</chapter-title>. In: <source>Polytopes—combinatorics and computation</source>. <publisher-name>Springer</publisher-name>; <year>2000</year>. p. <fpage>1</fpage>–<lpage>41</lpage>.</mixed-citation>
</ref>
<ref id="pcbi.1008730.ref034">
<label>34</label>
<mixed-citation publication-type="other" xlink:type="simple">Becker B, Behle M, Eisenbrand F, Wimmer R. BDDs in a branch and cut framework. In: International Workshop on Experimental and Efficient Algorithms. Springer; 2005. p. 452–463.</mixed-citation>
</ref>
<ref id="pcbi.1008730.ref035">
<label>35</label>
<mixed-citation publication-type="other" xlink:type="simple">Behle M, Eisenbrand F. 0/1 vertex and facet enumeration with BDDs. In: 2007 Proceedings of the Ninth Workshop on Algorithm Engineering and Experiments (ALENEX). SIAM; 2007. p. 158–165.</mixed-citation>
</ref>
<ref id="pcbi.1008730.ref036">
<label>36</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Behle</surname> <given-names>M</given-names></name>. <article-title>On threshold BDDs and the optimal variable ordering problem</article-title>. <source>Journal of Combinatorial Optimization</source>. <year>2008</year>;<volume>16</volume>(<issue>2</issue>):<fpage>107</fpage>–<lpage>118</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s10878-007-9123-z" xlink:type="simple">10.1007/s10878-007-9123-z</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref037">
<label>37</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Serra</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Hooker</surname> <given-names>J</given-names></name>. <article-title>Compact representation of near-optimal integer programming solutions</article-title>. <source>Mathematical Programming</source>. <year>2019</year>; p. <fpage>1</fpage>–<lpage>34</lpage>.</mixed-citation>
</ref>
<ref id="pcbi.1008730.ref038">
<label>38</label>
<mixed-citation publication-type="other" xlink:type="simple">Serra T. Enumerative Branching with Less Repetition. In: International Conference on Integration of Constraint Programming, Artificial Intelligence, and Operations Research. Springer; 2020. p. 399–416.</mixed-citation>
</ref>
<ref id="pcbi.1008730.ref039">
<label>39</label>
<mixed-citation publication-type="other" xlink:type="simple">Achterberg T, Heinz S, Koch T. Counting solutions of integer programs using unrestricted subtree detection. In: International Conference on Integration of Artificial Intelligence (AI) and Operations Research (OR) Techniques in Constraint Programming. Springer; 2008. p. 278–282.</mixed-citation>
</ref>
<ref id="pcbi.1008730.ref040">
<label>40</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Bussieck</surname> <given-names>MR</given-names></name>, <name name-style="western"><surname>Lübbecke</surname> <given-names>ME</given-names></name>. <article-title>The vertex set of a 0/1-polytope is strongly P-enumerable</article-title>. <source>Computational Geometry</source>. <year>1998</year>;<volume>11</volume>(<issue>2</issue>):<fpage>103</fpage>–<lpage>109</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/S0925-7721(98)00021-2" xlink:type="simple">10.1016/S0925-7721(98)00021-2</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref041">
<label>41</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Tsai</surname> <given-names>JF</given-names></name>, <name name-style="western"><surname>Lin</surname> <given-names>MH</given-names></name>, <name name-style="western"><surname>Hu</surname> <given-names>YC</given-names></name>. <article-title>Finding multiple solutions to general integer linear programs</article-title>. <source>European Journal of Operational Research</source>. <year>2008</year>;<volume>184</volume>(<issue>2</issue>):<fpage>802</fpage>–<lpage>809</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.ejor.2006.11.024" xlink:type="simple">10.1016/j.ejor.2006.11.024</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref042">
<label>42</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Greistorfer</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Løkketangen</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Voß</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Woodruff</surname> <given-names>DL</given-names></name>. <article-title>Experiments concerning sequential versus simultaneous maximization of objective function and distance</article-title>. <source>Journal of Heuristics</source>. <year>2008</year>;<volume>14</volume>(<issue>6</issue>):<fpage>613</fpage>–<lpage>625</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s10732-007-9053-z" xlink:type="simple">10.1007/s10732-007-9053-z</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref043">
<label>43</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Orth</surname> <given-names>JD</given-names></name>, <name name-style="western"><surname>Thiele</surname> <given-names>I</given-names></name>, <name name-style="western"><surname>Palsson</surname> <given-names>BØ</given-names></name>. <article-title>What is flux balance analysis?</article-title> <source>Nature biotechnology</source>. <year>2010</year>;<volume>28</volume>(<issue>3</issue>):<fpage>245</fpage>–<lpage>248</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nbt.1614" xlink:type="simple">10.1038/nbt.1614</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref044">
<label>44</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Joshi</surname> <given-names>CJ</given-names></name>, <name name-style="western"><surname>Schinn</surname> <given-names>SM</given-names></name>, <name name-style="western"><surname>Richelle</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Shamie</surname> <given-names>I</given-names></name>, <name name-style="western"><surname>O’Rourke</surname> <given-names>EJ</given-names></name>, <name name-style="western"><surname>Lewis</surname> <given-names>NE</given-names></name>. <article-title>StanDep: capturing transcriptomic variability improves context-specific metabolic models</article-title>. <source>PLoS computational biology</source>. <year>2020</year>;<volume>16</volume>(<issue>5</issue>):<fpage>e1007764</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pcbi.1007764" xlink:type="simple">10.1371/journal.pcbi.1007764</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref045">
<label>45</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>McCall</surname> <given-names>MN</given-names></name>, <name name-style="western"><surname>Uppal</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Jaffee</surname> <given-names>HA</given-names></name>, <name name-style="western"><surname>Zilliox</surname> <given-names>MJ</given-names></name>, <name name-style="western"><surname>Irizarry</surname> <given-names>RA</given-names></name>. <article-title>The Gene Expression Barcode: leveraging public data repositories to begin cataloging the human and murine transcriptomes</article-title>. <source>Nucleic acids research</source>. <year>2011</year>;<volume>39</volume>(<issue>suppl_1</issue>):<fpage>D1011</fpage>–<lpage>D1015</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/nar/gkq1259" xlink:type="simple">10.1093/nar/gkq1259</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref046">
<label>46</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Heavner</surname> <given-names>BD</given-names></name>, <name name-style="western"><surname>Smallbone</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Price</surname> <given-names>ND</given-names></name>, <name name-style="western"><surname>Walker</surname> <given-names>LP</given-names></name>. <article-title>Version 6 of the consensus yeast metabolic network refines biochemical coverage and improves model performance</article-title>. <source>Database</source>. <year>2013</year>;<volume>2013</volume>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/database/bat059" xlink:type="simple">10.1093/database/bat059</ext-link></comment></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref047">
<label>47</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Cherry</surname> <given-names>JM</given-names></name>, <name name-style="western"><surname>Hong</surname> <given-names>EL</given-names></name>, <name name-style="western"><surname>Amundsen</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Balakrishnan</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Binkley</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Chan</surname> <given-names>ET</given-names></name>, <etal>et al</etal>. <article-title>Saccharomyces Genome Database: the genomics resource of budding yeast</article-title>. <source>Nucleic acids research</source>. <year>2012</year>;<volume>40</volume>(<issue>D1</issue>):<fpage>D700</fpage>–<lpage>D705</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/nar/gkr1029" xlink:type="simple">10.1093/nar/gkr1029</ext-link></comment> <object-id pub-id-type="pmid">22110037</object-id></mixed-citation>
</ref>
<ref id="pcbi.1008730.ref048">
<label>48</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Duarte</surname> <given-names>NC</given-names></name>, <name name-style="western"><surname>Becker</surname> <given-names>SA</given-names></name>, <name name-style="western"><surname>Jamshidi</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Thiele</surname> <given-names>I</given-names></name>, <name name-style="western"><surname>Mo</surname> <given-names>ML</given-names></name>, <name name-style="western"><surname>Vo</surname> <given-names>TD</given-names></name>, <etal>et al</etal>. <article-title>Global reconstruction of the human metabolic network based on genomic and bibliomic data</article-title>. <source>Proceedings of the National Academy of Sciences</source>. <year>2007</year>;<volume>104</volume>(<issue>6</issue>):<fpage>1777</fpage>–<lpage>1782</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1073/pnas.0610772104" xlink:type="simple">10.1073/pnas.0610772104</ext-link></comment> <object-id pub-id-type="pmid">17267599</object-id></mixed-citation>
</ref>
</ref-list>
</back>
<sub-article article-type="aggregated-review-documents" id="pcbi.1008730.r001" specific-use="decision-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pcbi.1008730.r001</article-id>
<title-group>
<article-title>Decision Letter 0</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Kaleta</surname>
<given-names>Christoph</given-names>
</name>
<role>Associate Editor</role>
</contrib>
<contrib contrib-type="author">
<name name-style="western">
<surname>Beard</surname>
<given-names>Daniel A</given-names>
</name>
<role>Deputy Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2021</copyright-year>
<copyright-holder>Kaleta, Beard</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pcbi.1008730" document-id-type="doi" document-type="article" id="rel-obj001" link-type="peer-reviewed-article"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>0</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">10 Sep 2020</named-content>
</p>
<p>Dear Dr. Rodríguez Mier,</p>
<p>Thank you very much for submitting your manuscript "DEXOM: Diversity-based enumeration of optimal context-specific metabolic networks" for consideration at PLOS Computational Biology.</p>
<p>As with all papers reviewed by the journal, your manuscript was reviewed by members of the editorial board and by several independent reviewers. In light of the reviews (below this email), we would like to invite the resubmission of a significantly-revised version that takes into account the reviewers' comments. In the revisions, as pointed out by the reviewers, please make particularly sure to provide more details on the insights which can be achieved through using an ensemble of metabolic models and expand the applications to another organism with a larger metabolic network (e.g. human) to demonstrate the broad applicability of your method.</p>
<p>We cannot make any decision about publication until we have seen the revised manuscript and your response to the reviewers' comments. Your revised manuscript is also likely to be sent to reviewers for further evaluation.</p>
<p>When you are ready to resubmit, please upload the following:</p>
<p>[1] A letter containing a detailed list of your responses to the review comments and a description of the changes you have made in the manuscript. Please note while forming your response, if your article is accepted, you may have the opportunity to make the peer review history publicly available. The record will include editor decision letters (with reviews) and your responses to reviewer comments. If eligible, we will contact you to opt in or out.</p>
<p>[2] Two versions of the revised manuscript: one with either highlights or tracked changes denoting where the text has been changed; the other a clean version (uploaded as the manuscript file).</p>
<p>Important additional instructions are given below your reviewer comments.</p>
<p>Please prepare and submit your revised manuscript within 60 days. If you anticipate any delay, please let us know the expected resubmission date by replying to this email. Please note that revised manuscripts received after the 60-day due date may require evaluation and peer review similar to newly submitted manuscripts.</p>
<p>Thank you again for your submission. We hope that our editorial process has been constructive so far, and we welcome your feedback at any time. Please don't hesitate to contact us if you have any questions or comments.</p>
<p>Sincerely,</p>
<p>Christoph Kaleta</p>
<p>Associate Editor</p>
<p>PLOS Computational Biology</p>
<p>Daniel Beard</p>
<p>Deputy Editor</p>
<p>PLOS Computational Biology</p>
<p>***********************</p>
<p>Reviewer's Responses to Questions</p>
<p><bold>Comments to the Authors:</bold></p>
<p><bold>Please note here if the review is uploaded as an attachment.</bold></p>
<p>Reviewer #1: Reproducibility report has been uploaded as an attachment.</p>
<p>Reviewer #2: Overview:</p>
<p>In the manuscript, "DEXOM: Diversity-based enumeration of optimal context-specific metabolic networks", the authors describe a novel method that creates a diversity of context-specific metabolic networks. The issue of alternative metabolic networks not being captured by various different context-specific extraction methods has been noted in the past. Previous papers, e.g. Robaina-Estevez S et al., 2017, pointed out the effects of alternate optimal context-specific metabolic networks. However, like that paper, the authors' results also appear to target iMAT and fastCORE. While I think alternate optimal context-specific metabolic networks do contain important results, I haven't seen anyone explicitly show that improvements in context-specific models can be achieved by enumerating alternate optimal states. To this end, the authors have tested alternate optimal context-specific metabolic networks to show that - (a) only small improvements could be achieved over Rxn-enum, and (b) large improvements could be achieved over Maxdist and Integer-cut. Different algorithms are, in a way, different interpretations of the organization of context-specific network; thus, it should depend on the enumeration algorithms. Further, the premise misses the importance of thresholding as an input to the algorithm. Recently, many different thresholding methods have been proposed and reviewed which may be able to perform better and may highlight the power of DEXOM. DEXOM can be a promising method since it did The authors have tried to do some benchmarking but it also requires more testing to fully evaluate the power of DEXOM. How much can I learn more about my data/organism/biology by using DEXOM rather than other methods? Based on this, I recommend major revisions.</p>
<p>MINOR CONCERNS:</p>
<p>1. Please provide a line numbered version, so reviewers can point out the location of reference easily.</p>
<p>2. How prevalent is the usage of DAG in understanding metabolic networks. Perhaps authors could better highlight and justify their use of DAGs in Introduction.</p>
<p>3. In regards to TPR and FPR, the authors chose to report the ratios. Could they, please, also show the actual numbers? I am curious how they get influenced from difference between low to high thresholds.</p>
<p>4. Are Table 1, Figure 7, and Figure 8 coming from the same set of simulations? If yes, could authors please either concise some of this information into a single figure or put some of it in supplementary information?</p>
<p>5. Could the authors, please, provide the link to the method within the paper somewhere as well.</p>
<p>MAJOR CONCERNS:</p>
<p>1. I want to make sure that by optimal context-specific model they mean models which maximize the addition of active and removal of inactive reactions with experimental evidence. Doesn't this definition assume that the output of the thresholding is accurate? Besides global thresholding used in this paper, recent studies have identified better thresholding approaches that generate more accurate models, also some thresholding approaches have enforced biological significance. Have the authors looked at some of these thresholding approaches to see if DEXOM can do better with better thresholds?</p>
<p>2. A lot of the work on how to measure diversity has been done, i.e. the diversity between models has often been reported using Jaccard similarity (see Fastcore line of papers and papers evaluating context-specific extraction methods). The behavior of this distance metric with reaction active with experimental evidence and those of the models has been shown in these papers. The authors can choose to use Hamming distance, but it becomes difficult for the reader to compare results in those papers with those reported by the authors. Could the authors please provide some justification for their choice to use Hamming distance? Alternatively, the authors could also show that these results could be reproduced using Jaccard similarity.</p>
<p>3. Robaina-Estevez et al. 2017 found that CORDA, a method that enforced metabolic functionality rather than the reactions performed better. Does this statement still hold true with DEXOM using gene essentiality as metric?</p>
<p>4. This is tied to point 4, how does DEXOM do if it is coupled with metabolic functionality.</p>
<p>5. For improving these types of context-specific models, two major ways have been targeted: (1) extraction method, and (2) thresholding. The authors are dealing with a novel extraction method. As a way to point out if this is the way forward, can the authors compare their TPR/FPR with those achieved from changing the thresholding method.</p>
<p>6. Different algorithms are enforcing different interpretations of how context-specific metabolic networks are organized. Whether the interpretation is true for only yeast, single-celled organisms, eukaryotes, or fungi, or other organisms too. Yeast is a single cell organism which can be grown in the lab relatively easily compared to mammalian cells. Extensive comparison data and models are available if DEXOM was applied to human tissues, mouse, and/or cancer cell lines. Could the authors show that DEXOM can be extrapolated to other organisms and/or datasets?</p>
<p>Reviewer #3: Rodríguez-Mier et al. propose DEXOM, a diversity based enumeration method of optimal context-specific metabolic models. In essence the authors describe advantages and disadvantages of three alternatives for enumerating alternative solutions of optimal context-specific metabolic models.</p>
<p>The authors show and describe a thorough understanding of the theoretical aspects of their work.</p>
<p>DEXOM takes into consideration the advantages of three alternative MILP methods (Rxn-enum, integer-cut, maxdist) and computes increasingly distant (diverse) optimal alternative solutions based on an initial sampling set (computed with Rxn-enum). Diversity and comparison to other methods is quantified by two metrics, hamming and nearest neighbor distance, which are used throughout the manuscript for evaluation across different methods. The capability of DEXOM to predict gene essentiality based on published data and a model for S. cerevisiae are demonstrated.</p>
<p>Though the authors identified and addressed a very important aspect of constraint based modeling, I see major drawbacks that prevent the laid out DEXOM algorithm to be published in PLOS COMP BIO.</p>
<p>Although the diversity seems to be sampled best by DEXOM as visually shown by UMAP based dimension reduction, the advantage of the improved metrics seems to be not always the case as shown in panels E and F of Fig. 5 and S1-S3 and depends to a large quantity on the sampled networks (cf. e.g. S3 against S2 and S1). Hence, it appears that it cannot be guaranteed that DEXOM outperforms Maxdist with respect to the given metrics. In addition, as shown in Table 1 and Figure 7 and 8, the advantage of using DEXOM over Rxn-enum for finding essential genes in a given network is not substantial. Specifically the increased TPR is often achieved at the price of an increased FPR, while the differences in TPR and FPR for Rxn-enum and DEXOM are generally very low. This seems to render Rxn-enum the better algorithm for identifying gene essentiality due to its low runtime (compared in Fig. 9), despite providing only a restricted and not diverse set of optimal networks.</p>
<p>In summary, the advantages of using and applying DEXOM are not obvious and the authors need to justify why DEXOM should be used instead of Rxn-enum (for finding essential genes) or Maxdist (for computing a diverse set alternative optimal networks for further downstream analysis like pathway enrichment).</p>
<p>With this several further major drawbacks are present:</p>
<p>- No usable DEXOM algorithm is provided. This work would greatly benefit from having a ready made software code available that based on any given SBML model allows to compute a diverse set of alternative optimal metabolic models.</p>
<p>- Only one model and one dataset on yeast are used for demonstrating and comparing the performance of the proposed algorithm and this only for gene essentiality analysis. Although mentioned in the introduction as potential use case the authors did not include any model analysis to investigate diseases in e.g. human, presumably because human reconstructions like Recon3D possess a large number of reactions, which make it computationally demanding to compute optimal solutions. Nevertheless, it remains unknown how DEXOM performs on other data. At least one further data set analysis, potentially with a different aim than identifying gene essentiality (e.g. enriched function) of another organism would greatly improve the trust in DEXOM, if it can be shown that it outperforms competing methods.</p>
<p>- The authors do not discuss the virtually equal quality of Rxn-enum vs DEXOM for gene essentiality analysis. Furthermore, TPR and FPR are not given for the alternative sets presented in S1-S3, which show much different hamming and nearest neighbor values, which presumably influence achievable TPR and FPR values. Code and solutions are not given to allow recapitulating the analysis.</p>
<p>- Throughout the manuscript preciseness is missing. Parameters and functions of the presented algorithm are not described (e.g. exp(a,b) = a^b is not explained). Figures are not described well (subpanel description are not given in captions and are most often not referenced in the main text). Another example is the definition and use of thresholds used in table 1, which need to be briefly introduced in the main text (e.g. at page 20).</p>
<p>- A major rewrite is recommended, as much of the methods section is recapitulating methods that are published elsewhere. Instead the focus should be on the DEXOM description, while the extensive text body on alternate methods (essentially page 4-13) leading towards DEXOM should be shortened dramatically in the main text. The detailed description would be suitable for a supplementary text that can be referenced in the methods.</p>
<p>Minor</p>
<p>==</p>
<p>- page 3: 10.1016/j.cels.2017.01.010 and <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pcbi.1003580" xlink:type="simple">https://doi.org/10.1371/journal.pcbi.1003580</ext-link> are two references that should be mentioned here as they show performances of methods tailored towards computing context specific metabolic models</p>
<p>- page 8: "... extensively exploited in commercial solvers such as IBM CPLEX and Gurobi" - both provide free academic licenses, which would be great to mention</p>
<p>- page 9: Here, a short paragraph on model optimisation by gap filling, detection of orphan reactions or EGCs leading to the fact that this does solve only parts of the problem of network artefacts would be suitable.</p>
<p>- page 14: "It starts computing an initial set of solutions using the Rxn-enum method avoiding duplicated solutions. This guarantees that single variations of reactions across all pathways are explored." - This is confusing, as the initial set of solutions will not necessarily include variations across pathways, unless the initial set size is sufficiently high.</p>
<p>- page 14: "Using a ds value close to one (e.g. ds = 0.99), the search concentrates at the beginning with more probability in the close vicinity of the selected solution." - Why should it be desirable to start with similar solutions as most diverse solutions are the goal? Of course the formulation allows to start with more distant solutions. It should be made clear here, why similar solutions might be desirable to be computed first.</p>
<p>- page 15: "For example, in Saccharomyces Cerevisiae, gene ARG2, which encodes glutamate N-acetyltransferase —a mitochondrial enzyme that catalyzes the first step in the biosynthesis of the arginine— is essential only in the absence of arginine in the medium." - It should be made clear that this might be a general theme in arginine metabolism, not just in S. cerevisiae (lowercase for cerevisiae)</p>
<p>- page 15: COBRA toolbox reference should be cited here and elsewhere. Also Yeast 6 is not referenced everywhere (e.g. page 16)</p>
<p>- page 17: "The grey points correspond to the 1,024 optimal solutions of the ground truth." - This is very unclear at this point. Please improve clarity of description of the figures here and elsewhere.</p>
<p>- page 17: "The Maxdist method shows at the beginning of the search the largest distance, since the solutions are generated by finding extreme differences. After an initial set of 25 optimal solutions, the average distance stops increasing. This is something to expect since the most distant solutions are usually discovered at the beginning of the search." - The question arises, whether having the first most diverse networks with Maxdist is sufficient and fast enough as both hamming and nearest neighbor distance are high. This should be analysed, mentioned and discussed.</p>
<p>- page 19: "After DEXOM generates an initial set of around 600 solutions, both the average distance and the average nearest neighbor distance start to grow surpassing the other methods." - It is surprising that there is not a gradual, but sudden improvement by DEXOM (also in Fig. 4 for the DAG model). The authors should explain or at least hypothesize on why this is happening abruptly and not monotonically increasing for both hamming and nearest neighbor distance. Regarding particularly investigation in the supplement it appears that for low numbers of sample networks, Maxdist is the better method in terms of metric performance. Again, the authors should discuss this point.</p>
<p>- page 20: Since [38] lists gene expression under low oxygen levels (at most 2.8%), the authors should discuss briefly, whether there is a difference to be expected for higher oxygen levels and how this might affect gene essentiality.</p>
<p>- page 20: "Thus, in total, we generated 16 ensembles per method, one for each threshold." - Thresholds are not defined.</p>
<p>- Table 1 would be much easier to investigate as bar plot with four bars (4 methods) for each threshold configuration</p>
<p>- methods and results/discussion should be clearly separated (e.g. machine configuration mentioned on page 24 is Methods content)</p>
<p>**********</p>
<p><bold>Have all data underlying the figures and results presented in the manuscript been provided?</bold></p>
<p>Large-scale datasets should be made available via a public repository as described in the <italic>PLOS Computational Biology</italic> <ext-link ext-link-type="uri" xlink:href="http://journals.plos.org/ploscompbiol/s/data-availability" xlink:type="simple">data availability policy</ext-link>, and numerical data that underlies graphs or summary statistics should be provided in spreadsheet form as supporting information.</p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: Yes</p>
<p>Reviewer #3: <bold>No: </bold>Computed models and essential genes are missing that are the basis for given manuscript and supplement figures and table 1.</p>
<p>**********</p>
<p>PLOS authors have the option to publish the peer review history of their article (<ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/ploscompbiol/s/editorial-and-peer-review-process#loc-peer-review-history" xlink:type="simple">what does this mean?</ext-link>). If published, this will include your full peer review and any attached files.</p>
<p>If you choose “no”, your identity will remain anonymous but your review may still be made public.</p>
<p><bold>Do you want your identity to be public for this peer review?</bold> For information about this choice, including consent withdrawal, please see our <ext-link ext-link-type="uri" xlink:href="https://www.plos.org/privacy-policy" xlink:type="simple">Privacy Policy</ext-link>.</p>
<p>Reviewer #1: No</p>
<p>Reviewer #2: No</p>
<p>Reviewer #3: <bold>Yes: </bold>Sascha Schäuble</p>
<p><underline>Figure Files:</underline></p>
<p>While revising your submission, please upload your figure files to the Preflight Analysis and Conversion Engine (PACE) digital diagnostic tool, <underline><ext-link ext-link-type="uri" xlink:href="https://pacev2.apexcovantage.com/" xlink:type="simple">https://pacev2.apexcovantage.com</ext-link></underline>. PACE helps ensure that figures meet PLOS requirements. To use PACE, you must first register as a user. Then, login and navigate to the UPLOAD tab, where you will find detailed instructions on how to use the tool. If you encounter any issues or have any questions when using PACE, please email us at <underline><email xlink:type="simple">figures@plos.org</email></underline>.</p>
<p><underline>Data Requirements:</underline></p>
<p>Please note that, as a condition of publication, PLOS' data policy requires that you make available all data used to draw the conclusions outlined in your manuscript. Data must be deposited in an appropriate repository, included within the body of the manuscript, or uploaded as supporting information. This includes all numerical values that were used to generate graphs, histograms, etc. For an example in PLOS Biology see here: <ext-link ext-link-type="uri" xlink:href="http://www.plosbiology.org/article/info%3Adoi%2F10.1371%2Fjournal.pbio.1001908#s5" xlink:type="simple">http://www.plosbiology.org/article/info%3Adoi%2F10.1371%2Fjournal.pbio.1001908#s5</ext-link>.</p>
<p><underline>Reproducibility:</underline></p>
<p>To enhance the reproducibility of your results, PLOS recommends that you deposit laboratory protocols in protocols.io, where a protocol can be assigned its own identifier (DOI) such that it can be cited independently in the future. For instructions, please see <underline><ext-link ext-link-type="uri" xlink:href="http://journals.plos.org/ploscompbiol/s/submission-guidelines#loc-materials-and-methods" xlink:type="simple">http://journals.plos.org/ploscompbiol/s/submission-guidelines#loc-materials-and-methods</ext-link></underline>.</p>
<supplementary-material id="pcbi.1008730.s010" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1008730.s010" xlink:type="simple">
<label>Attachment</label>
<caption>
<p>Submitted filename: <named-content content-type="submitted-filename">Reproducible_report__PCOMPBIOL_D_20_01343 (1).pdf</named-content></p>
</caption>
</supplementary-material>
</body>
</sub-article>
<sub-article article-type="author-comment" id="pcbi.1008730.r002">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pcbi.1008730.r002</article-id>
<title-group>
<article-title>Author response to Decision Letter 0</article-title>
</title-group>
<related-object document-id="10.1371/journal.pcbi.1008730" document-id-type="doi" document-type="peer-reviewed-article" id="rel-obj002" link-type="rebutted-decision-letter" object-id="10.1371/journal.pcbi.1008730.r001" object-id-type="doi" object-type="decision-letter"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>1</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="author-response-date">16 Nov 2020</named-content>
</p>
<supplementary-material id="pcbi.1008730.s011" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1008730.s011" xlink:type="simple">
<label>Attachment</label>
<caption>
<p>Submitted filename: <named-content content-type="submitted-filename">Response.pdf</named-content></p>
</caption>
</supplementary-material>
</body>
</sub-article>
<sub-article article-type="aggregated-review-documents" id="pcbi.1008730.r003" specific-use="decision-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pcbi.1008730.r003</article-id>
<title-group>
<article-title>Decision Letter 1</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Kaleta</surname>
<given-names>Christoph</given-names>
</name>
<role>Associate Editor</role>
</contrib>
<contrib contrib-type="author">
<name name-style="western">
<surname>Beard</surname>
<given-names>Daniel A</given-names>
</name>
<role>Deputy Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2021</copyright-year>
<copyright-holder>Kaleta, Beard</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pcbi.1008730" document-id-type="doi" document-type="article" id="rel-obj003" link-type="peer-reviewed-article"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>1</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">3 Dec 2020</named-content>
</p>
<p>Dear Dr. Rodríguez Mier,</p>
<p>Thank you very much for submitting your manuscript "DEXOM: Diversity-based enumeration of optimal context-specific metabolic networks" for consideration at PLOS Computational Biology. As with all papers reviewed by the journal, your manuscript was reviewed by members of the editorial board and by several independent reviewers. The reviewers appreciated the attention to an important topic. Based on the reviews, we are likely to accept this manuscript for publication, providing that you modify the manuscript according to the review recommendations. Please pay particular attention to the comments of reviewer 2 concerning the motivation to include all four methods in the work and the reasoning to include similarly performing methods with large differences in required runtime.</p>
<p>Please prepare and submit your revised manuscript within 30 days. If you anticipate any delay, please let us know the expected resubmission date by replying to this email. </p>
<p>When you are ready to resubmit, please upload the following:</p>
<p>[1] A letter containing a detailed list of your responses to all review comments, and a description of the changes you have made in the manuscript. Please note while forming your response, if your article is accepted, you may have the opportunity to make the peer review history publicly available. The record will include editor decision letters (with reviews) and your responses to reviewer comments. If eligible, we will contact you to opt in or out.</p>
<p>[2] Two versions of the revised manuscript: one with either highlights or tracked changes denoting where the text has been changed; the other a clean version (uploaded as the manuscript file).</p>
<p>Important additional instructions are given below your reviewer comments.</p>
<p>Thank you again for your submission to our journal. We hope that our editorial process has been constructive so far, and we welcome your feedback at any time. Please don't hesitate to contact us if you have any questions or comments.</p>
<p>Sincerely,</p>
<p>Christoph Kaleta</p>
<p>Associate Editor</p>
<p>PLOS Computational Biology</p>
<p>Daniel Beard</p>
<p>Deputy Editor</p>
<p>PLOS Computational Biology</p>
<p>***********************</p>
<p>A link appears below if there are any accompanying review attachments. If you believe any reviews to be missing, please contact <email xlink:type="simple">ploscompbiol@plos.org</email> immediately:</p>
<p>[LINK]</p>
<p>Reviewer's Responses to Questions</p>
<p><bold>Comments to the Authors:</bold></p>
<p><bold>Please note here if the review is uploaded as an attachment.</bold></p>
<p>Reviewer #1: Reproducibility report has been uploaded as an attachment.</p>
<p>Reviewer #2: OVERVIEW:</p>
<p>In the manuscript, "DEXOM: Diversity-based enumeration of optimal context-specific metabolic networks", the authors describe a novel method that create a diversity of context-specific metabolic networks. It seems that enumerating multiple context-specific networks is valuable and is known for sometime as authors have pointed out. It is more clear in this version of the manuscript that authors are trying to bring everything under one umbrella called DEXOM and subtly claiming that diversity-enum is more accurate. My first major concern is that Diversity-enum and Rxn-enum seem to have the same accuracy. My second major concern is that current analysis doesn't say anything about networks uniquely captured by Diversity-enum, Rxn-enum, Icut-enum, or Maxdist. What is the problem that authors are solving - time taken to generate the ensemble, diversity in the ensemble, accuracy of the ensemble, or simply a toolbox? Based on the current analysis shown, I am still not clear if additional diversity, i.e. Diversity-enum, was useful in either improving calculation time or accuracy. It is also not clear why a modeler should care for all the 4 approaches that authors have described here, when clearly only Rxn-enum and Diversity-enum are in anyway accurate. Given that analysis presented here needs more fleshing out, I would recommend a major revision. Please see details below.</p>
<p>MAJOR CONCERNS:</p>
<p>1. Why is there a need for 4 different approaches which need to be under one umbrella? Why can't just the best method prevail? Are there any advantages of Icut-enum and Maxdist-enum? These two have very poor accuracy and why do they need to be included in the paper or in the toolbox. Can the authors show, systemically, cases where enumeration using one method is better than other. The authors can do this by finding which essential genes are captured differentially among networks calculated using different methods. I think this is an important benchmarking step for the authors to justify the need of multiple enumeration methods within this toolbox for context-specific networks.</p>
<p>2. To me it appears Rxn-enum and Diversity-enum have similar accuracy. Diversity-enum and Maxdist give the most diversity within the models and they both take a long time to run. So authors generated a method which produces a diverse ensemble but it takes a long time. However, this long time is not producing better results than Rxn-enum, seems pretty similar to me. I would use Rxn-enum because it goes really fast and produces that same accuracy as Diversity-enum. I am not sure if Diversity-enum produces valuable diversity within models. Unless authors can systematically show that extra context-specific metabolic networks that are being differentially captured by Diversity-enum but not by Rxn-enum are valuable. Authors can simply see which of the networks captured uniquely by Diversity-enum captured gene essentiality not explained by Rxn-enum.</p>
<p>3. If Diversity-enum is better, could the authors just do a statistical test for difference in distributions of TPR from different methods? These are basically p-values from comparing TPR distributions between pairs of methods.</p>
<p>Reviewer #3: The authors underwent substantial labor to improve their manuscript.</p>
<p>The focus of the paper has been improved and shifted away from focusing on the new DEXOM formulation itself (now termed Diversity-enum) to presenting a suite of readily usable algorithms for enumerating alternative optimal metabolic network solutions. These include Diversity-enum as one of four methods for which no COBRA compatible toolbox extension was available so far. The precision in phrasing has been improved throughout the manuscript. More importantly the authors succeeded in adding another use-case, by adding an enrichment analysis of four human cancer cell lines. The authors showed that a set of differing context specific metabolic models with equal optimality can possess a diverse set of enriched metabolic pathways. This outcome is a valuable addition to the manuscript, but misses one aspect to be complete (see below).</p>
<p>The authors show that equally probable context specific networks can possess a diverse set of enriched functions. In consequence pathway enrichment analysis needs to be treated with care. The authors do not show, however, how frequent any of these pathways occur, e.g. is there a single pathway that is enriched in most, if not in all, sets of networks? This way enriched pathways occurring only a few times, might be considered artifacts of the respective networks, yet pathways that are enriched in most networks hint towards metabolic function that translated into the context specific metabolic networks and thus would be of value. Here, it would also be of interest how the frequency of enriched pathways differs after applying the four different methods. Such an analysis should be easy to add, would improve the provided insights into enriched pathway analysis and complete the revision.</p>
<p>**********</p>
<p><bold>Have all data underlying the figures and results presented in the manuscript been provided?</bold></p>
<p>Large-scale datasets should be made available via a public repository as described in the <italic>PLOS Computational Biology</italic> <ext-link ext-link-type="uri" xlink:href="http://journals.plos.org/ploscompbiol/s/data-availability" xlink:type="simple">data availability policy</ext-link>, and numerical data that underlies graphs or summary statistics should be provided in spreadsheet form as supporting information.</p>
<p>Reviewer #1: None</p>
<p>Reviewer #2: Yes</p>
<p>Reviewer #3: Yes</p>
<p>**********</p>
<p>PLOS authors have the option to publish the peer review history of their article (<ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/ploscompbiol/s/editorial-and-peer-review-process#loc-peer-review-history" xlink:type="simple">what does this mean?</ext-link>). If published, this will include your full peer review and any attached files.</p>
<p>If you choose “no”, your identity will remain anonymous but your review may still be made public.</p>
<p><bold>Do you want your identity to be public for this peer review?</bold> For information about this choice, including consent withdrawal, please see our <ext-link ext-link-type="uri" xlink:href="https://www.plos.org/privacy-policy" xlink:type="simple">Privacy Policy</ext-link>.</p>
<p>Reviewer #1: <bold>Yes: </bold>Anand K. Rampadarath</p>
<p>Reviewer #2: No</p>
<p>Reviewer #3: <bold>Yes: </bold>Sascha Schäuble</p>
<p><underline>Figure Files:</underline></p>
<p>While revising your submission, please upload your figure files to the Preflight Analysis and Conversion Engine (PACE) digital diagnostic tool, <underline><ext-link ext-link-type="uri" xlink:href="https://pacev2.apexcovantage.com/" xlink:type="simple">https://pacev2.apexcovantage.com</ext-link></underline>. PACE helps ensure that figures meet PLOS requirements. To use PACE, you must first register as a user. Then, login and navigate to the UPLOAD tab, where you will find detailed instructions on how to use the tool. If you encounter any issues or have any questions when using PACE, please email us at <underline><email xlink:type="simple">figures@plos.org</email></underline>.</p>
<p><underline>Data Requirements:</underline></p>
<p>Please note that, as a condition of publication, PLOS' data policy requires that you make available all data used to draw the conclusions outlined in your manuscript. Data must be deposited in an appropriate repository, included within the body of the manuscript, or uploaded as supporting information. This includes all numerical values that were used to generate graphs, histograms etc. For an example in PLOS Biology see here: <ext-link ext-link-type="uri" xlink:href="http://www.plosbiology.org/article/info%3Adoi%2F10.1371%2Fjournal.pbio.1001908#s5" xlink:type="simple">http://www.plosbiology.org/article/info%3Adoi%2F10.1371%2Fjournal.pbio.1001908#s5</ext-link>.</p>
<p><underline>Reproducibility:</underline></p>
<p>To enhance the reproducibility of your results, PLOS recommends that you deposit laboratory protocols in protocols.io, where a protocol can be assigned its own identifier (DOI) such that it can be cited independently in the future. For instructions see <underline><ext-link ext-link-type="uri" xlink:href="http://journals.plos.org/ploscompbiol/s/submission-guidelines#loc-materials-and-methods" xlink:type="simple">http://journals.plos.org/ploscompbiol/s/submission-guidelines#loc-materials-and-methods</ext-link></underline>.</p>
<supplementary-material id="pcbi.1008730.s012" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1008730.s012" xlink:type="simple">
<label>Attachment</label>
<caption>
<p>Submitted filename: <named-content content-type="submitted-filename">Reproducible_report_PCOMPBIOL_D_20_01343R1 (1).pdf</named-content></p>
</caption>
</supplementary-material>
</body>
</sub-article>
<sub-article article-type="author-comment" id="pcbi.1008730.r004">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pcbi.1008730.r004</article-id>
<title-group>
<article-title>Author response to Decision Letter 1</article-title>
</title-group>
<related-object document-id="10.1371/journal.pcbi.1008730" document-id-type="doi" document-type="peer-reviewed-article" id="rel-obj004" link-type="rebutted-decision-letter" object-id="10.1371/journal.pcbi.1008730.r003" object-id-type="doi" object-type="decision-letter"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>2</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="author-response-date">21 Dec 2020</named-content>
</p>
<supplementary-material id="pcbi.1008730.s013" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1008730.s013" xlink:type="simple">
<label>Attachment</label>
<caption>
<p>Submitted filename: <named-content content-type="submitted-filename">Response_R2.pdf</named-content></p>
</caption>
</supplementary-material>
</body>
</sub-article>
<sub-article article-type="aggregated-review-documents" id="pcbi.1008730.r005" specific-use="decision-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pcbi.1008730.r005</article-id>
<title-group>
<article-title>Decision Letter 2</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Kaleta</surname>
<given-names>Christoph</given-names>
</name>
<role>Associate Editor</role>
</contrib>
<contrib contrib-type="author">
<name name-style="western">
<surname>Beard</surname>
<given-names>Daniel A</given-names>
</name>
<role>Deputy Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2021</copyright-year>
<copyright-holder>Kaleta, Beard</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pcbi.1008730" document-id-type="doi" document-type="article" id="rel-obj005" link-type="peer-reviewed-article"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>2</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">21 Jan 2021</named-content>
</p>
<p>Dear Dr. Rodríguez Mier,</p>
<p>We are pleased to inform you that your manuscript 'DEXOM: Diversity-based enumeration of optimal context-specific metabolic networks' has been provisionally accepted for publication in PLOS Computational Biology. </p>
<p>Before your manuscript can be formally accepted you will need to complete some formatting changes, which you will receive in a follow-up email, and to address the remaining minor point raised by reviewer 3. A member of our team will be in touch with a set of requests.</p>
<p>Please note that your manuscript will not be scheduled for publication until you have made the required changes, so a swift response is appreciated.</p>
<p>IMPORTANT: The editorial review process is now complete. PLOS will only permit corrections to spelling, formatting or significant scientific errors from this point onwards. Requests for major changes, or any which affect the scientific understanding of your work, will cause delays to the publication date of your manuscript.</p>
<p>Should you, your institution's press office or the journal office choose to press release your paper, you will automatically be opted out of early publication. We ask that you notify us now if you or your institution is planning to press release the article. All press must be co-ordinated with PLOS.</p>
<p>Thank you again for supporting Open Access publishing; we are looking forward to publishing your work in PLOS Computational Biology. </p>
<p>Best regards,</p>
<p>Christoph Kaleta</p>
<p>Associate Editor</p>
<p>PLOS Computational Biology</p>
<p>Daniel Beard</p>
<p>Deputy Editor</p>
<p>PLOS Computational Biology</p>
<p>***********************************************************</p>
<p>Reviewer's Responses to Questions</p>
<p><bold>Comments to the Authors:</bold></p>
<p><bold>Please note here if the review is uploaded as an attachment.</bold></p>
<p>Reviewer #2: The authors have satisfactorily addressed all of my concerns.</p>
<p>Reviewer #3: The authors have followed up on my point raised and added a comprehensible analysis of the frequency of enriched pathways in the S1 appendix file.</p>
<p>These results, however, were not mentioned or referenced in the main text body.</p>
<p>The supporting information description was also not updated.</p>
<p>I kindly ask the authors to connect their frequency analysis of enriched pathways to the main text (presumably section "Diversity-enum detects more alternative hypothesis of the possible metabolic state of different human cancer cells") by adding e.g. a brief summary of their observation and linking the appendix file accordingly.</p>
<p>**********</p>
<p><bold>Have all data underlying the figures and results presented in the manuscript been provided?</bold></p>
<p>Large-scale datasets should be made available via a public repository as described in the <italic>PLOS Computational Biology</italic> <ext-link ext-link-type="uri" xlink:href="http://journals.plos.org/ploscompbiol/s/data-availability" xlink:type="simple">data availability policy</ext-link>, and numerical data that underlies graphs or summary statistics should be provided in spreadsheet form as supporting information.</p>
<p>Reviewer #2: Yes</p>
<p>Reviewer #3: Yes</p>
<p>**********</p>
<p>PLOS authors have the option to publish the peer review history of their article (<ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/ploscompbiol/s/editorial-and-peer-review-process#loc-peer-review-history" xlink:type="simple">what does this mean?</ext-link>). If published, this will include your full peer review and any attached files.</p>
<p>If you choose “no”, your identity will remain anonymous but your review may still be made public.</p>
<p><bold>Do you want your identity to be public for this peer review?</bold> For information about this choice, including consent withdrawal, please see our <ext-link ext-link-type="uri" xlink:href="https://www.plos.org/privacy-policy" xlink:type="simple">Privacy Policy</ext-link>.</p>
<p>Reviewer #2: No</p>
<p>Reviewer #3: <bold>Yes: </bold>Sascha Schäuble</p>
</body>
</sub-article>
<sub-article article-type="editor-report" id="pcbi.1008730.r006" specific-use="acceptance-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pcbi.1008730.r006</article-id>
<title-group>
<article-title>Acceptance letter</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Kaleta</surname>
<given-names>Christoph</given-names>
</name>
<role>Associate Editor</role>
</contrib>
<contrib contrib-type="author">
<name name-style="western">
<surname>Beard</surname>
<given-names>Daniel A</given-names>
</name>
<role>Deputy Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2021</copyright-year>
<copyright-holder>Kaleta, Beard</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pcbi.1008730" document-id-type="doi" document-type="article" id="rel-obj006" link-type="peer-reviewed-article"/>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">5 Feb 2021</named-content>
</p>
<p>PCOMPBIOL-D-20-01343R2 </p>
<p>DEXOM: Diversity-based enumeration of optimal context-specific metabolic networks</p>
<p>Dear Dr Rodríguez Mier,</p>
<p>I am pleased to inform you that your manuscript has been formally accepted for publication in PLOS Computational Biology. Your manuscript is now with our production department and you will be notified of the publication date in due course.</p>
<p>The corresponding author will soon be receiving a typeset proof for review, to ensure errors have not been introduced during production. Please review the PDF proof of your manuscript carefully, as this is the last chance to correct any errors. Please note that major changes, or those which affect the scientific understanding of the work, will likely cause delays to the publication date of your manuscript. </p>
<p>Soon after your final files are uploaded, unless you have opted out, the early version of your manuscript will be published online. The date of the early version will be your article's publication date. The final article will be published to the same URL, and all versions of the paper will be accessible to readers.</p>
<p>Thank you again for supporting PLOS Computational Biology and open-access publishing. We are looking forward to publishing your work! </p>
<p>With kind regards,</p>
<p>Alice Ellingham</p>
<p>PLOS Computational Biology | Carlyle House, Carlyle Road, Cambridge CB4 3DN | United Kingdom <email xlink:type="simple">ploscompbiol@plos.org</email> | Phone +44 (0) 1223-442824 | <ext-link ext-link-type="uri" xlink:href="http://ploscompbiol.org" xlink:type="simple">ploscompbiol.org</ext-link> | @PLOSCompBiol</p>
</body>
</sub-article>
</article>