<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1d3 20150301//EN" "http://jats.nlm.nih.gov/publishing/1.1d3/JATS-journalpublishing1.dtd">
<article article-type="research-article" dtd-version="1.1d3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">PLoS Comput Biol</journal-id>
<journal-id journal-id-type="publisher-id">plos</journal-id>
<journal-id journal-id-type="pmc">ploscomp</journal-id>
<journal-title-group>
<journal-title>PLOS Computational Biology</journal-title>
</journal-title-group>
<issn pub-type="ppub">1553-734X</issn>
<issn pub-type="epub">1553-7358</issn>
<publisher>
<publisher-name>Public Library of Science</publisher-name>
<publisher-loc>San Francisco, CA USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.1371/journal.pcbi.1004976</article-id>
<article-id pub-id-type="publisher-id">PCOMPBIOL-D-15-01957</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Research Article</subject>
</subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Molecular biology</subject><subj-group><subject>Molecular biology techniques</subject><subj-group><subject>Sequencing techniques</subject><subj-group><subject>Sequence analysis</subject><subj-group><subject>Sequence alignment</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Research and analysis methods</subject><subj-group><subject>Molecular biology techniques</subject><subj-group><subject>Sequencing techniques</subject><subj-group><subject>Sequence analysis</subject><subj-group><subject>Sequence alignment</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Research and analysis methods</subject><subj-group><subject>Computational techniques</subject><subj-group><subject>Split-decomposition method</subject><subj-group><subject>Multiple alignment calculation</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Evolutionary biology</subject><subj-group><subject>Evolutionary systematics</subject><subj-group><subject>Phylogenetics</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Taxonomy</subject><subj-group><subject>Evolutionary systematics</subject><subj-group><subject>Phylogenetics</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Computer and information sciences</subject><subj-group><subject>Data management</subject><subj-group><subject>Taxonomy</subject><subj-group>
<subject>Evolutionary systematics</subject><subj-group><subject>Phylogenetics</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Molecular biology</subject><subj-group><subject>Molecular biology techniques</subject><subj-group><subject>Molecular biology assays and analysis techniques</subject><subj-group><subject>Phylogenetic analysis</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Research and analysis methods</subject><subj-group><subject>Molecular biology techniques</subject><subj-group><subject>Molecular biology assays and analysis techniques</subject><subj-group><subject>Phylogenetic analysis</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Evolutionary biology</subject><subj-group><subject>Molecular evolution</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Computer and information sciences</subject><subj-group><subject>Software engineering</subject><subj-group><subject>Software tools</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Engineering and technology</subject><subj-group><subject>Software engineering</subject><subj-group><subject>Software tools</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Molecular biology</subject><subj-group><subject>Molecular biology techniques</subject><subj-group><subject>Sequencing techniques</subject><subj-group><subject>Protein sequencing</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Molecular biology techniques</subject><subj-group><subject>Sequencing techniques</subject><subj-group><subject>Protein sequencing</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Evolutionary biology</subject><subj-group><subject>Evolutionary genetics</subject></subj-group></subj-group></subj-group></article-categories>
<title-group>
<article-title>PhyloBot: A Web Portal for Automated Phylogenetics, Ancestral Sequence Reconstruction, and Exploration of Mutational Trajectories</article-title>
<alt-title alt-title-type="running-head">PhyloBot Web Portal</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes" xlink:type="simple">
<name name-style="western">
<surname>Hanson-Smith</surname>
<given-names>Victor</given-names>
</name>
<xref ref-type="corresp" rid="cor001">*</xref>
<xref ref-type="aff" rid="aff001"/>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Johnson</surname>
<given-names>Alexander</given-names>
</name>
<xref ref-type="aff" rid="aff001"/>
</contrib>
</contrib-group>
<aff id="aff001"><addr-line>Department of Microbiology and Immunology, University of California, San Francisco, San Francisco, California, United States of America</addr-line></aff>
<contrib-group>
<contrib contrib-type="editor" xlink:type="simple">
<name name-style="western">
<surname>Gardner</surname>
<given-names>Paul P</given-names>
</name>
<role>Editor</role>
<xref ref-type="aff" rid="edit1"/>
</contrib>
</contrib-group>
<aff id="edit1"><addr-line>University of Canterbury, NEW ZEALAND</addr-line></aff>
<author-notes>
<fn fn-type="conflict" id="coi001">
<p>The authors have declared that no competing interests exist.</p>
</fn>
<fn fn-type="con" id="contrib001">
<p>Conceived and designed the experiments: VHS. Performed the experiments: VHS. Analyzed the data: VHS. Wrote the paper: VHS AJ.</p>
</fn>
<corresp id="cor001">* E-mail: <email xlink:type="simple">victor.hanson-smith@ucsf.edu</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>29</day>
<month>7</month>
<year>2016</year>
</pub-date>
<pub-date pub-type="collection">
<month>7</month>
<year>2016</year>
</pub-date>
<volume>12</volume>
<issue>7</issue>
<elocation-id>e1004976</elocation-id>
<history>
<date date-type="received">
<day>19</day>
<month>11</month>
<year>2015</year>
</date>
<date date-type="accepted">
<day>12</day>
<month>5</month>
<year>2016</year>
</date>
</history>
<permissions>
<copyright-year>2016</copyright-year>
<copyright-holder>Hanson-Smith, Johnson</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="info:doi/10.1371/journal.pcbi.1004976"/>
<abstract>
<p>The method of phylogenetic ancestral sequence reconstruction is a powerful approach for studying evolutionary relationships among protein sequence, structure, and function. In particular, this approach allows investigators to (1) reconstruct and “resurrect” (that is, synthesize <italic>in vivo</italic> or <italic>in vitro</italic>) extinct proteins to study how they differ from modern proteins, (2) identify key amino acid changes that, over evolutionary timescales, have altered the function of the protein, and (3) order historical events in the evolution of protein function. Widespread use of this approach has been slow among molecular biologists, in part because the methods require significant computational expertise. Here we present PhyloBot, a web-based software tool that makes ancestral sequence reconstruction easy. Designed for non-experts, it integrates all the necessary software into a single user interface. Additionally, PhyloBot provides interactive tools to explore evolutionary trajectories between ancestors, enabling the rapid generation of hypotheses that can be tested using genetic or biochemical approaches. Early versions of this software were used in previous studies to discover genetic mechanisms underlying the functions of diverse protein families, including V-ATPase ion pumps, DNA-binding transcription regulators, and serine/threonine protein kinases. PhyloBot runs in a web browser, and is available at the following URL: <ext-link ext-link-type="uri" xlink:href="http://www.phylobot.com/" xlink:type="simple">http://www.phylobot.com</ext-link>. The software is implemented in Python using the Django web framework, and runs on elastic cloud computing resources from Amazon Web Services. Users can create and submit jobs on our free server (at the URL listed above), or use our open-source code to launch their own PhyloBot server.</p>
</abstract>
<funding-group>
<award-group id="award001">
<funding-source>
<institution-wrap>
<institution-id institution-id-type="funder-id">http://dx.doi.org/10.13039/100000002</institution-id>
<institution>National Institutes of Health</institution>
</institution-wrap>
</funding-source>
<award-id>1F32GM108299-01</award-id>
<principal-award-recipient>
<name name-style="western">
<surname>Hanson-Smith</surname>
<given-names>Victor</given-names>
</name>
</principal-award-recipient>
</award-group>
<award-group id="award002">
<funding-source>
<institution-wrap>
<institution-id institution-id-type="funder-id">http://dx.doi.org/10.13039/100000002</institution-id>
<institution>National Institutes of Health</institution>
</institution-wrap>
</funding-source>
<award-id>R01 GM37049</award-id>
<principal-award-recipient>
<name name-style="western">
<surname>Johnson</surname>
<given-names>Alexander D</given-names>
</name>
</principal-award-recipient>
</award-group>
<funding-statement>This work was supported by NIH 1F32GM108299-01 (NRSA F32), NIH R01 GM37049 (R01), and an Amazon Web Services in Education Research Grant. The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</funding-statement>
</funding-group>
<counts>
<fig-count count="5"/>
<table-count count="1"/>
<page-count count="10"/>
</counts>
<custom-meta-group>
<custom-meta id="data-availability">
<meta-name>Data Availability</meta-name>
<meta-value>The open source code for our software is available at <ext-link ext-link-type="uri" xlink:href="https://github.com/vhsvhs/phylobot-django" xlink:type="simple">https://github.com/vhsvhs/phylobot-django</ext-link>. A running instance of the PhyloBot server can be used at <ext-link ext-link-type="uri" xlink:href="http://www.phylobot.com" xlink:type="simple">http://www.phylobot.com</ext-link>.</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<disp-quote>
<p>“This is a <italic>PLOS Computational Biology</italic> Software paper.”</p>
</disp-quote>
<sec id="sec001" sec-type="intro">
<title>Introduction</title>
<p>Over the last decade, several innovative studies analyzed evolutionary trajectories of ancient genes in order to discover important relationships between present-day gene sequence, structure, and function [<xref ref-type="bibr" rid="pcbi.1004976.ref001">1</xref>–<xref ref-type="bibr" rid="pcbi.1004976.ref006">6</xref>]. These discoveries relied on the methods of ancestral sequence reconstruction, in which models of amino acid evolution are used to infer ancient protein sequences at multiple points in a gene family history [<xref ref-type="bibr" rid="pcbi.1004976.ref007">7</xref>]. Ancestral proteins have been “resurrected” in several cases [<xref ref-type="bibr" rid="pcbi.1004976.ref008">8</xref>]; that is, they have been expressed in living cells deleted for the modern descendant and purified and studied <italic>in vitro</italic>. Comparisons with the modern counterparts led to the discovery of key amino acid residues responsible for the biochemical diversity among related members of a gene family (for a review see [<xref ref-type="bibr" rid="pcbi.1004976.ref009">9</xref>]). The method also allows the evolutionary path to a modern protein to be accurately reconstructed, illustrating how “permissible” trajectories circumvent fitness barriers and produce novelty. This analysis is not possible without ancestral reconstruction.</p>
<p>Many questions in molecular and cell biology could be addressed using ancestral protein analysis. One obstacle is that the typical protocol for ancestral reconstruction involves multiple steps that require significant expertise with computational phylogenetics. In brief, the protocol begins with a collection of orthologous protein sequences sampled from diverse species. Next, the sequences are aligned to each other, their phylogenetic relationships are inferred, probabilities of ancestral sequences are computed at internal phylogenetic nodes, and then mutations (which convert ancestral to modern proteins, or vice versa) are identified on every phylogenetic branch. The rigorous application of this protocol can be challenging because it is not implemented as a single software package. Rather, ancestral reconstruction currently requires dozens of software tools, the computational skills to combine them, knowledge about phylogenetic models, and the programming abilities to deal with multiple file formats (many of them esoteric).</p>
<p>PhyloBot, described here, is new software that automates ancestral sequence reconstruction. It provides a user interface that greatly simplifies the reconstruction process, and also includes visual tools to analyze ancestors. Specifically designed for bench scientists unfamiliar with bioinformatics, the software runs in web browsers and it requires no installation on users’ computers. Rather, PhyloBot uses elastic computing resources in the Amazon cloud. Moreover, results from PhyloBot analyses are portable: every ancestral reconstruction receives a permanent URL that can be shared with colleagues and used in publications. We believe PhyloBot is a significant methodological advance for computational molecular biology, one that will hopefully inspire widespread use of ancestral protein analysis.</p>
</sec>
<sec id="sec002" sec-type="materials|methods">
<title>Design and Implementation</title>
<p>PhyloBot is a web portal that automates the reconstruction of ancestral amino acid sequences. The portal provides interactive web tools to compose and launch analysis jobs on remote supercomputers. The tools are easy to use and conceal a great deal of underlying automation. To start, users upload a FASTA-formatted text file containing a collection of related protein sequences (<xref ref-type="fig" rid="pcbi.1004976.g001">Fig 1</xref>). There is no minimum requirement for the degree of relatedness between the sequences, but in general, only conserved portions of a protein can be reconstructed accurately. For most investigations, the evolutionary trajectory of conserved regions of proteins is the principal interest. PhyloBot flows the sequences automatically through six major stages of analysis, using a dozen different software packages (<xref ref-type="table" rid="pcbi.1004976.t001">Table 1</xref>). Upon completion, the results from all stages can be viewed in a web browser.</p>
<fig id="pcbi.1004976.g001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1004976.g001</object-id>
<label>Fig 1</label>
<caption>
<title>Summary of PhyloBot automated pipeline.</title>
<p>A user begins by uploading a collection of orthologous protein sequences in a FASTA-formatted text file. PhyloBot reads the sequence collection and launches its automated analysis pipeline, which includes sequence alignment, phylogenetic model-fitting, tests of branch support, ancestral sequence reconstruction, and prediction of functional genetics. Upon completion, the results can be viewed in a web browser.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1004976.g001" xlink:type="simple"/>
</fig>
<table-wrap id="pcbi.1004976.t001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1004976.t001</object-id>
<label>Table 1</label> <caption><title>Software incorporated in the PhyloBot analysis pipeline.</title> <p>PhyloBot uses several existing software tools at various stages in its automated analysis pipeline.</p></caption>
<alternatives>
<graphic id="pcbi.1004976.t001g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1004976.t001" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left">Software</th>
<th align="left">Purpose</th>
<th align="left">Reference</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">MUSCLE v3.8.31</td>
<td align="left">Multiple Sequence Alignment</td>
<td align="left">[<xref ref-type="bibr" rid="pcbi.1004976.ref010">10</xref>]</td>
</tr>
<tr>
<td align="left">MSAProbs 0.9 5r1</td>
<td align="left">Multiple Sequence Alignment</td>
<td align="left">[<xref ref-type="bibr" rid="pcbi.1004976.ref011">11</xref>]</td>
</tr>
<tr>
<td align="left">FastTree v2.1.7</td>
<td align="left">Rapid ML Tree Estimation (for ZORRO)</td>
<td align="left">[<xref ref-type="bibr" rid="pcbi.1004976.ref012">12</xref>]</td>
</tr>
<tr>
<td align="left">ZORRO</td>
<td align="left">Alignment Quality Estimation</td>
<td align="left">[<xref ref-type="bibr" rid="pcbi.1004976.ref013">13</xref>]</td>
</tr>
<tr>
<td align="left">RAxML v8.1.15</td>
<td align="left">ML Phylogenetic Estimation</td>
<td align="left">[<xref ref-type="bibr" rid="pcbi.1004976.ref014">14</xref>]</td>
</tr>
<tr>
<td align="left">PhyML v20130708</td>
<td align="left">Phylogenetic Branch Support Estimation</td>
<td align="left">[<xref ref-type="bibr" rid="pcbi.1004976.ref015">15</xref>,<xref ref-type="bibr" rid="pcbi.1004976.ref016">16</xref>]</td>
</tr>
<tr>
<td align="left">Lazarus v2.7.6</td>
<td align="left">Controlling CODEML</td>
<td align="left">[<xref ref-type="bibr" rid="pcbi.1004976.ref017">17</xref>]</td>
</tr>
<tr>
<td align="left">CODEML/PAML v4.2</td>
<td align="left">Empirical Bayesian Ancestral Sequence Reconstruction</td>
<td align="left">[<xref ref-type="bibr" rid="pcbi.1004976.ref018">18</xref>]</td>
</tr>
<tr>
<td align="left">DendroPy</td>
<td align="left">Manipulating Phylogenies in Software</td>
<td align="left">[<xref ref-type="bibr" rid="pcbi.1004976.ref019">19</xref>]</td>
</tr>
<tr>
<td align="left">Python Django v7</td>
<td align="left">Interactive Web Tools, Server Logic</td>
<td align="left"><ext-link ext-link-type="uri" xlink:href="http://www.djangoproject.com/" xlink:type="simple">http://www.djangoproject.com</ext-link></td>
</tr>
<tr>
<td align="left">Amazon Web Services</td>
<td align="left">Web Hosting</td>
<td align="left"><ext-link ext-link-type="uri" xlink:href="http://aws.amazon.com/" xlink:type="simple">http://aws.amazon.com</ext-link></td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<p>The front page of the PhyloBot portal provides a control panel to compose new analysis jobs (<xref ref-type="fig" rid="pcbi.1004976.g002">Fig 2A</xref>), and to check the status of existing jobs (<xref ref-type="fig" rid="pcbi.1004976.g002">Fig 2B</xref>). Composing a new job is relatively simple: a user uploads a collection of protein sequences in FASTA format, creates a unique name for the job, and specifies the “outgroup”–i.e., a group of the sequences that can be used to root the phylogenetic tree. A user can immediately launch the job using the default settings (which are appropriate for most analyses), or customize the job. The default settings will reconstruct ancestors using a collection of different sequence alignment methods and phylogenetic models. A user can optionally provide a so-called “constraint tree” that specifies evolutionary relationships among protein sequences that are assumed <italic>a priori</italic> to be true. If this tree is provided, PhyloBot will use it to restrict the phylogenetic analysis to evolutionary hypotheses that match the constraints.</p>
<fig id="pcbi.1004976.g002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1004976.g002</object-id>
<label>Fig 2</label>
<caption>
<title>Screenshots from the PhyloBot web portal.</title>
<p>(A) The front page of the portal provides a control panel to create new jobs and to check the status of existing jobs. In this image, a user has five jobs; three of them are 100% complete and the other two are in progress. (B) A user can view detailed status for every job they create. The status page provides controls to start, stop, reset, and delete the job, in addition to displaying the job’s settings and the job’s current status.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1004976.g002" xlink:type="simple"/>
</fig>
<p>PhyloBot is engineered using Python Django, and it currently runs on cloud computing resources from Amazon Web Services. When a job is launched, PhyloBot acquires elastic compute nodes from Amazon. This means that all jobs are launched instantly, and there is no queue to wait in. Users are welcome to use an instance of PhyloBot available at <ext-link ext-link-type="uri" xlink:href="http://www.phylobot.com/" xlink:type="simple">http://www.phylobot.com</ext-link>, or launch their own instance of PhyloBot using its open-source code.</p>
<sec id="sec003">
<title>Multiple sequence alignment</title>
<p>The inference of homology between sites in related protein sequences (i.e., multiple sequence alignment) is a necessary first step for phylogenetic analysis. Many alignment methods have been proposed [<xref ref-type="bibr" rid="pcbi.1004976.ref020">20</xref>, <xref ref-type="bibr" rid="pcbi.1004976.ref021">21</xref>], and different methods can result in conflicting phylogenetic conclusions [<xref ref-type="bibr" rid="pcbi.1004976.ref022">22</xref>]. Open reading frames are inherently difficult to align, and no single alignment method has been found to be accurate in all conditions. PhyloBot uses two different methods and compares their results: Muscle [<xref ref-type="bibr" rid="pcbi.1004976.ref010">10</xref>], and MSAProbs [<xref ref-type="bibr" rid="pcbi.1004976.ref011">11</xref>]. Both methods progressively align sequences according to a guide tree. The methods differ in their approaches to estimating the guide tree, and in their approaches to estimating the costs of sequence insertion and deletion events. PhyloBot also provides a feature for users to upload their own pre-computed sequence alignments. The uploaded alignments are then used alongside the alignments computed by Muscle and MSAProbs. After sequence alignment is complete, alignment quality is estimated using a probabilistic masking method [<xref ref-type="bibr" rid="pcbi.1004976.ref013">13</xref>].</p>
<p>PhyloBot evaluates the consistency of sequence alignments by mapping the aligned position of every residue to its corresponding position in other alignments (<xref ref-type="fig" rid="pcbi.1004976.g003">Fig 3A</xref>). This comparison reveals the extent to which an inferred “site” in one alignment may be one, two, or multiple sites in another alignment (<xref ref-type="fig" rid="pcbi.1004976.g003">Fig 3B</xref>). These differences can have significant consequences for later stages in ancestral reconstruction analysis. Specifically, the lengths of reconstructed ancestral protein sequences are determined by the number of sites in the underlying alignment. Disagreements between alignment methods, therefore, can produce ancestral sequences of different lengths. PhyloBot provides visual tools to evaluate the consistency and robustness of sequence alignments, and to rapidly examine their differences.</p>
<fig id="pcbi.1004976.g003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1004976.g003</object-id>
<label>Fig 3</label>
<caption>
<title>Example of alignment robustness analysis.</title>
<p>In this simple example, orthologous amino acid sequences from five species were aligned using three different methods for multiple sequence alignment: Muscle, MSAProbs, and MAFFT. (A) PhyloBot maps the aligned position of every character across all alignments. Shown in red is the character map for the amino acids aligned into site 3 of the Muscle alignment. In the MSAProbs sequence alignment, these same residues are split across sites 3 and 4. In the MAFFT alignment, these residues are split across sites 3, 4 and 5. (B) PhyloBot displays the character map as pie charts expressing site identity relative to the Muscle alignment. PhyloBot will also show these maps relative to MSAProbs and MAFFT alignments.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1004976.g003" xlink:type="simple"/>
</fig>
</sec>
<sec id="sec004">
<title>Phylogenetic inference</title>
<p>PhyloBot infers phylogenies using a maximum likelihood (ML) method implemented in RAxML [<xref ref-type="bibr" rid="pcbi.1004976.ref014">14</xref>]. Briefly, the ML method searches for the tree and branch lengths with the highest probability of producing the sequence alignment, based on a model of amino acid substitution [<xref ref-type="bibr" rid="pcbi.1004976.ref022">22</xref>]. Many models have been proposed to account for different evolutionary patterns. For example, some models allow for heterogeneity in the evolutionary rates at different sites [<xref ref-type="bibr" rid="pcbi.1004976.ref023">23</xref>], while other models allow for heterogeneity in the amino acid substitution process at different sites [<xref ref-type="bibr" rid="pcbi.1004976.ref024">24</xref>]. PhyloBot finds the best-fitting model from a collection of options, using the Akaike Information Criterion (AIC) to measure model fit [<xref ref-type="bibr" rid="pcbi.1004976.ref025">25</xref>]. This approach, specifically the use of the AIC, is similar to the method implemented in the popular software ProtTest [<xref ref-type="bibr" rid="pcbi.1004976.ref026">26</xref>].</p>
<p>As a consequence of the model-fitting step, PhyloBot finds ML trees for all combinations of sequence alignments and evolutionary models in its collection. This means that phylogenetic conclusions drawn from one method-model pair can be assessed for robustness across alternate methods and models (<xref ref-type="fig" rid="pcbi.1004976.g004">Fig 4</xref>). Different method-model combinations can reveal discrepant phylogenies that affect interpretations of protein evolution. PhyloBot screens for these discrepancies by mapping every ancestral node to its corresponding node(s) on the trees found using different approaches. This type of ancestral node robustness analysis reveals those ancestors that are contingent on method and model choice; due to incompatible branching topologies, an ancestor may not exist on all trees.</p>
<fig id="pcbi.1004976.g004" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1004976.g004</object-id>
<label>Fig 4</label>
<caption>
<title>Example of ancestral node robustness analysis.</title>
<p>In this small example with protein sequences from five species, maximum likelihood phylogenies were inferred using four different evolutionary models (JTT+GAMMA, JTT+CAT, LG+GAMMA, and LG+CAT) based on three different sequence alignment methods (Muscle, MSAProbs, and MAFFT). The resulting ML phylogenies disagree in their topologies, and an ancestral node in one tree may not exist in other trees. For example, shown in red is the phylogenetic node corresponding to the most-recent ancestor of <italic>H</italic>. <italic>sapiens</italic>, <italic>M</italic>. <italic>musculus</italic>, and <italic>G</italic>. <italic>gallus</italic>, with <italic>X</italic>. <italic>tropicalis</italic> and <italic>T</italic>. <italic>teleost</italic> as the outgroup. This ancestral node is not inferred to exist when using some combinations of models and methods. Specifically, the alternate phylogenies support an evolutionary hypothesis in which the sequences from <italic>G</italic>. <italic>gallus</italic> and <italic>X</italic>. <italic>tropicalis</italic> are sister to each other. PhyloBot gathers this information about all reconstructed ancestral nodes, in order to assess the extent to which an ancestor’s existence is robust across different models and methods.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1004976.g004" xlink:type="simple"/>
</fig>
<p>The accuracy of every tree branch is estimated using approximate likelihood ratio tests (aLRT), implemented in PhyML v3.0 [<xref ref-type="bibr" rid="pcbi.1004976.ref016">16</xref>,<xref ref-type="bibr" rid="pcbi.1004976.ref017">17</xref>]. The aLRT is quick and relatively accurate compared to other confidence methods [<xref ref-type="bibr" rid="pcbi.1004976.ref027">27</xref>, <xref ref-type="bibr" rid="pcbi.1004976.ref028">28</xref>]. For ease of interpretation, PhyloBot transforms aLRT test statistics into a simple approximate likelihood ratio (aLR) as follows: <inline-formula id="pcbi.1004976.e001"><alternatives><graphic id="pcbi.1004976.e001g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1004976.e001" xlink:type="simple"/><mml:math display="inline" id="M1"><mml:mrow><mml:mi>a</mml:mi><mml:mi>L</mml:mi><mml:mi>R</mml:mi><mml:mo>=</mml:mo><mml:msup><mml:mi>e</mml:mi><mml:mrow><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mfrac><mml:mrow><mml:mi>a</mml:mi><mml:mi>L</mml:mi><mml:mi>R</mml:mi><mml:mi>T</mml:mi></mml:mrow><mml:mn>2</mml:mn></mml:mfrac></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:msup></mml:mrow></mml:math></alternatives></inline-formula></p>
<p>The aLR for a particular branch can be interpreted as an estimated likelihood ratio between two different evolutionary hypotheses. In the first hypothesis, the true tree is the ML tree containing the branch in question. In the second hypothesis, the true tree is an alternate tree in which the branch does not exist. Using this framework, it can be said that the existence of a specific phylogenetic split is estimated to be “X times more likely” than the next-best hypothesis in which that branch doesn’t exist.</p>
</sec>
<sec id="sec005">
<title>Ancestral sequence reconstruction</title>
<p>PhyloBot reconstructs ancestral protein sequences at every internal node of every ML tree, for all combinations of sequence alignment method and evolutionary model. Ancestors are reconstructed using the empirical Bayes approach [<xref ref-type="bibr" rid="pcbi.1004976.ref007">7</xref>], as implemented in the software CODEML [<xref ref-type="bibr" rid="pcbi.1004976.ref018">18</xref>]. This approach calculates a probability distribution of ancestral sequences for every ancestral node. The ML sequence for a single node can be found by concatenating the highest probability residue at each site into a string of amino acids. PhyloBot uses <italic>Lazarus</italic> [<xref ref-type="bibr" rid="pcbi.1004976.ref017">17</xref>] to control CODEML, and places ancestral insertion/deletion characters by parsimony [<xref ref-type="bibr" rid="pcbi.1004976.ref028">28</xref>]. Previous work suggests that ML ancestral sequences encode proteins that tend to overestimate thermostability [<xref ref-type="bibr" rid="pcbi.1004976.ref029">29</xref>]. Following from this work, PhyloBot computes a collection of Bayesian-sampled sequences that sometimes choose less-probable amino acids from the probability distribution.</p>
</sec>
<sec id="sec006">
<title>Exploration of mutational trajectories</title>
<p>PhyloBot provides web tools to compare ancestral protein sequences at different points in evolutionary history. Ancestral sequence comparison is a direct means to generate testable hypotheses about which residues in a protein determine its unique biochemistry. In many protein families, all members perform an analogous function, such as binding a class of substrates, but individual members exhibit biochemical variation in this function. Sequence comparisons between present-day proteins often suggest a large number of possible amino acid changes to explain observed biochemical differences. In contrast, comparisons between ancestral sequences on relevant phylogenetic branches may reveal a smaller set of candidate residues with fewer false-positives [<xref ref-type="bibr" rid="pcbi.1004976.ref030">30</xref>].</p>
</sec>
</sec>
<sec id="sec007" sec-type="results">
<title>Results</title>
<p>PhyloBot has been used to discover genetic mechanisms underlying biochemical diversity in several protein families, including protein kinases [<xref ref-type="bibr" rid="pcbi.1004976.ref004">4</xref>], DNA-binding transcription regulators [<xref ref-type="bibr" rid="pcbi.1004976.ref003">3</xref>], and transmembrane ion pumps [<xref ref-type="bibr" rid="pcbi.1004976.ref031">31</xref>]. In these studies, ancestral reconstructions from PhyloBot were also used to order key evolutionary steps. Interactive results from these projects can be viewed in a web browser at the following URLs: <ext-link ext-link-type="uri" xlink:href="http://www.phylobot.com/cmgc" xlink:type="simple">http://www.phylobot.com/cmgc</ext-link>, <ext-link ext-link-type="uri" xlink:href="http://www.phylobot.com/mcm1" xlink:type="simple">http://www.phylobot.com/mcm1</ext-link>, and <ext-link ext-link-type="uri" xlink:href="http://www.phylobot.com/VATPase" xlink:type="simple">http://www.phylobot.com/VATPase</ext-link>. The methods of ancestral reconstruction can be applied to nearly any protein family, regardless of its age or diversity. The accuracy of a reconstruction is correlated with conservation; this means that functionally important interaction domains are generally reconstructed with higher accuracy than poorly conserved regions, such as polypeptide linkers.</p>
<p>PhyloBot provides an ancestral library viewer to interact with results from completed analyses (<xref ref-type="fig" rid="pcbi.1004976.g005">Fig 5</xref>). In practice, PhyloBot deduces the ancestral sequences from modern protein sequences, expressed as probabilities of a given amino acid at any branching point in the phylogenetic tree. In many cases, the probability is sufficiently high that the ancestral protein can be “resurrected” with high accuracy. Every ancestral library gets a unique URL, which is permanent and can be shared with collaborators, or anyone else interested in viewing the ancestors. Users register for an account with PhyloBot, and analyses submitted by a particular user are visible only to that user unless the analysis URL is shared. The ancestral viewer displays results from all stages of the PhyloBot analysis: sequence alignments, trees, ancestors, statistical support, and mutations on branches.</p>
<fig id="pcbi.1004976.g005" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1004976.g005</object-id>
<label>Fig 5</label>
<caption>
<title>Screenshots from the PhyloBot ancestral library viewer.</title>
<p>The images shown come from the Ancestral Library computed for the CMGC protein family [<xref ref-type="bibr" rid="pcbi.1004976.ref004">4</xref>]. (A) The library viewer displays an interactive tree for exploring reconstructed protein ancestors. Users select the maximum likelihood tree based on the alignment method and evolutionary model, and then click on ancestral nodes within that tree. (B) PhyloBot gathers summary statistics about every ancestral node. Shown here is the support summary for ancestral Node 401 in the CMGC family, reconstructed using msaprobs and PROTCATLG. The histogram bins the sequence sites of Node 401 according to their amino acid probability support. In this case, a majority of sites have support of 0.9 or greater. The line graph expresses the probability of the maximum likelihood amino acid residue, along with the second-best and third-best reconstructed residues; the line graph is a quick way to visually determine which protein domains were reconstructed with strong support. In this example, there is an unstructured region in the C-terminus that was reconstructed with low support. (C) PhyloBot shows details about every site in every reconstructed ancestor. Shown here is the probability support by site for Node 401 in CMGC. Users can optionally map this data to extant sequences. For example, here a user selected Homo sapiens CDK6. In the table, the first column displays the sequence site in the MSAProbs alignment, the second column expresses the site number and best amino acid state in the reconstructed ancestor Node 401, the third column expresses the site number and amino acid state in Homo sapiens CDK6, and the fourth column expresses the full probability distribution of all amino acid states reconstructed at that site in Node 401.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1004976.g005" xlink:type="simple"/>
</fig>
<p>The methods of ancestral reconstruction are ideal for examination of protein families with one or more diverse biochemical functions that can be assayed in molecular experiments. In these cases, PhyloBot is well-suited to guide experimentalists toward identification of the residues that determine functional variation across a protein family.</p>
</sec>
<sec id="sec008">
<title>Availability and Future Directions</title>
<p>PhyloBot is available to use at <ext-link ext-link-type="uri" xlink:href="http://www.phylobot.com/" xlink:type="simple">http://www.phylobot.com</ext-link>, and its source code is available at <ext-link ext-link-type="uri" xlink:href="https://github.com/vhsvhs/phylobot-django" xlink:type="simple">https://github.com/vhsvhs/phylobot-django</ext-link>. Future versions of PhyloBot will include an expanded suite of alignment methods and phylogenetic models.</p>
</sec>
</body>
<back>
<ack>
<p>We thank Liam Holt from University of California, Berkeley, for providing comments on previous versions of PhyloBot. We thank Joe Thornton at University of Chicago for inspiring the analysis pipeline implemented in PhyloBot. We thank members of the Johnson Lab at University of California San Francisco for comments on this manuscript.</p>
</ack>
<ref-list>
<title>References</title>
<ref id="pcbi.1004976.ref001"><label>1</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ortlund</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Bridgham</surname> <given-names>JT</given-names></name>, <name name-style="western"><surname>Redinbo</surname> <given-names>MR</given-names></name>, <name name-style="western"><surname>Thornton</surname> <given-names>JW</given-names></name>. <article-title>Crystal structure of an ancient protein: evolution by conformational epistasis</article-title>. <source>Science</source> <year>2007</year>, <volume>317</volume>, <fpage>1544</fpage>–<lpage>8</lpage>. <object-id pub-id-type="pmid">17702911</object-id></mixed-citation></ref>
<ref id="pcbi.1004976.ref002"><label>2</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Bridgham</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Ortlund</surname> <given-names>E</given-names></name>, and <name name-style="western"><surname>Thornton</surname> <given-names>JW</given-names></name>. <article-title>Evolution of a New Function by Degenerative Mutation in Cephalochordate Steroid Receptors</article-title>. <source>PLoS Genetics</source> <year>2009</year>, <volume>4</volume>(<issue>9</issue>).</mixed-citation></ref>
<ref id="pcbi.1004976.ref003"><label>3</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Baker</surname> <given-names>CB</given-names></name>, <name name-style="western"><surname>Hanson-Smith</surname> <given-names>V</given-names></name>, and <name name-style="western"><surname>Johnson</surname> <given-names>AD</given-names></name>. <article-title>Following gene duplication, paralog interference constrains transcriptional circuit evolution</article-title>. <source>Science</source> <year>2013</year>, <volume>342</volume>, <fpage>104</fpage>–<lpage>8</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1126/science.1240810" xlink:type="simple">10.1126/science.1240810</ext-link></comment> <object-id pub-id-type="pmid">24092741</object-id></mixed-citation></ref>
<ref id="pcbi.1004976.ref004"><label>4</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Howard</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Hanson-Smith</surname> <given-names>V</given-names></name>, <name name-style="western"><surname>Kennedy</surname> <given-names>KJ</given-names></name>, <name name-style="western"><surname>Miller</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Lou</surname> <given-names>HJ</given-names></name>, <name name-style="western"><surname>Johnson</surname> <given-names>AJ</given-names></name>, <etal>et al</etal>. <article-title>Ancestral resurrection reveals evolutionary mechanisms of kinase plasticity</article-title>. <source>eLife</source> <year>2014</year>, <volume>3</volume>:<fpage>e04126</fpage></mixed-citation></ref>
<ref id="pcbi.1004976.ref005"><label>5</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>McKeown</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Bridgham</surname> <given-names>JT</given-names></name>, <name name-style="western"><surname>Anderson</surname> <given-names>DW</given-names></name>, <name name-style="western"><surname>Murphy</surname> <given-names>MN</given-names></name>, <name name-style="western"><surname>Ortlund</surname> <given-names>EA</given-names></name>, <name name-style="western"><surname>Thornton</surname> <given-names>JW</given-names></name>. <article-title>Evolution of DNA specificity in a transcription factor family produced a new gene regulatory module</article-title>. <source>Cell</source> <year>2014</year>, <volume>159</volume>, <fpage>58</fpage>–<lpage>68</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1016/j.cell.2014.09.003" xlink:type="simple">10.1016/j.cell.2014.09.003</ext-link></comment> <object-id pub-id-type="pmid">25259920</object-id></mixed-citation></ref>
<ref id="pcbi.1004976.ref006"><label>6</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Kratzer</surname> <given-names>JT</given-names></name>, <name name-style="western"><surname>Lanaspa</surname> <given-names>MA</given-names></name>, <name name-style="western"><surname>Murphy</surname> <given-names>MN</given-names></name>, <name name-style="western"><surname>Cicerchi</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Graves</surname> <given-names>CL</given-names></name>, <name name-style="western"><surname>Tipton</surname> <given-names>PA</given-names></name>, <etal>et al</etal>. <article-title>Evolutionary history and metabolic insights of ancient mammalian uricases</article-title>. <source>Proceedings of the National Academy of Sciences USA</source> <year>2014</year>, <volume>111</volume>(<issue>10</issue>), <fpage>3763</fpage>–<lpage>8</lpage>.</mixed-citation></ref>
<ref id="pcbi.1004976.ref007"><label>7</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Yang</surname> <given-names>Z</given-names></name>, <name name-style="western"><surname>Kumar</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Nei</surname> <given-names>M</given-names></name>. <article-title>A New Method of Inference of Ancestral Nucleotide and Amino Acid Sequences</article-title>. <source>Genetics</source> <year>1995</year>, <volume>141</volume>, <fpage>1641</fpage>–<lpage>1650</lpage>. <object-id pub-id-type="pmid">8601501</object-id></mixed-citation></ref>
<ref id="pcbi.1004976.ref008"><label>8</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Thornton</surname> <given-names>JW</given-names></name>. <article-title>Resurrecting Ancient Genes: Experimental Analysis of Extinct Molecules</article-title>. <source>Nature Reviews Genetics</source> <year>2004</year>, <volume>5</volume>, <fpage>366</fpage>–<lpage>75</lpage>. <object-id pub-id-type="pmid">15143319</object-id></mixed-citation></ref>
<ref id="pcbi.1004976.ref009"><label>9</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Harms</surname> <given-names>MJ</given-names></name>, <name name-style="western"><surname>Thornton</surname> <given-names>JW</given-names></name>. <article-title>Evolutionary biochemistry: revealing the historical and physical causes of protein properties</article-title>. <source>Nature Reviews Genetics</source> <year>2013</year>. <volume>14</volume>(<issue>8</issue>), <fpage>559</fpage>–<lpage>571</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1038/nrg3540" xlink:type="simple">10.1038/nrg3540</ext-link></comment> <object-id pub-id-type="pmid">23864121</object-id></mixed-citation></ref>
<ref id="pcbi.1004976.ref010"><label>10</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Edgar</surname> <given-names>RC</given-names></name>. <article-title>MUSCLE: multiple sequence alignment with high accuracy and high throughput</article-title>. <source>Nucleic Acids Research</source> <year>2004</year>, <volume>32</volume>(<issue>5</issue>), <fpage>1792</fpage>–<lpage>97</lpage>. <object-id pub-id-type="pmid">15034147</object-id></mixed-citation></ref>
<ref id="pcbi.1004976.ref011"><label>11</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Liu</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Schmidt</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Maskell</surname> <given-names>DL</given-names></name>. <article-title>MSAProbs: multiple sequence alignment based on pair hidden Markov models and partition function posterior probabilities</article-title>. <source>Bioinformatics</source> <year>2010</year>, <volume>26</volume>(<issue>16</issue>), <fpage>1958</fpage>–<lpage>64</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1093/bioinformatics/btq338" xlink:type="simple">10.1093/bioinformatics/btq338</ext-link></comment> <object-id pub-id-type="pmid">20576627</object-id></mixed-citation></ref>
<ref id="pcbi.1004976.ref012"><label>12</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Price</surname> <given-names>MN</given-names></name>, <name name-style="western"><surname>Dehal</surname> <given-names>PS</given-names></name>, <name name-style="western"><surname>Arkin</surname> <given-names>AP</given-names></name> (<year>2010</year>) <article-title>FastTree 2 – Approximately Maximum-Likelihood Trees for Large Alignments</article-title>. <source>PLoS ONE</source>, <volume>5</volume>(<issue>3</issue>):<fpage>e9490</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1371/journal.pone.0009490" xlink:type="simple">10.1371/journal.pone.0009490</ext-link></comment> <object-id pub-id-type="pmid">20224823</object-id></mixed-citation></ref>
<ref id="pcbi.1004976.ref013"><label>13</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Wu</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Chatterji</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Eisen</surname> <given-names>JA</given-names></name>. <article-title>Accounting for alignment uncertainty in phylogenomics</article-title>. <source>PLoS One</source> <year>2012</year>, <volume>7</volume>, <fpage>e30288</fpage>, <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1371/journal.pone.0030288" xlink:type="simple">10.1371/journal.pone.0030288</ext-link></comment> <object-id pub-id-type="pmid">22272325</object-id></mixed-citation></ref>
<ref id="pcbi.1004976.ref014"><label>14</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Stamatakis</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Ludwig</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Meier</surname> <given-names>H</given-names></name>. <article-title>RAxML-III: a fast program for maximum likelihood-based inference of large phylogenetic trees</article-title>. <source>Bioinformatics</source> <year>2005</year>, <volume>21</volume>(<issue>4</issue>), <fpage>456</fpage>–<lpage>63</lpage>. <object-id pub-id-type="pmid">15608047</object-id></mixed-citation></ref>
<ref id="pcbi.1004976.ref015"><label>15</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Anisimova</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Gascuel</surname> <given-names>O</given-names></name>. <article-title>Approximate Likelihood-Ratio Test for Branches: A Fast, Accurate, and Powerful Alternative</article-title>. <source>Systematic Biology</source> <year>2006</year>, <volume>4</volume>, <fpage>539</fpage>–<lpage>552</lpage>.</mixed-citation></ref>
<ref id="pcbi.1004976.ref016"><label>16</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Guindon</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Dufayard</surname> <given-names>JF</given-names></name>, <name name-style="western"><surname>Lefort</surname> <given-names>V</given-names></name>, <name name-style="western"><surname>Anisimova</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Hordijk</surname> <given-names>W</given-names></name>, <name name-style="western"><surname>Gascuel</surname> <given-names>O</given-names></name>. <article-title>New Algorithms and Methods to Estimate Maximum-Likelihood Phylogenies: Assessing the Performance of PhyML 3.0</article-title>. <source>Systematic Biology</source> <year>2010</year>. <volume>59</volume>(<issue>3</issue>). <fpage>307</fpage>–<lpage>321</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1093/sysbio/syq010" xlink:type="simple">10.1093/sysbio/syq010</ext-link></comment> <object-id pub-id-type="pmid">20525638</object-id></mixed-citation></ref>
<ref id="pcbi.1004976.ref017"><label>17</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hanson-Smith</surname> <given-names>V</given-names></name>, <name name-style="western"><surname>Kolaczkowski</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Thornton</surname> <given-names>JW</given-names></name>. <article-title>Robustness of Ancestral Sequence Reconstruction to Phylogenetic Uncertainty</article-title>. <source>Molecular Biology and Evolution</source> <year>2010</year>, <volume>27</volume>(<issue>9</issue>), <fpage>1988</fpage>–<lpage>99</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1093/molbev/msq081" xlink:type="simple">10.1093/molbev/msq081</ext-link></comment> <object-id pub-id-type="pmid">20368266</object-id></mixed-citation></ref>
<ref id="pcbi.1004976.ref018"><label>18</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Yang</surname> <given-names>Z</given-names></name>. <article-title>PAML 4: Phylogenetic Analysis by Maximum Likelihood</article-title>. <source>Molecular Biology and Evolution</source> <year>2007</year>, <volume>24</volume>(<issue>8</issue>), <fpage>1586</fpage>–<lpage>1591</lpage>. <object-id pub-id-type="pmid">17483113</object-id></mixed-citation></ref>
<ref id="pcbi.1004976.ref019"><label>19</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Sukumaran</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Holder</surname> <given-names>MT</given-names></name>. <article-title>DendroPy: a Python library for phylogenetic computing</article-title>. <source>Bioinformatics</source> <year>2010</year>, <volume>26</volume>(<issue>12</issue>), <fpage>1569</fpage>–<lpage>71</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1093/bioinformatics/btq228" xlink:type="simple">10.1093/bioinformatics/btq228</ext-link></comment> <object-id pub-id-type="pmid">20421198</object-id></mixed-citation></ref>
<ref id="pcbi.1004976.ref020"><label>20</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Edgar</surname> <given-names>RC</given-names></name>, <name name-style="western"><surname>Batzoglou</surname> <given-names>S</given-names></name>. <article-title>Multiple Sequence Alignment</article-title>. <source>Current Opinion in Structural Biology</source> <year>2006</year>, <volume>16</volume>, <fpage>368</fpage>–<lpage>373</lpage>. <object-id pub-id-type="pmid">16679011</object-id></mixed-citation></ref>
<ref id="pcbi.1004976.ref021"><label>21</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Wong</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Suchard</surname> <given-names>MA</given-names></name>, <name name-style="western"><surname>Huelsenbeck</surname> <given-names>JP</given-names></name>. <article-title>Alignment Uncertainty and Genomic Analysis</article-title>. <source>Science</source> <year>2008</year>, <volume>319</volume>, <fpage>416</fpage>–<lpage>17</lpage>.</mixed-citation></ref>
<ref id="pcbi.1004976.ref022"><label>22</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Felsenstein</surname> <given-names>J</given-names></name>. <source>Inferring Phylogenies</source>. <publisher-name>Sinauer Associates, Inc.</publisher-name>; <year>2004</year>.</mixed-citation></ref>
<ref id="pcbi.1004976.ref023"><label>23</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Yang</surname> <given-names>Z</given-names></name>. <article-title>Maximum likelihood phylogenetic estimation from DNA sequences with variable rates over sites: approximate methods</article-title>. <source>Journal of Molecular Evolution</source> <year>1994</year>, <volume>39</volume>(<issue>3</issue>), <fpage>306</fpage>–<lpage>14</lpage>. <object-id pub-id-type="pmid">7932792</object-id></mixed-citation></ref>
<ref id="pcbi.1004976.ref024"><label>24</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Lartillot</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Philippe</surname> <given-names>H</given-names></name>. <article-title>A Bayesian mixture model for across-site heterogeneities in the amino-acid replacement process</article-title>. <source>Molecular Biology and Evolution</source> <year>2004</year>. <volume>21</volume>(<issue>6</issue>), <fpage>1095</fpage>–<lpage>1109</lpage>. <object-id pub-id-type="pmid">15014145</object-id></mixed-citation></ref>
<ref id="pcbi.1004976.ref025"><label>25</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Akaike</surname> <given-names>H</given-names></name>. <article-title>Information Theory and an Extension of the Maximum Likelihood Principle</article-title>. <source>Proceedings of the 2nd International Symposium on Information Theory</source> <year>1973</year>, <fpage>267</fpage>–<lpage>81</lpage>.</mixed-citation></ref>
<ref id="pcbi.1004976.ref026"><label>26</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Abascal</surname> <given-names>F</given-names></name>, <name name-style="western"><surname>Zardoya</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Posada</surname> <given-names>D</given-names></name>. <article-title>ProtTest: selection of best-fit models of protein evolution</article-title>. <source>Bioinformatics</source> <year>2005</year>, <volume>21</volume>, <fpage>2104</fpage>–<lpage>05</lpage>. <object-id pub-id-type="pmid">15647292</object-id></mixed-citation></ref>
<ref id="pcbi.1004976.ref027"><label>27</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Anisimova</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Gil</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Dufayard</surname> <given-names>JF</given-names></name>, <name name-style="western"><surname>Dessimoz</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Gascuel</surname> <given-names>O</given-names></name>. <article-title>Survey of Branch Support Methods Demonstrates Accuracy, Power, and Robustness of Fast Likelihood-based Approximation Schemes</article-title>. <source>Systematic Biology</source> <year>2011</year>. <volume>60</volume>(<issue>5</issue>), <fpage>685</fpage>–<lpage>699</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1093/sysbio/syr041" xlink:type="simple">10.1093/sysbio/syr041</ext-link></comment> <object-id pub-id-type="pmid">21540409</object-id></mixed-citation></ref>
<ref id="pcbi.1004976.ref028"><label>28</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Fitch</surname> <given-names>W</given-names></name>. <article-title>Toward Defining the Course of Evolution: Minimum Change for a Specific Tree Topology</article-title>. <source>Systematic Zoology</source> <year>1971</year>, <volume>20</volume>(<issue>4</issue>), <fpage>406</fpage>–<lpage>16</lpage>.</mixed-citation></ref>
<ref id="pcbi.1004976.ref029"><label>29</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Williams</surname> <given-names>PD</given-names></name>, <name name-style="western"><surname>Pollock</surname> <given-names>DD</given-names></name>, <name name-style="western"><surname>Blackburne</surname> <given-names>BP</given-names></name>, <name name-style="western"><surname>Goldstein</surname> <given-names>RA</given-names></name>. <article-title>Assessing the Accuracy of Ancestral Protein Reconstruction Methods</article-title>. <source>PLoS Computational Biology</source> <year>2006</year>, <volume>2</volume>(<issue>6</issue>), <fpage>598</fpage>–<lpage>604</lpage>.</mixed-citation></ref>
<ref id="pcbi.1004976.ref030"><label>30</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Harms</surname> <given-names>MJ</given-names></name>, <name name-style="western"><surname>Thornton</surname> <given-names>JW</given-names></name>. <article-title>Analyzing protein structure and function using ancestral gene reconstruction</article-title>. <source>Current Opinion in Structural Biology</source> <year>2010</year>, <volume>20</volume>(<issue>3</issue>), <fpage>360</fpage>–<lpage>6</lpage>.</mixed-citation></ref>
<ref id="pcbi.1004976.ref031"><label>31</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Finnigan</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Hanson-Smith</surname> <given-names>V</given-names></name>, <name name-style="western"><surname>Stevens</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Thornton</surname> <given-names>JW</given-names></name>. <article-title>Evolution of increased complexity in a molecular machine</article-title>. <source>Nature</source> <year>2012</year>, <volume>481</volume>, <fpage>360</fpage>–<lpage>4</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1038/nature10724" xlink:type="simple">10.1038/nature10724</ext-link></comment> <object-id pub-id-type="pmid">22230956</object-id></mixed-citation></ref>
</ref-list>
</back>
</article>