<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article
  PUBLIC "-//NLM//DTD Journal Publishing DTD v3.0 20080202//EN" "http://dtd.nlm.nih.gov/publishing/3.0/journalpublishing3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="3.0" xml:lang="EN">
  <front>
    <journal-meta><journal-id journal-id-type="publisher-id">plos</journal-id><journal-id journal-id-type="nlm-ta">PLoS Comput Biol</journal-id><journal-id journal-id-type="pmc">ploscomp</journal-id><!--===== Grouping journal title elements =====--><journal-title-group><journal-title>PLoS Computational Biology</journal-title></journal-title-group><issn pub-type="ppub">1553-734X</issn><issn pub-type="epub">1553-7358</issn><publisher>
        <publisher-name>Public Library of Science</publisher-name>
        <publisher-loc>San Francisco, USA</publisher-loc>
      </publisher></journal-meta>
    <article-meta><article-id pub-id-type="publisher-id">PCOMPBIOL-D-11-01159</article-id><article-id pub-id-type="doi">10.1371/journal.pcbi.1002413</article-id><article-categories>
        <subj-group subj-group-type="heading">
          <subject>Research Article</subject>
        </subj-group>
        <subj-group subj-group-type="Discipline-v2">
          <subject>Biology</subject>
          <subj-group>
            <subject>Computational biology</subject>
            <subj-group>
              <subject>Population modeling</subject>
            </subj-group>
          </subj-group>
          <subj-group>
            <subject>Population biology</subject>
            <subj-group>
              <subject>Epidemiology</subject>
            </subj-group>
          </subj-group>
        </subj-group>
        <subj-group subj-group-type="Discipline-v2">
          <subject>Medicine</subject>
          <subj-group>
            <subject>Infectious diseases</subject>
          </subj-group>
        </subj-group>
        <subj-group subj-group-type="Discipline">
          <subject>Public Health and Epidemiology</subject>
          <subject>Infectious Diseases</subject>
          <subject>Computational Biology</subject>
        </subj-group>
      </article-categories><title-group><article-title>Inferring Epidemic Contact Structure from Phylogenetic Trees</article-title><alt-title alt-title-type="running-head">Epidemic Contact Structure from Phylogenetic Trees</alt-title></title-group><contrib-group>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Leventhal</surname>
            <given-names>Gabriel E.</given-names>
          </name>
          <xref ref-type="aff" rid="aff1">
            <sup>1</sup>
          </xref>
          <xref ref-type="corresp" rid="cor1">
            <sup>*</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Kouyos</surname>
            <given-names>Roger</given-names>
          </name>
          <xref ref-type="aff" rid="aff2">
            <sup>2</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Stadler</surname>
            <given-names>Tanja</given-names>
          </name>
          <xref ref-type="aff" rid="aff1">
            <sup>1</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>von Wyl</surname>
            <given-names>Viktor</given-names>
          </name>
          <xref ref-type="aff" rid="aff3">
            <sup>3</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Yerly</surname>
            <given-names>Sabine</given-names>
          </name>
          <xref ref-type="aff" rid="aff4">
            <sup>4</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Böni</surname>
            <given-names>Jürg</given-names>
          </name>
          <xref ref-type="aff" rid="aff5">
            <sup>5</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Cellerai</surname>
            <given-names>Cristina</given-names>
          </name>
          <xref ref-type="aff" rid="aff6">
            <sup>6</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Klimkait</surname>
            <given-names>Thomas</given-names>
          </name>
          <xref ref-type="aff" rid="aff7">
            <sup>7</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Günthard</surname>
            <given-names>Huldrych F.</given-names>
          </name>
          <xref ref-type="aff" rid="aff3">
            <sup>3</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Bonhoeffer</surname>
            <given-names>Sebastian</given-names>
          </name>
          <xref ref-type="aff" rid="aff1">
            <sup>1</sup>
          </xref>
        </contrib>
      </contrib-group><aff id="aff1"><label>1</label><addr-line>Institute of Integrative Biology, ETH Zurich, Zurich, Switzerland</addr-line>       </aff><aff id="aff2"><label>2</label><addr-line>Department of Ecology and Evolutionary Biology, Princeton University, Princeton, New Jersey, United States of America</addr-line>       </aff><aff id="aff3"><label>3</label><addr-line>Division of Infectious Diseases and Hospital Epidemiology, University Hospital Zurich, Zurich, Switzerland</addr-line>       </aff><aff id="aff4"><label>4</label><addr-line>Laboratory of Virology and AIDS Center, Geneva University Hospital, Geneva, Switzerland</addr-line>       </aff><aff id="aff5"><label>5</label><addr-line>Swiss National Center for Retroviruses, Institute of Medical Virology, University of Zurich, Zurich, Switzerland</addr-line>       </aff><aff id="aff6"><label>6</label><addr-line>Service of Immunology and Allergy, Lausanne University Hospital, Lausanne, Switzerland</addr-line>       </aff><aff id="aff7"><label>7</label><addr-line>Institute of Medical Microbiology, Department Biomedicine, University of Basel, Basel, Switzerland</addr-line>       </aff><contrib-group>
        <contrib contrib-type="editor" xlink:type="simple">
          <name name-style="western">
            <surname>Tanaka</surname>
            <given-names>Mark M.</given-names>
          </name>
          <role>Editor</role>
          <xref ref-type="aff" rid="edit1"/>
        </contrib>
      </contrib-group><aff id="edit1">University of New South Wales, Australia</aff><author-notes>
        <corresp id="cor1">* E-mail: <email xlink:type="simple">gabriel.leventhal@env.ethz.ch</email></corresp>
        <fn fn-type="con">
          <p>Conceived and designed the experiments: GEL RK TS SB. Performed the experiments: GEL. Analyzed the data: GEL RK TS SB. Contributed reagents/materials/analysis tools: GEL VvW SY JB CC TK HFG. Wrote the paper: GEL SB.</p>
        </fn>
      <fn fn-type="conflict">
        <p>The authors have declared that no competing interests exist.</p>
      </fn></author-notes><pub-date pub-type="collection">
        <month>3</month>
        <year>2012</year>
      </pub-date><pub-date pub-type="epub">
        <day>8</day>
        <month>3</month>
        <year>2012</year>
      </pub-date><volume>8</volume><issue>3</issue><elocation-id>e1002413</elocation-id><history>
        <date date-type="received">
          <day>4</day>
          <month>8</month>
          <year>2011</year>
        </date>
        <date date-type="accepted">
          <day>19</day>
          <month>1</month>
          <year>2012</year>
        </date>
      </history><!--===== Grouping copyright info into permissions =====--><permissions><copyright-year>2012</copyright-year><copyright-holder>Leventhal et al</copyright-holder><license><license-p>This is an open-access article distributed under the terms of the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p></license></permissions><abstract>
        <p>Contact structure is believed to have a large impact on epidemic spreading and consequently using networks to model such contact structure continues to gain interest in epidemiology. However, detailed knowledge of the exact contact structure underlying real epidemics is limited. Here we address the question whether the structure of the contact network leaves a detectable genetic fingerprint in the pathogen population. To this end we compare phylogenies generated by disease outbreaks in simulated populations with different types of contact networks. We find that the shape of these phylogenies strongly depends on contact structure. In particular, measures of tree imbalance allow us to quantify to what extent the contact structure underlying an epidemic deviates from a null model contact network and illustrate this in the case of random mixing. Using a phylogeny from the Swiss HIV epidemic, we show that this epidemic has a significantly more unbalanced tree than would be expected from random mixing.</p>
      </abstract><abstract abstract-type="summary">
        <title>Author Summary</title>
        <p>One of the recent key innovations in the epidemiology of infectious diseases was the incorporation of explicit contact structure (i.e. who can infect whom) into epidemiological models. Theoretical studies have generated a broad consensus in the field that knowledge of the contact network may help to greatly improve the control of the spread of epidemics. The key problem in the field, however, is that we lack knowledge regarding the actual contact structure underlying real epidemics. Much research is focused on trying to reconstruct actual contact networks in various ways (mobile phone usage data, electronic devices that measure physical proximity, patient interviews, etc). All of these approaches are highly labour intensive and are fraught with many difficulties. Here, we present a new approach which is based on readily available sequence data. Using the Swiss HIV epidemic as an example, we show that it displays strong indications of an underlying contact structure that strongly differs from random interactions, thus undercutting the assumption of random mixing which is commonly made in epidemiological models.</p>
      </abstract><funding-group><funding-statement>This study has been supported by the Swiss National Science Foundation (SNF grant #3247B0-112594 to HFG, SY, BL, #324730-120793 to HFG, #324730-130865 to HFG), and financed in the framework of the Swiss HIV Cohort Study, supported by the Swiss National Science Foundation (SNF grant #33CS30-134277) and the SHCS projects #470, 528, 569, the SHCS Research Foundation, the European Community's Seventh Framework Programme (grant FP7/2007–2013), under the Collaborative HIV and Anti-HIV Drug Resistance Network (CHAIN; grant 223131), and by a further research grant of the Union Bank of Switzerland, in the name of a donor to HFG, and an unrestricted research grant from Tibotec, Switzerland to HFG. Further support was provided by the Novartis Foundation, formerly Ciba-Geigy Jubilee Foundation and by a Swiss National Science Foundation Grant (PBEZP3-125726) to VvW. The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</funding-statement></funding-group><counts>
        <page-count count="10"/>
      </counts></article-meta>
  </front>
  <body>
    <sec id="s1">
      <title>Introduction</title>
      <p>Infectious disease epidemiology has a longstanding history of mathematical modeling. Simple population dynamical models assuming random-mixing between infected and susceptible individuals have yielded important insights into the dynamics and control of infectious diseases <xref ref-type="bibr" rid="pcbi.1002413-Anderson1">[1]</xref>. The assumption of random mixing has been made primarily for reasons of mathematical tractability, but it is unclear under which conditions this assumption is actually justified.</p>
      <p>To account for the effects of non-random mixing, networks that describe the explicit contact structure are increasingly being incorporated into models of infectious disease dynamics <xref ref-type="bibr" rid="pcbi.1002413-Newman1">[2]</xref>–<xref ref-type="bibr" rid="pcbi.1002413-Miller2">[7]</xref>. These models showed that important epidemiological quantities such as the size of an epidemic or requirements for an epidemic to spread depend sensitively on the contact structure <xref ref-type="bibr" rid="pcbi.1002413-May1">[8]</xref>–<xref ref-type="bibr" rid="pcbi.1002413-Keeling1">[11]</xref>. While this has led to important conceptual insights regarding disease dynamics and control, the applicability of these results to real world situations has been hampered by the paucity of data on actual contact networks. Great efforts are currently underway to infer contact structure from various sources of data <xref ref-type="bibr" rid="pcbi.1002413-Hamilton1">[12]</xref>–<xref ref-type="bibr" rid="pcbi.1002413-Salathe1">[16]</xref>. However, most approaches for the reconstruction of contact networks are highly labor-intensive. These approaches are also all based on host contact structure which is not always easily translated into contacts that are relevant for transmission. While some studies have focused on differentiating contacts that may or may not result in transmission <xref ref-type="bibr" rid="pcbi.1002413-Read1">[15]</xref>, there are many factors that can cause discrepancies between interactions of hosts and interactions that are relevant for transmission.</p>
      <p>Considerable efforts have recently been made to link methods of phylogenetic analysis with epidemiological models resulting in a new research area termed phylodynamics <xref ref-type="bibr" rid="pcbi.1002413-Grenfell1">[17]</xref>–<xref ref-type="bibr" rid="pcbi.1002413-Volz2">[19]</xref>. If the evolutionary rate of a pathogen is sufficiently fast, then it is conceivable that also the contact structure underlying an epidemic leaves a traceable fingerprint in the genetic structure of the pathogen population <xref ref-type="bibr" rid="pcbi.1002413-ODea1">[20]</xref>, <xref ref-type="bibr" rid="pcbi.1002413-Welch1">[21]</xref>. This contact network will only contain those contacts that are relevant for disease transmission and therefore could circumvent problems of translating host contacts into disease contacts.</p>
      <p>Using simulations of pathogen populations spreading on contact networks generated by various network models we compare the resulting phylogenetic trees. We find that quantitative measurements of tree shape such as the Sackin index contain information that can be used to differentiate between different classes of contact structures. Such descriptive measures of tree topology have previously been successfully used to infer evolutionary processes from phylogenetic trees <xref ref-type="bibr" rid="pcbi.1002413-Aldous1">[22]</xref>–[28]. Most importantly, the Sackin index can be used to test whether the contact structure significantly deviates from what would be expected under random mixing.</p>
    </sec>
    <sec id="s2">
      <title>Results</title>
      <sec id="s2a">
        <title>Network structure and tree balance</title>
        <p>To illustrate the effect of contact structure on the resulting phylogenetic tree, we perform simulations of epidemic outbreaks for three different network models: (a) the Erdös-Rényi (ER) random graph <xref ref-type="bibr" rid="pcbi.1002413-Erds1">[29]</xref>, (b) the Barabási-Albert (BA) graph <xref ref-type="bibr" rid="pcbi.1002413-Barabsi1">[30]</xref> and (c) the Watts-Strogatz (WS) graph <xref ref-type="bibr" rid="pcbi.1002413-Watts1">[31]</xref> with a low rewiring probability, <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e001" xlink:type="simple"/></inline-formula> (see Methods). Both the BA and the WS are representative for two important aspects of contact heterogeneity. Networks generated by the BA model have both a large variance in the degree distribution as well as short mean path lengths. Networks generated by the WS model have low degree distribution variance and long mean path lengths. We compare these network models to a full graph (FG), which corresponds to a model with random mixing.</p>
        <p>Each of the three network models can be tuned with different parameters. The ER model generates networks with Poisson degree distributions where the mean can be varied. The BA model produces scale-free networks with a power-law degree distribution. The WS model produces networks that have a high degree of local clustering, but a degree distribution that lies between a Dirac distribution (all nodes have the same degree) and a Poisson distribution.</p>
        <p>We track the exact spreading pattern (i.e. who infects whom) of a susceptible-infected-removed (SIR) epidemic for <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e002" xlink:type="simple"/></inline-formula> different networks generated by each model to obtain the infection tree for each of these networks. The parameters for the network models are chosen such that all networks have the same mean degree, <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e003" xlink:type="simple"/></inline-formula>, yet different degree distributions, path length distributions and clustering coefficients. <xref ref-type="fig" rid="pcbi-1002413-g001">Figure 1A</xref> shows the imbalance measured by the Sackin index of the resulting infection trees for the three network models and the full graph at different values of <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e004" xlink:type="simple"/></inline-formula>, captured by the transmissibility <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e005" xlink:type="simple"/></inline-formula> (see Methods for detailed definitions of the Sackin index and the transmissibility).</p>
        <fig id="pcbi-1002413-g001" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pcbi.1002413.g001</object-id>
          <label>Figure 1</label>
          <caption>
            <title>Imbalance of the infection tree for contact networks generated by three different models.</title>
            <p>Panel A shows the Sackin index as a measure of tree imbalance. Panel B shows the size of the epidemic outbreak for each of the network models at different values of transmissibility <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e006" xlink:type="simple"/></inline-formula>. All three models have the same mean number of neighbors <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e007" xlink:type="simple"/></inline-formula>. For the BA model, each vertex added in the preferential attachment is connected to <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e008" xlink:type="simple"/></inline-formula> nodes in the existing network, resulting in a mean degree of 8. The WS networks start with a ring lattice where each node is connected to its 4 closest neighbors on each side. Every link is then rewired with probability <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e009" xlink:type="simple"/></inline-formula>. The light shaded area shows the values lying between the 2.5-th and the 97.5-th percentile, the dark shaded area the values between the 25th and the 75th percentile and the solid lines are the mean of the simulations. Each data point corresponds to <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e010" xlink:type="simple"/></inline-formula> simulation runs on independent graphs. A plot showing the normalized Sackin index for these three network models can be found in the supporting <xref ref-type="supplementary-material" rid="pcbi.1002413.s009">text S2</xref>.</p>
          </caption>
          <graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1002413.g001" xlink:type="simple"/>
        </fig>
        <p>For large values of <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e011" xlink:type="simple"/></inline-formula> (large <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e012" xlink:type="simple"/></inline-formula>) the whole network is infected, independent of the contact structure. Not surprisingly, the epidemic size is similar for all network types in this parameter range, since almost all individuals in the population eventually become infected before the epidemic dies out. The balancedness of the resulting trees, however, differs significantly for the three network types. The ER model is virtually indistinguishable from the random mixing model (FG). For sufficiently large <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e013" xlink:type="simple"/></inline-formula> the BA model has higher imbalance than the ER and FG. Finally, the most striking difference in imbalance is observed for the WS.</p>
        <p>For low <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e014" xlink:type="simple"/></inline-formula> (low <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e015" xlink:type="simple"/></inline-formula>) the imbalance vanishes for all networks for the simple reason that no epidemic outbreak occurs (see <xref ref-type="fig" rid="pcbi-1002413-g001">Figure 1B</xref>). Interestingly, the imbalance is generally largest at <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e016" xlink:type="simple"/></inline-formula>, where the transmissibility is just large enough for an epidemic outbreak to occur. In this case each individual infects just one other individual on average, which results in an infection tree that continuously mostly branches off to one side and thus is maximally unbalanced (<xref ref-type="supplementary-material" rid="pcbi.1002413.s002">Figure S2</xref>).</p>
        <p>For all network types except the BA model, the imbalance of the transmission tree is maximal for values of <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e017" xlink:type="simple"/></inline-formula> right around <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e018" xlink:type="simple"/></inline-formula>, but then converges to a smaller value as <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e019" xlink:type="simple"/></inline-formula> approaches unity. This can be explained by the fact that the SIR infection process is equivalent to a birth-death process. When <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e020" xlink:type="simple"/></inline-formula>, the death rate is de facto zero (birth rate <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e021" xlink:type="simple"/></inline-formula> death rate) and thus all lineages survive to the end. If the death rate vanishes the expected imbalance of the resulting transmission trees is minimal and is given by the Yule model (see Methods).</p>
        <p>The level of imbalance of the transmission trees for the different network types shown in <xref ref-type="fig" rid="pcbi-1002413-g001">Figure 1</xref> obviously depends on the choice of the network model parameters. In the following we will investigate how imbalance depends on the average number of neighbors and on local connectivity. Moreover, we henceforth use the expected Sackin index given by the Yule model to define a normalized Sackin index (see Methods), which has an expected value of zero for infection trees based on an SIR model with death rate zero.</p>
      </sec>
      <sec id="s2b">
        <title>The effect of the mean number of neighbors on tree imbalance</title>
        <p>We focus on the ER graph because in the limit of a large number of neighbors this model is expected to converge to the random mixing model. Furthermore, to eliminate contributions to imbalance resulting from a non-zero death rate we show the results for <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e022" xlink:type="simple"/></inline-formula>. <xref ref-type="fig" rid="pcbi-1002413-g002">Figure 2</xref> shows the imbalance for an ER model with <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e023" xlink:type="simple"/></inline-formula> nodes and an average number of neighbors <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e024" xlink:type="simple"/></inline-formula>. The effect of <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e025" xlink:type="simple"/></inline-formula> on networks generated by the WS and BA models is reported in the supporting <xref ref-type="supplementary-material" rid="pcbi.1002413.s009">text S2</xref>.</p>
        <fig id="pcbi-1002413-g002" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pcbi.1002413.g002</object-id>
          <label>Figure 2</label>
          <caption>
            <title>Imbalance (normalized Sackin index) of the infection tree for ER random graphs with varying number of average neighbors.</title>
            <p>The light shaded area shows the values lying between the 2.5-th and 97.5-th percentile, the dark shaded area those between the 25th and 75th percentile. The dashed line is the expected value of the imbalance for a tree with the same number of leaves under the Yule model (equation (7)). The transmissibility is chosen <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e026" xlink:type="simple"/></inline-formula>. The inset shows the same data points on a log-log scale.</p>
          </caption>
          <graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1002413.g002" xlink:type="simple"/>
        </fig>
        <p>Increasing the mean number of neighbors essentially increases the number of infections caused by a single individual and therefore the imbalance is expected to decrease with increasing number of neighbors. This is confirmed by the results presented in <xref ref-type="fig" rid="pcbi-1002413-g002">Figure 2</xref>. A small average number of neighbors results in more unbalanced transmission trees for a reason that is similar to why nonzero death rates increase imbalance. Once a node has infected all of its <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e027" xlink:type="simple"/></inline-formula> neighbors, it can no longer infect anyone else and is essentially removed from the system despite remaining infectious.</p>
      </sec>
      <sec id="s2c">
        <title>The effect of mean path length on tree imbalance</title>
        <p>In the WS model, the mean path length is directly related to the rewiring probability <xref ref-type="bibr" rid="pcbi.1002413-Watts1">[31]</xref>. The WS model with rewiring probability <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e028" xlink:type="simple"/></inline-formula> essentially generates the same type of network as the ER model. Therefore the imbalance of the transmission trees resulting from epidemics spreading on such networks should converge with increasing rewiring probability to the same value as for ER random graphs. <xref ref-type="fig" rid="pcbi-1002413-g003">Figure 3</xref> shows imbalance as a function of the rewiring probability and transmissibility.</p>
        <fig id="pcbi-1002413-g003" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pcbi.1002413.g003</object-id>
          <label>Figure 3</label>
          <caption>
            <title>Normalized Sackin index for epidemics occurring on Watts-Strogatz graphs with varying rewiring probability.</title>
            <p>The total size of the population is <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e029" xlink:type="simple"/></inline-formula> and the color shows the size of the epidemic outbreak. The network is constructed by connecting each node to its 8 closest neighbors on a ring lattice, and then rewiring each link with a probability <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e030" xlink:type="simple"/></inline-formula>. At low rewiring and transmission probabilities, <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e031" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e032" xlink:type="simple"/></inline-formula> respectively, the epidemic only infects a small portion of the population and thus the Sackin index remains fairly small. Imbalance is largest for values of <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e033" xlink:type="simple"/></inline-formula> close to the critical value <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e034" xlink:type="simple"/></inline-formula> where the epidemic transition occurs.</p>
          </caption>
          <graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1002413.g003" xlink:type="simple"/>
        </fig>
        <p>We identify two limiting cases for the imbalance of the epidemic. For values of <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e035" xlink:type="simple"/></inline-formula> there is essentially no epidemic outbreak and the imbalance remains small. For values of <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e036" xlink:type="simple"/></inline-formula> close to but larger than <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e037" xlink:type="simple"/></inline-formula> an epidemic can occur and the imbalance is maximal. As <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e038" xlink:type="simple"/></inline-formula> increases further, the number of shortcuts in the network increases and the mean path length decreases, as does the imbalance. For values substantially larger than <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e039" xlink:type="simple"/></inline-formula> the network converges to something similar to an ER graph and hence the normalized imbalance converges to zero (for <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e040" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e041" xlink:type="simple"/></inline-formula>) or to a fixed value for finite populations and small mean degree.</p>
      </sec>
      <sec id="s2d">
        <title>Analytical insight</title>
        <p>In the supporting <xref ref-type="supplementary-material" rid="pcbi.1002413.s008">text S1</xref> we derive an analytical approximation for the normalized Sackin index given the transmission network (see <xref ref-type="supplementary-material" rid="pcbi.1002413.s001">Figure S1</xref>),<disp-formula><graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e042" xlink:type="simple"/><label>(1)</label></disp-formula>Here, <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e043" xlink:type="simple"/></inline-formula> is the average number of infections caused by an infected individual until that individual is removed (i.e. the excess degree in the transmission network) and <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e044" xlink:type="simple"/></inline-formula> is the mean shortest path length in the transmission network. This equation shows that assuming the transmission network were known, imbalance depends on one hand on the mean path length, <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e045" xlink:type="simple"/></inline-formula>, and on the other hand on the average excess degree <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e046" xlink:type="simple"/></inline-formula>. For networks generated by the configuration model, <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e047" xlink:type="simple"/></inline-formula> depends on the first and second moment of the degree distribution. BA networks are characterized by a large degree distribution variance, as well as a short mean path length. For low rewiring probabilities, WS networks have small degree distribution variances and large mean path lengths. 
These observations together with the analytical approximation in equation (1) can help explain why it is not always possible to distinguish between the BA and WS models when considering the Sackin index as a measure of tree topology (see <xref ref-type="fig" rid="pcbi-1002413-g001">Figure 1</xref>). This ambiguity is most pronounced when considering two idealized networks: a chain and a star. These two topologically very different networks would result in identical transmission trees (see <xref ref-type="supplementary-material" rid="pcbi.1002413.s002">Figure S2</xref>) and therefore be indistinguishable using tree imbalance alone.</p>
        <p>Note that <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e048" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e049" xlink:type="simple"/></inline-formula> in equation (1) refer to the transmission network rather than the actual contact network. The connection between contact networks and transmission networks has recently been studied in the context of epidemic percolation networks <xref ref-type="bibr" rid="pcbi.1002413-Kenah1">[32]</xref>. Unfortunately, the exact relationship between the quantities <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e050" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e051" xlink:type="simple"/></inline-formula> in the transmission network and the contact network has not yet been described. However, since the transmission network is a subgraph of the contact network, it is feasible to assume that contact networks that display long or short mean shortest paths also result in transmission networks with long or short mean shortest paths, respectively, and contact networks that have large or small mean excess degrees result in transmission networks with large or small mean excess degrees, respectively.</p>
      </sec>
      <sec id="s2e">
        <title>Sampling robustness</title>
        <p>Up to this point we have only considered the case where the full transmission network is known and we can thus infer the average phylogenetic tree of the disease outbreak. It is clear, however, that in the real world we only have access to a limited subset of leaves from a phylogenetic tree. It is thus necessary to study the robustness of the tree shape under random sampling of leaves.</p>
        <p><xref ref-type="fig" rid="pcbi-1002413-g004">Figure 4</xref> shows the imbalance of the tree as a function of the number of sampled lineages. All non-sampled branches are pruned from the tree and the sampled branches are joined together at their last common ancestor to create the sample tree (see <xref ref-type="fig" rid="pcbi-1002413-g005">Figure 5A</xref>). For small enough sampling sizes (around 1%) the ER and WS graphs become indistinguishable, indicating that the imbalance is driven by the finer structures of the tree, rather than the backbone. The imbalance of the BA network converges much more slowly to that of the ER network.</p>
        <fig id="pcbi-1002413-g004" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pcbi.1002413.g004</object-id>
          <label>Figure 4</label>
          <caption>
            <title>Effect of random sampling on tree balance for infection trees of an ER graph (red), WS graph (green) and BA graph (blue).</title>
            <p>The mean degree and the transmissibility are the same for all networks: <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e052" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e053" xlink:type="simple"/></inline-formula>. The rewiring probability for the WS is <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e054" xlink:type="simple"/></inline-formula>. The solid lines show the median over <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e055" xlink:type="simple"/></inline-formula> simulations and the light and dark shaded areas the 95% and 50% quantiles respectively. The dotted lines show the median normalized Sackin index for fully sampled trees of size equivalent to the sample size.</p>
          </caption>
          <graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1002413.g004" xlink:type="simple"/>
        </fig>
        <fig id="pcbi-1002413-g005" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pcbi.1002413.g005</object-id>
          <label>Figure 5</label>
          <caption>
            <title>Sampling schemes of tree leaves. The grey lines represent the full transmission tree.</title>
            <p>The red dashed lines are the reconstructed coalescent events of the sampled branches. (A) Random sampling: branches are randomly selected from the complete tree. (B) Sampling up to time <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e056" xlink:type="simple"/></inline-formula>: all transmission events that happened before time <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e057" xlink:type="simple"/></inline-formula> are kept. (C) Sampling at time <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e058" xlink:type="simple"/></inline-formula>: only branches alive at a given time <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e059" xlink:type="simple"/></inline-formula> are kept and the coalescent events are reconstructed.</p>
          </caption>
          <graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1002413.g005" xlink:type="simple"/>
        </fig>
        <p><xref ref-type="fig" rid="pcbi-1002413-g005">Figures 5B and 5C</xref> show two schemes of time sampling for which we study the effect on tree imbalance. In the first scheme we truncate the tree at a time point <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e060" xlink:type="simple"/></inline-formula> before the end of the epidemic (<xref ref-type="fig" rid="pcbi-1002413-g005">Figure 5B</xref>). This corresponds to the situation where samples of all individuals in an ongoing epidemic are available. In the second scheme, we use only those sequences from individuals that are infectious at a time point <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e061" xlink:type="simple"/></inline-formula> and exclude sequences from individuals who are no longer infectious or have died before <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e062" xlink:type="simple"/></inline-formula> (<xref ref-type="fig" rid="pcbi-1002413-g005">Figure 5C</xref>). This corresponds to a snapshot of an epidemic.</p>
        <p>In <xref ref-type="fig" rid="pcbi-1002413-g006">Figure 6A</xref>, we observe that tree balancedness saturates at a certain value for ER and BA models, even before the epidemic has stopped. In the case of the WS model, tree imbalance continues to grow exponentially until the last individual has been infected. This indicates that in the ER and BA models, the early stages of the epidemic contribute more strongly to tree imbalance. In contrast, in the WS model the late stage infections contribute more strongly than the early stage infections. This is consistent with the observations made in the case of random sampling, since random sampling tends to destroy the tree structure towards the tips of the tree, while conserving the structure towards the root of the tree. This differentiation can no longer be observed when a snapshot of the epidemic is used to create the tree (<xref ref-type="fig" rid="pcbi-1002413-g006">Figure 6B</xref>).</p>
        <fig id="pcbi-1002413-g006" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pcbi.1002413.g006</object-id>
          <label>Figure 6</label>
          <caption>
            <title>Time evolution of the normalized Sackin index, <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e063" xlink:type="simple"/></inline-formula> for the networks generated by the ER (red), BA (blue) and WS (green) model respectively.</title>
            <p>For all models, the mean number of neighbors <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e064" xlink:type="simple"/></inline-formula> and the total population size <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e065" xlink:type="simple"/></inline-formula>. The epidemic parameters are <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e066" xlink:type="simple"/></inline-formula>. For the WS model, the rewiring probability is <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e067" xlink:type="simple"/></inline-formula>. <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e068" xlink:type="simple"/></inline-formula> epidemics on different networks were simulated for each of the three network models. (A) Tree imbalance when all individuals that have been infected prior to the time <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e069" xlink:type="simple"/></inline-formula> are included in the tree. While both the ER and BA models saturate at a certain value of the normalized Sackin index, the WS model continues to grow exponentially with new infected individuals. (B) Only those individuals which are infectious at time <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e070" xlink:type="simple"/></inline-formula> are included in the tree.</p>
          </caption>
          <graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1002413.g006" xlink:type="simple"/>
        </fig>
        <p>The two schemes of time sampling are studied here because they are characteristic for data sampling in different biological contexts. The first scheme reflects the typical situation for real epidemics for which sequence information is sampled over a broad time window. The second scheme is more applicable to phylogenetic trees based on pathogen populations from within an individual host. While we have concentrated so far on the inference of epidemiological contact structure from phylogenetic trees, we note that our approach can also be used to study the imbalance of within-host trees, which may result from spatial structure or compartmentalization. Both these schemes are idealizations of available real data. In most situations the sample structure will in fact be a combination of one of the two time sampling schemes and random sampling as discussed in the previous section.</p>
      </sec>
      <sec id="s2f">
        <title>HIV epidemic</title>
        <p>Above we demonstrated that contact structure can result in strongly unbalanced trees. Here we investigate whether real epidemics also result in unbalanced trees. To this end we examine the imbalance of a phylogenetic tree constructed from 5961 patient sequences of the Swiss HIV cohort study <xref ref-type="bibr" rid="pcbi.1002413-Kouyos1">[33]</xref> (see <xref ref-type="fig" rid="pcbi-1002413-g007">Figure 7</xref>).</p>
        <fig id="pcbi-1002413-g007" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pcbi.1002413.g007</object-id>
          <label>Figure 7</label>
          <caption>
            <title>Tree imbalance of the Swiss HIV phylogenetic tree and 100 bootstrap trees (candlestick).</title>
            <p>The solid red curve shows the behavior of the normalized Sackin index for a tree with randomly sampled leaves from the complete HIV tree. The blue curve is the normalized Sackin index of an epidemic in a susceptible population displaying random mixing. The size of the susceptible population is chosen uniformly from the interval <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e071" xlink:type="simple"/></inline-formula>. The total epidemic size is chosen uniformly from the interval <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e072" xlink:type="simple"/></inline-formula>. The removal rate <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e073" xlink:type="simple"/></inline-formula> was chosen to be <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e074" xlink:type="simple"/></inline-formula> and transmission rate such that <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e075" xlink:type="simple"/></inline-formula>. We simulated <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e076" xlink:type="simple"/></inline-formula> outbreaks for each of the sample sizes of <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e077" xlink:type="simple"/></inline-formula>. The light shaded areas show the <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e078" xlink:type="simple"/></inline-formula> credible intervals, the dark shaded areas the <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e079" xlink:type="simple"/></inline-formula> credible intervals. 
The individual data points are the values of the normalized Sackin index for the three largest transmission clusters: heterosexuals/intravenous drug users (HET/IDU) and two men having sex with men (MSM) clusters.</p>
          </caption>
          <graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1002413.g007" xlink:type="simple"/>
        </fig>
        <p>Since SIR dynamics with low <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e080" xlink:type="simple"/></inline-formula> (i.e. small mean degree <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e081" xlink:type="simple"/></inline-formula> or transmissibility <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e082" xlink:type="simple"/></inline-formula> close to the critical value <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e083" xlink:type="simple"/></inline-formula>) can potentially also generate strongly unbalanced trees, we compare the imbalance of the HIV tree to an SIR epidemic with random mixing and an <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e084" xlink:type="simple"/></inline-formula>, corresponding to the range of realistic <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e085" xlink:type="simple"/></inline-formula> that has been estimated for the HIV epidemic in Switzerland <xref ref-type="bibr" rid="pcbi.1002413-Stadler1">[34]</xref>. The sampled individuals cover 30–40% of all Swiss HIV infected individuals and we therefore restrict the total epidemic size to the range <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e086" xlink:type="simple"/></inline-formula>. It has been argued that the HIV epidemic is still in the exponential stage in developed countries <xref ref-type="bibr" rid="pcbi.1002413-Bezemer1">[35]</xref>. However, because saturation of an epidemic also causes increased imbalance, we make the conservative assumption that the total population is finite and can be equal to the current epidemic size. 
We take the range of possible population sizes to be <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e087" xlink:type="simple"/></inline-formula>. As a null model, we use a likelihood-free test of departure from random mixing based on <xref ref-type="bibr" rid="pcbi.1002413-Drummond1">[36]</xref>. We repeatedly sample parameters uniformly from the intervals above and simulate an epidemic outbreak using these parameters under the assumption of random mixing. We then randomly sample between <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e088" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e089" xlink:type="simple"/></inline-formula> individuals from the simulated tree and calculate the normalized Sackin index of the resulting subtree (blue line and shaded areas in <xref ref-type="fig" rid="pcbi-1002413-g007">Figure 7</xref>). We compare this to subtrees with identical number of sampled individuals from the HIV tree from <xref ref-type="bibr" rid="pcbi.1002413-Kouyos1">[33]</xref> (red line and shaded areas in <xref ref-type="fig" rid="pcbi-1002413-g007">Figure 7</xref>; see <xref ref-type="supplementary-material" rid="pcbi.1002413.s003">Figures S3</xref>, <xref ref-type="supplementary-material" rid="pcbi.1002413.s004">S4</xref> and <xref ref-type="supplementary-material" rid="pcbi.1002413.s006">S6</xref> for an analysis using an alternative imbalance measure, as well as a more detailed view of the effect of individual parameters on tree imbalance).</p>
        <p>Comparing the HIV tree with an SIR epidemic with equal number of individuals connected by random mixing shows that the HIV tree exhibits strong imbalance. The normalized Sackin index of the HIV tree is <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e090" xlink:type="simple"/></inline-formula> with a minimum/maximum of <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e091" xlink:type="simple"/></inline-formula>/<inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e092" xlink:type="simple"/></inline-formula> based on 100 bootstrap trees constructed from sequences with the amino acid positions resampled. The range of values of the normalized Sackin index of the HIV tree as well as the bootstrap trees is outside the 95% confidence interval for the SIR model, implying that the imbalance of the HIV tree is statistically highly significant.</p>
        <p>One important component of contact structure in the HIV epidemic is the preferential transmission within transmission groups (such as heterosexuals, intravenous drug users, and men having sex with men) <xref ref-type="bibr" rid="pcbi.1002413-Kouyos1">[33]</xref>. Subepidemics occurring within these transmission groups are therefore expected to show decreased levels of imbalance. Indeed, calculating the Sackin index for the three largest transmission clusters <xref ref-type="bibr" rid="pcbi.1002413-Kouyos1">[33]</xref> reveals much more balanced trees in these subepidemics (see <xref ref-type="fig" rid="pcbi-1002413-g007">Figure 7</xref>). However, the observed level of imbalance is still significant, suggesting that contact structure is present even within these transmission groups. As we pointed out above, the imbalance in the SIR model increases with <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e093" xlink:type="simple"/></inline-formula> approaching <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e094" xlink:type="simple"/></inline-formula>. Therefore, the significance of the imbalance of the subepidemics depends on the choice of <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e095" xlink:type="simple"/></inline-formula> and thus <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e096" xlink:type="simple"/></inline-formula>.</p>
        <p>In summary, our analysis of the HIV tree reveals substantial imbalance in the entire epidemic, possibly extending to the subepidemics, which is consistent with what would be expected from our knowledge of HIV transmission.</p>
      </sec>
    </sec>
    <sec id="s3">
      <title>Discussion</title>
      <p>In this paper, we have studied the effect of different classes of contact networks to model SIR type epidemics. We show that simulations of epidemics on networks with non-random contact structure result in transmission trees with topologies that exhibit strong differences from tree topologies that would be obtained under the assumption of random mixing. Measures of tree imbalance such as the (normalized) Sackin index can be used to reveal such differences and to quantify the statistical significance of departure from models assuming random mixing.</p>
      <p>Epidemiological properties, such as rate of spread or probability of outbreak, are known to depend sensitively on contact structure. If appropriate genetic data are available, then the approach presented here allows testing whether an epidemic may be appropriately modeled by standard models assuming random mixing. Conversely, if one is interested in phylogenetic tree structure of infectious pathogens, then knowledge of the contact structure in the host population will be important for the correct interpretation of the tree topology. Such contact structure may be on a coarse grained level (e.g. in between cities for human infectious diseases) or on a fine grained level (e.g. contact patterns between individuals). The level at which a phylogenetic tree is able to resolve any contact structure depends on the rate of evolution of the pathogen. In cases such as HIV, where the rates of evolution are high enough to result in substantial genetic differences between virus populations of individual hosts, a phylogenetic tree may reveal contact structure down to the individual level. Indeed our analysis of the phylogenetic tree underlying the Swiss HIV epidemic provides evidence for non-random contact structure on the population level as well as the individual level. A considerable part of the imbalance of the HIV tree is likely attributable to a high-level contact structure that arises from preferential transmission within transmission groups (such as heterosexuals, intravenous drug users, and men having sex with men). An analysis of the imbalance of subtrees corresponding to individual transmission groups, however, also reveals a signal indicative of non-random contact structure within these transmission groups.</p>
      <p>The importance of contact structure for epidemiological processes has been clearly demonstrated by a large number of theoretical studies <xref ref-type="bibr" rid="pcbi.1002413-Newman1">[2]</xref>, <xref ref-type="bibr" rid="pcbi.1002413-Meyers1">[4]</xref>, <xref ref-type="bibr" rid="pcbi.1002413-Volz1">[5]</xref>, <xref ref-type="bibr" rid="pcbi.1002413-May1">[8]</xref>, <xref ref-type="bibr" rid="pcbi.1002413-PastorSatorras2">[10]</xref>, <xref ref-type="bibr" rid="pcbi.1002413-Keeling1">[11]</xref>, <xref ref-type="bibr" rid="pcbi.1002413-Moore1">[37]</xref>. This has sparked considerable interest in determining the contact structure that underlies the transmission of different pathogens <xref ref-type="bibr" rid="pcbi.1002413-Hamilton1">[12]</xref>, <xref ref-type="bibr" rid="pcbi.1002413-Liljeros1">[13]</xref>, <xref ref-type="bibr" rid="pcbi.1002413-Salathe1">[16]</xref>. The determination of such contact networks is fraught with difficulties. Contact networks based on patient interviews may suffer from incompleteness, inaccuracy and in some cases also lack of reliability of patient information. Contact networks derived from devices that measure physical proximity often do not have sufficient spatial resolution or may neglect to account for important pathogen specific factors.</p>
      <p>Our method takes a first step to infer contact structure from genetic data. In comparison to the other methods mentioned above it has the advantage of being based on data that are readily available for many important pathogens. Moreover, our approach focuses only on those contacts that have led to transmission rather than other contacts between individuals that may be irrelevant for the spread of the epidemic. The method presented here allows testing for deviations from the assumption of random mixing. It is possible to extend the approach to test departure from BA or WS networks or other network models (e.g. configuration model networks with specific degree distributions), equivalent to the Swiss HIV epidemic (see <xref ref-type="sec" rid="s2">Results</xref>). Thus our approach is able to distinguish between different types of contact networks in a statistical sense, but it does not yield the actual contact network that underlies the epidemic. The choice of null model will strongly depend on the epidemic studied. Different diseases have different transmission routes and thus a contact that is relevant for transmission for one disease may not be relevant for transmission of another disease.</p>
      <p>Our method is based on imbalance, which is only a crude measure of tree topology. We have shown that imbalance cannot distinguish between BA and WS networks in our case. We expect that using other independent measurements of tree topology can reveal further information on the structure of the underlying contact network. Branch lengths are one such measure <xref ref-type="bibr" rid="pcbi.1002413-Aldous1">[22]</xref>, <xref ref-type="bibr" rid="pcbi.1002413-Mooers1">[27]</xref>. In simulated epidemic outbreaks, where the exact waiting times between infection and recovery events are known, branch lengths can be used together with tree imbalance to distinguish between ER, BA and WS models (<xref ref-type="supplementary-material" rid="pcbi.1002413.s007">Figure S7</xref>). It is important to note that maximum likelihood analyses typically provide trees where branch lengths represent evolutionary time. The branch length statistic used in <xref ref-type="supplementary-material" rid="pcbi.1002413.s007">Figure S7</xref> requires edge lengths in calendar time. In order to obtain accurate branch length estimates in calendar time rather than evolutionary time, we need to allow for the observed variation in evolutionary rates across branches, e.g. by using relaxed clock models <xref ref-type="bibr" rid="pcbi.1002413-Drummond2">[38]</xref>. Due to the model complexity, these analyses are typically done in a Bayesian MCMC framework which does not converge for datasets of our size using the current implementation <xref ref-type="bibr" rid="pcbi.1002413-Drummond3">[39]</xref>. Thus the reliability of branch length estimates in our reconstructed phylogenies is questionable and this measure should only be used when confidence in branch lengths of the reconstructed tree is very high.</p>
      <p>The imbalance of the reconstructed phylogenies depends on the genetic data used. Sampling biases at the genetic level can result in a strongly unbalanced tree <xref ref-type="bibr" rid="pcbi.1002413-Mooers1">[27]</xref>, even if the underlying population is randomly mixed. This sampling bias will be reflected in the inferred contact structure. In this sense, if connected subsets of the population are more densely sampled than others, the resulting contact structure will show that these individuals are much more highly interconnected than the other individuals that belong to those sub-populations that are only sparsely sampled. This can be both advantageous as well as disadvantageous, depending on what the contact structure should reflect. If the sampling of genetic data is high in those sub-populations where we require high resolution, then the inferred contact structure will be representative of this sub-population, but not of those that are poorly sampled. Our method would therefore reject an epidemic model of contact structure where all individuals are equally likely to be interconnected for one where some individuals are highly connected (i.e. those from the densely sampled sub-population) and others are weakly connected (i.e. those from the sparsely sampled sub-population).</p>
      <p>We also note that the approach presented here can be applied to the analysis of phylogenetic trees based on pathogen populations within an infected individual. The models for the dynamics of pathogen populations within an infected individual typically are also based on the assumption of random mixing and our approach would allow testing whether this assumption is fulfilled. For phylogenetic trees based on within-host data, imbalance would likely reflect a compartmentalization of pathogen replication and could thus provide important insight into mechanisms of pathogenesis.</p>
    </sec>
    <sec id="s4" sec-type="materials|methods">
      <title>Materials and Methods</title>
      <sec id="s4a">
        <title>Ethics statement</title>
        <p>The Swiss HIV cohort study was approved by individual local institutional review boards of all participating centers (<ext-link ext-link-type="uri" xlink:href="http://www.shcs.ch" xlink:type="simple">www.shcs.ch</ext-link>). Written informed consent was obtained for each SHCS study participant.</p>
      </sec>
      <sec id="s4b">
        <title>Deterministic SIR/SI model</title>
        <p>We consider a disease spreading amongst a susceptible population that displays susceptible-infected-removed (SIR) type dynamics <xref ref-type="bibr" rid="pcbi.1002413-Anderson1">[1]</xref>, <xref ref-type="bibr" rid="pcbi.1002413-Kermack1">[40]</xref>. In the limit of large population size and random mixing the model can be described by the simple system of differential equations<disp-formula><graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e097" xlink:type="simple"/><label>(2)</label></disp-formula><disp-formula><graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e098" xlink:type="simple"/><label>(3)</label></disp-formula><disp-formula><graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e099" xlink:type="simple"/><label>(4)</label></disp-formula><inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e100" xlink:type="simple"/></inline-formula>, <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e101" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e102" xlink:type="simple"/></inline-formula> are the number of susceptible, infected, and removed individuals in each compartment at time <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e103" xlink:type="simple"/></inline-formula>. Here, <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e104" xlink:type="simple"/></inline-formula> is the rate of transmission per contact between a susceptible and infected individual and <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e105" xlink:type="simple"/></inline-formula> is the removal rate of infected individuals. 
In the context of a network the transmissibility <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e106" xlink:type="simple"/></inline-formula> is the probability that an individual will transmit the disease across a single contact over the whole duration of the epidemic. This can be calculated from <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e107" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e108" xlink:type="simple"/></inline-formula> by averaging over the distribution of waiting times for transmission and recovery. For a given recovery time <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e109" xlink:type="simple"/></inline-formula>, the probability that transmission occurs before the individual recovers is given by <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e110" xlink:type="simple"/></inline-formula>. Thus, if the recovery times are exponentially distributed <xref ref-type="bibr" rid="pcbi.1002413-Volz1">[5]</xref>,<disp-formula><graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e111" xlink:type="simple"/><label>(5)</label></disp-formula>The basic reproductive ratio <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e112" xlink:type="simple"/></inline-formula> is the number of secondary infections caused by an infected individual placed into a wholly susceptible population (<inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e113" xlink:type="simple"/></inline-formula>) <xref ref-type="bibr" rid="pcbi.1002413-Anderson1">[1]</xref>. 
In fully mixed populations, an epidemic can occur when <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e114" xlink:type="simple"/></inline-formula>. Here, <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e115" xlink:type="simple"/></inline-formula>. In non-homogeneous populations this threshold also depends on the contact structure. For networks generated by the configuration model <xref ref-type="bibr" rid="pcbi.1002413-Molloy1">[41]</xref>, <xref ref-type="bibr" rid="pcbi.1002413-Newman2">[42]</xref>, i.e. random contact networks with a given degree distribution, the expected total number of second neighbors (neighbors of my neighbors) is given by <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e116" xlink:type="simple"/></inline-formula>, where <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e117" xlink:type="simple"/></inline-formula> and <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e118" xlink:type="simple"/></inline-formula> are the first and second moments of the degree distribution <xref ref-type="bibr" rid="pcbi.1002413-Newman3">[43]</xref>. Then <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e119" xlink:type="simple"/></inline-formula> is the average number of nodes two steps away per neighbor. Thus the expected number of secondary infections per infected individual is <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e120" xlink:type="simple"/></inline-formula>. 
For an epidemic to occur <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e121" xlink:type="simple"/></inline-formula> must be greater than 1 or <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e122" xlink:type="simple"/></inline-formula> <xref ref-type="bibr" rid="pcbi.1002413-Newman1">[2]</xref>, <xref ref-type="bibr" rid="pcbi.1002413-Volz1">[5]</xref>. When the population additionally displays community structure (such as clustering and modularity) this threshold changes again. For example, the Watts-Strogatz model incorporates local connectedness by starting with a regular network where every node is connected to a fixed number of close neighbors. Then, each connection is rewired to a randomly chosen node with a certain probability, thus creating shortcuts in the contact network <xref ref-type="bibr" rid="pcbi.1002413-Watts1">[31]</xref>. In this case the threshold for an epidemic outbreak also depends on this rewiring probability <xref ref-type="bibr" rid="pcbi.1002413-Moore1">[37]</xref>.</p>
        <p>Since we are not interested in the exact values of the parameters, we can choose <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e123" xlink:type="simple"/></inline-formula> by rescaling <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e124" xlink:type="simple"/></inline-formula> without loss of generality. Furthermore, it should be noted that as <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e125" xlink:type="simple"/></inline-formula> approaches 1, <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e126" xlink:type="simple"/></inline-formula> gets much larger than <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e127" xlink:type="simple"/></inline-formula>. Hence, the SIR model with large <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e128" xlink:type="simple"/></inline-formula> effectively reduces to an SI model.</p>
      </sec>
      <sec id="s4c">
        <title>Simulation of transmission trees</title>
        <p>In order to simulate transmission trees of epidemics occurring in heterogeneously connected populations, a C++ implementation of Gillespie's Next-Reaction Method was used <xref ref-type="bibr" rid="pcbi.1002413-Gillespie1">[44]</xref>. At the beginning of the simulation a single node is infected and a recovery time <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e129" xlink:type="simple"/></inline-formula> is sampled from the distribution of recovery times, <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e130" xlink:type="simple"/></inline-formula>. Each of the node's susceptible neighbors is then infected after a time <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e131" xlink:type="simple"/></inline-formula> chosen from the distribution of infection times, <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e132" xlink:type="simple"/></inline-formula>. If the infection time is shorter than the recovery time, the link is activated and the node is infected at time <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e133" xlink:type="simple"/></inline-formula>. The procedure is then repeated for each newly infected node. In case a node is scheduled to be infected by multiple neighbors, the earliest infection takes priority. By keeping track of who-infects-whom, each epidemic outbreak yields an infection tree.</p>
      </sec>
      <sec id="s4d">
        <title>Network generation</title>
        <p>We study three different network models: (a) the Erdös-Rényi (ER) random graph <xref ref-type="bibr" rid="pcbi.1002413-Erds1">[29]</xref>, (b) the Barabási-Albert (BA) graph <xref ref-type="bibr" rid="pcbi.1002413-Barabsi1">[30]</xref> and (c) the Watts-Strogatz (WS) graph <xref ref-type="bibr" rid="pcbi.1002413-Watts1">[31]</xref>. In the ER random graph every individual is connected to every other individual with a certain probability <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e134" xlink:type="simple"/></inline-formula>. This results in a graph with a Poissonian degree distribution with mean number of neighbors <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e135" xlink:type="simple"/></inline-formula>. The BA graph is constructed by preferential attachment. Each node is sequentially added to the graph and attached to <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e136" xlink:type="simple"/></inline-formula> neighbors, where nodes that already have many neighbors have a higher probability of being connected to the new node. This results in a degree distribution with a power-law tail. Such graphs are often referred to as scale-free <xref ref-type="bibr" rid="pcbi.1002413-Barabsi1">[30]</xref>. Finally, WS graphs start out with a ring lattice, in which every node is connected to its <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e137" xlink:type="simple"/></inline-formula> nearest neighbors. Each link is then updated with probability <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e138" xlink:type="simple"/></inline-formula> in such a way that one end of the link is rewired to a randomly chosen node. 
Thus the node that loses the link decreases its degree by one and the node that the link is rewired to increases its degree by one. This process introduces shortcuts in the graph (i.e. decreases the mean shortest path) <xref ref-type="bibr" rid="pcbi.1002413-Watts1">[31]</xref>. For <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e139" xlink:type="simple"/></inline-formula> the graph has strongly connected communities. For <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e140" xlink:type="simple"/></inline-formula> all links are randomly assigned and the graph is similar to the ER graph with the same mean number of neighbors (equal number of edges) <xref ref-type="bibr" rid="pcbi.1002413-Newman2">[42]</xref>. For intermediate values of <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e141" xlink:type="simple"/></inline-formula>, the graphs often display both strong community structure and short path lengths, which are characteristics of small-world graphs <xref ref-type="bibr" rid="pcbi.1002413-Watts1">[31]</xref>.</p>
      </sec>
      <sec id="s4e">
        <title>Phylogenetic tree shape</title>
        <p>The shape of a phylogenetic tree is described in part by its imbalance. Here, we use the Sackin index as a measure of imbalance <xref ref-type="bibr" rid="pcbi.1002413-Sackin1">[45]</xref>, because of its analogy to path lengths in graph theory. The Sackin index is defined as follows: Let the distance <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e142" xlink:type="simple"/></inline-formula> of a leaf <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e143" xlink:type="simple"/></inline-formula> be the number of internal nodes that need to be traversed when following the path from the root of the tree to a leaf <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e144" xlink:type="simple"/></inline-formula>. Then the Sackin index is the sum of all such paths,<disp-formula><graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e145" xlink:type="simple"/><label>(6)</label></disp-formula></p>
        <p>When considering transmission trees, it is important to differentiate between two cases: The first case considers the complete transmission trees of an epidemic outbreak. This is essentially equivalent to a birth/death process. From the perspective of an individual, death corresponds to removal from the infectious class or the depletion of all its susceptible neighbors. In either case that individual can no longer infect anyone else. Thus the transmission trees have branches that do not all survive until the end of the epidemic.</p>
        <p>In the second type of tree, all lineages are extant at the end of the epidemic. Such a transmission tree could be generated by an SI-type epidemic in an infinite size population where each individual can infect every other individual. These trees are generated by the Yule model.</p>
        <p>The expected value of the Sackin index for a given number of leaves <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e146" xlink:type="simple"/></inline-formula> under the Yule model is given by <xref ref-type="bibr" rid="pcbi.1002413-Kirkpatrick1">[26]</xref>,<disp-formula><graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e147" xlink:type="simple"/><label>(7)</label></disp-formula>with Euler's constant <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e148" xlink:type="simple"/></inline-formula>.</p>
        <p>An exact expression for the expected value of the Sackin index is not known in the case where some lineages die before the end of the epidemic. However, it can be assumed that this will in general result in slightly more unbalanced trees.</p>
        <p>Since the expected value of the Sackin index increases with tree size, we introduce a normalized Sackin index defined by<disp-formula><graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e149" xlink:type="simple"/><label>(8)</label></disp-formula><inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e150" xlink:type="simple"/></inline-formula> measures the relative deviation of the tree imbalance from what would be expected for an SI epidemic (or SIR with <inline-formula><inline-graphic mimetype="image" xlink:href="info:doi/10.1371/journal.pcbi.1002413.e151" xlink:type="simple"/></inline-formula>).</p>
      </sec>
    </sec>
    <sec id="s5">
      <title>Supporting Information</title>
      <supplementary-material id="pcbi.1002413.s001" mimetype="image/tiff" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1002413.s001" xlink:type="simple">
        <label>Figure S1</label>
        <caption>
          <p>An example illustrating the difference between (A) a contact network, (B) a transmission network where one initially infected individual (red node) causes a disease to spread along the red edges and (C) the resulting phylogenetic tree if the order in which the infections took place is known. This distinction has recently also been illustrated by Welch et al., Viruses 2011.</p>
          <p>(TIF)</p>
        </caption>
      </supplementary-material>
      <supplementary-material id="pcbi.1002413.s002" mimetype="image/tiff" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1002413.s002" xlink:type="simple">
        <label>Figure S2</label>
        <caption>
          <p>Cartoon of an epidemic outbreak on two idealized contact networks: (A) A chain representing an extreme case of a WS network without long-range connections. (B) An extreme case of preferential attachment, where one single center node is connected to all other nodes. The branching points in the tree represent infection events and the colors indicate the lifespan of the corresponding node in the network. Branch lengths have no significance in these cartoons.</p>
          <p>(TIF)</p>
        </caption>
      </supplementary-material>
      <supplementary-material id="pcbi.1002413.s003" mimetype="image/tiff" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1002413.s003" xlink:type="simple">
        <label>Figure S3</label>
        <caption>
          <p>Density plot of two imbalance measures (x-axis: normalized Sackin index; y-axis: s-index) for 10 000 trees generated under the SIR model with random mixing. The parameters are chosen the same as for <xref ref-type="fig" rid="pcbi-1002413-g007">figure 7</xref> in the main text: <italic>R<sub>0</sub></italic> = 2.14; total susceptible population: <italic>N</italic> = 40 000; maximum epidemic size: <italic>M</italic> = 20 000; number of sampled individuals: <italic>n</italic> = 5 961. The colors in the scatter plot indicate the frequency at which the SIR model resulted in a tree with imbalance measures in the given range. The large red dot indicates the maximum likelihood HIV tree and the small red dots are 100 bootstrap trees. For the normalized Sackin index all HIV trees lie outside of the distribution of SIR trees (to the right in the <italic>x</italic>-direction). The s-index, however, is unable to reject the SIR model for the HIV data. Only a small number of trees (437/10 000) generated by this process are rejected by the Sackin index test while not being rejected by the s-Index test (data lies outside the 95% interval).</p>
          <p>(TIF)</p>
        </caption>
      </supplementary-material>
      <supplementary-material id="pcbi.1002413.s004" mimetype="image/tiff" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1002413.s004" xlink:type="simple">
        <label>Figure S4</label>
        <caption>
          <p>s-Index test for sub-trees of the HIV tree. <italic>y</italic> = 0 indicates the 97.5-th quantile of the s-Index for trees generated by the SIR model. Positive <italic>y</italic>-values indicate a rejection of the SIR model. MSM1 and MSM2 are the two largest MSM (men-having-sex-with-men) transmission groups in the HIV tree. HET-IDU is the largest heterosexual/intravenous-drug-user transmission group. Thus, despite the s-index not being able to reject the SIR model for the full HIV tree, sub-samples of the HIV tree can reject the SIR model using the s-index.</p>
          <p>(TIF)</p>
        </caption>
      </supplementary-material>
      <supplementary-material id="pcbi.1002413.s005" mimetype="image/tiff" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1002413.s005" xlink:type="simple">
        <label>Figure S5</label>
        <caption>
          <p>Blue: Normalized Sackin index for networks generated by the WS model for varying rewiring probability, transmissibility <italic>T</italic> = 0.6, <italic>K</italic> = 8 and <italic>N</italic> = 5000. Green: Normalized Sackin index for a graph generated by the configuration model with degree sequence equal to the WS model. The light and dark shaded areas represent the 95 and 50 percent confidence intervals.</p>
          <p>(TIF)</p>
        </caption>
      </supplementary-material>
      <supplementary-material id="pcbi.1002413.s006" mimetype="image/tiff" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1002413.s006" xlink:type="simple">
        <label>Figure S6</label>
        <caption>
          <p>Distribution of tree imbalance statistics for trees generated under the SIR model with random mixing for different parameter combinations and <italic>R</italic><sub>0</sub>. The colors in the scatter plot indicate the frequency at which the SIR model resulted in a tree with imbalance measures in the given range. The red dot indicates the real HIV tree. The normalized Sackin index rejects the SIR model for all choices of <italic>R</italic><sub>0</sub>, total susceptible population size and maximum epidemic size. (A) Total susceptible population: <italic>N</italic> = 40 000; maximum epidemic size: <italic>M</italic> = 20 000; number of sampled individuals: <italic>n</italic> = 5 961. (B) Total susceptible population: <italic>N</italic> = 30 000; maximum epidemic size: <italic>M</italic> = 20 000; number of sampled individuals: <italic>n</italic> = 5 961. (C) Total susceptible population: <italic>N</italic> = 40 000; maximum epidemic size: <italic>M</italic> = 10 000; number of sampled individuals: <italic>n</italic> = 5 961.</p>
          <p>(TIF)</p>
        </caption>
      </supplementary-material>
      <supplementary-material id="pcbi.1002413.s007" mimetype="image/tiff" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1002413.s007" xlink:type="simple">
        <label>Figure S7</label>
        <caption>
          <p>Tree imbalance (normalized Sackin index) plotted against the mean external branch length divided by the mean internal branch length at different values of transmissibility. This figure complements the corresponding figure in the main text. The different colors and point shapes are networks generated by the three network models: ER (green circles), BA (red triangles), WS (blue diamonds). In all panels, <italic>N</italic> = 5000 and γ = 1. The connectivity for the WS model is <italic>p</italic> = 0.01. For each value of <italic>T</italic>, 2 000 simulations were performed per network model. Only those simulations where an outbreak occurred (epidemic size &gt;0.05<italic>N</italic>) are plotted. For those values of <italic>T</italic> for which tree imbalance cannot distinguish between network models (e.g. at <italic>T</italic> = 0.2 the ER and BA model overlap in tree imbalance), information about branch lengths can potentially provide additional resolution.</p>
          <p>(TIF)</p>
        </caption>
      </supplementary-material>
      <supplementary-material id="pcbi.1002413.s008" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1002413.s008" xlink:type="simple">
        <label>Text S1</label>
        <caption>
          <p>Detailed description of the methods. (A) Derivation of the Sackin index for a random network generated by the configuration model. (B) Factors leading to imbalanced trees. (C) Comparison of different tree balance statistics. (D) Testing the HIV tree using different tree imbalance statistics. (E) Effect of path length on tree imbalance for WS networks.</p>
          <p>(PDF)</p>
        </caption>
      </supplementary-material>
      <supplementary-material id="pcbi.1002413.s009" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1002413.s009" xlink:type="simple">
        <label>Text S2</label>
        <caption>
          <p>Detailed analysis of the effect of population size <italic>N</italic> and mean number of neighbors <italic>k</italic> on tree imbalance.</p>
          <p>(PDF)</p>
        </caption>
      </supplementary-material>
    </sec>
  </body>
  <back>
    <ack>
      <p>We thank the patients participating in the SHCS for their commitment, all the study nurses and study physicians for their invaluable work, the data center for data management, all the resistance testing laboratories for their high-quality work, and SmartGene for providing an impeccable database service.</p>
      <p>The members of the Swiss HIV Cohort Study are Barth J, Battegay M, Bernasconi E, Böni J, Bucher HC, Bürgisser P, Burton-Jeangros C, Calmy A, Cavassini M, Egger M, Elzi L, Fehr J, Flepp M, Francioli P (President of the SHCS), Furrer H (Chairman of the Clinical and Laboratory Committee), Fux CA, Gorgievski M, Günthard HF (Chairman of the Scientific Board), Hasse B, Hirsch HH, Hirschel B, Hösli I, Kahlert C, Kaiser L, Keiser O, Kind C, Klimkait T, Kovari H, Ledergerber B, Martinetti G, Martinez de Tejada B, Müller N, Nadal D, Pantaleo G, Rauch A, Regenass S, Rickenbach M (Head of Data Center), Rudin C (Chairman of the Mother &amp; Child Substudy), Schmid P, Schultze D, Schöni-Affolter F, Schüpbach J, Speck R, Taffé P, Telenti A, Trkola A, Vernazza P, von Wyl V, Weber R, Yerly S.</p>
    </ack>
    <ref-list>
      <title>References</title>
      <ref id="pcbi.1002413-Anderson1">
        <label>1</label>
        <element-citation publication-type="other" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Anderson</surname><given-names>R</given-names></name><name name-style="western"><surname>May</surname><given-names>R</given-names></name></person-group>             <year>1992</year>             <article-title>Infectious diseases of humans: dynamics and control.</article-title>             <comment>Oxford University Press, USA</comment>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Newman1">
        <label>2</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Newman</surname><given-names>MEJ</given-names></name></person-group>             <year>2002</year>             <article-title>Spread of epidemic disease on networks.</article-title>             <source>Phys Rev E</source>             <volume>66</volume>             <fpage>016128</fpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Moreno1">
        <label>3</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Moreno</surname><given-names>Y</given-names></name><name name-style="western"><surname>Pastor-Satorras</surname><given-names>R</given-names></name><name name-style="western"><surname>Vespignani</surname><given-names>A</given-names></name></person-group>             <year>2002</year>             <article-title>Epidemic outbreaks in complex heterogeneous networks.</article-title>             <source>Eur Phys J B</source>             <volume>26</volume>             <fpage>521</fpage>             <lpage>529</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Meyers1">
        <label>4</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Meyers</surname><given-names>LA</given-names></name><name name-style="western"><surname>Newman</surname><given-names>MEJ</given-names></name><name name-style="western"><surname>Pourbohloul</surname><given-names>B</given-names></name></person-group>             <year>2006</year>             <article-title>Predicting epidemics on directed contact networks.</article-title>             <source>J Theor Biol</source>             <volume>240</volume>             <fpage>400</fpage>             <lpage>418</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Volz1">
        <label>5</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Volz</surname><given-names>E</given-names></name></person-group>             <year>2008</year>             <article-title>SIR dynamics in random networks with heterogeneous connectivity.</article-title>             <source>J Math Biol</source>             <volume>56</volume>             <fpage>293</fpage>             <lpage>310</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Miller1">
        <label>6</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Miller</surname><given-names>JC</given-names></name><name name-style="western"><surname>Slim</surname><given-names>AC</given-names></name><name name-style="western"><surname>Volz</surname><given-names>EM</given-names></name></person-group>             <year>2011</year>             <article-title>Edge-based compartmental modelling for infectious disease spread.</article-title>             <source>J R Soc Interface</source>             <comment>doi:<ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1098/rsif.2011.0403" xlink:type="simple">10.1098/rsif.2011.0403</ext-link></comment>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Miller2">
        <label>7</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Miller</surname><given-names>JC</given-names></name></person-group>             <year>2011</year>             <article-title>A note on a paper by Erik Volz: SIR dynamics in random networks.</article-title>             <source>J Math Biol</source>             <volume>62</volume>             <fpage>349</fpage>             <lpage>358</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-May1">
        <label>8</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>May</surname><given-names>RM</given-names></name><name name-style="western"><surname>Lloyd</surname><given-names>AL</given-names></name></person-group>             <year>2001</year>             <article-title>Infection dynamics on scale-free networks.</article-title>             <source>Phys Rev E</source>             <volume>64</volume>             <fpage>066112</fpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-PastorSatorras1">
        <label>9</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Pastor-Satorras</surname><given-names>R</given-names></name><name name-style="western"><surname>Vespignani</surname><given-names>A</given-names></name></person-group>             <year>2001</year>             <article-title>Epidemic dynamics and endemic states in complex networks.</article-title>             <source>Phys Rev E</source>             <volume>63</volume>             <fpage>066117</fpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-PastorSatorras2">
        <label>10</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Pastor-Satorras</surname><given-names>R</given-names></name><name name-style="western"><surname>Vespignani</surname><given-names>A</given-names></name></person-group>             <year>2001</year>             <article-title>Epidemic spreading in scale-free networks.</article-title>             <source>Phys Rev Lett</source>             <volume>86</volume>             <fpage>3200</fpage>             <lpage>3203</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Keeling1">
        <label>11</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Keeling</surname><given-names>M</given-names></name></person-group>             <year>2005</year>             <article-title>The implications of network structure for epidemic dynamics.</article-title>             <source>Theor Popul Biol</source>             <volume>67</volume>             <fpage>1</fpage>             <lpage>8</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Hamilton1">
        <label>12</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Hamilton</surname><given-names>DT</given-names></name><name name-style="western"><surname>Handcock</surname><given-names>MS</given-names></name><name name-style="western"><surname>Morris</surname><given-names>M</given-names></name></person-group>             <year>2008</year>             <article-title>Degree distributions in sexual networks: A framework for evaluating evidence.</article-title>             <source>Sex Transm Dis</source>             <volume>35</volume>             <fpage>30</fpage>             <lpage>40</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Liljeros1">
        <label>13</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Liljeros</surname><given-names>F</given-names></name><name name-style="western"><surname>Edling</surname><given-names>CR</given-names></name><name name-style="western"><surname>Amaral</surname><given-names>LAN</given-names></name><name name-style="western"><surname>Stanley</surname><given-names>HE</given-names></name><name name-style="western"><surname>Aberg</surname><given-names>Y</given-names></name></person-group>             <year>2001</year>             <article-title>The web of human sexual contacts.</article-title>             <source>Nature</source>             <volume>411</volume>             <fpage>907</fpage>             <lpage>908</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Mossong1">
        <label>14</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Mossong</surname><given-names>J</given-names></name><name name-style="western"><surname>Hens</surname><given-names>N</given-names></name><name name-style="western"><surname>Jit</surname><given-names>M</given-names></name><name name-style="western"><surname>Beutels</surname><given-names>P</given-names></name><name name-style="western"><surname>Auranen</surname><given-names>K</given-names></name><etal/></person-group>             <year>2008</year>             <article-title>Social contacts and mixing patterns relevant to the spread of infectious diseases.</article-title>             <source>PLoS Med</source>             <volume>5</volume>             <fpage>381</fpage>             <lpage>391</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Read1">
        <label>15</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Read</surname><given-names>JM</given-names></name><name name-style="western"><surname>Eames</surname><given-names>KTD</given-names></name><name name-style="western"><surname>Edmunds</surname><given-names>WJ</given-names></name></person-group>             <year>2008</year>             <article-title>Dynamic social networks and the implications for the spread of infectious disease.</article-title>             <source>J R Soc Interface</source>             <volume>5</volume>             <fpage>1001</fpage>             <lpage>1007</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Salathe1">
        <label>16</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Salathe</surname><given-names>M</given-names></name><name name-style="western"><surname>Kazandjieva</surname><given-names>M</given-names></name><name name-style="western"><surname>Lee</surname><given-names>JW</given-names></name><name name-style="western"><surname>Levis</surname><given-names>P</given-names></name><name name-style="western"><surname>Feldman</surname><given-names>MW</given-names></name><etal/></person-group>             <year>2010</year>             <article-title>A high-resolution human contact network for infectious disease transmission.</article-title>             <source>Proc Natl Acad Sci U S A</source>             <volume>107</volume>             <fpage>22020</fpage>             <lpage>22025</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Grenfell1">
        <label>17</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Grenfell</surname><given-names>B</given-names></name><name name-style="western"><surname>Pybus</surname><given-names>O</given-names></name><name name-style="western"><surname>Gog</surname><given-names>J</given-names></name><name name-style="western"><surname>Wood</surname><given-names>J</given-names></name><name name-style="western"><surname>Daly</surname><given-names>J</given-names></name><etal/></person-group>             <year>2004</year>             <article-title>Unifying the epidemiological and evolutionary dynamics of pathogens.</article-title>             <source>Science</source>             <volume>303</volume>             <fpage>327</fpage>             <lpage>332</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Holmes1">
        <label>18</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Holmes</surname><given-names>EC</given-names></name><name name-style="western"><surname>Grenfell</surname><given-names>BT</given-names></name></person-group>             <year>2009</year>             <article-title>Discovering the phylodynamics of RNA viruses.</article-title>             <source>PLoS Comput Biol</source>             <volume>5</volume>             <fpage>e1000505</fpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Volz2">
        <label>19</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Volz</surname><given-names>EM</given-names></name><name name-style="western"><surname>Pond</surname><given-names>SLK</given-names></name><name name-style="western"><surname>Ward</surname><given-names>MJ</given-names></name><name name-style="western"><surname>Brown</surname><given-names>AJL</given-names></name><name name-style="western"><surname>Frost</surname><given-names>SDW</given-names></name></person-group>             <year>2009</year>             <article-title>Phylodynamics of infectious disease epidemics.</article-title>             <source>Genetics</source>             <volume>183</volume>             <fpage>1421</fpage>             <lpage>1430</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-ODea1">
        <label>20</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>O'Dea</surname><given-names>EB</given-names></name><name name-style="western"><surname>Wilke</surname><given-names>CO</given-names></name></person-group>             <year>2011</year>             <article-title>Contact heterogeneity and phylodynamics: how contact networks shape parasite evolutionary trees.</article-title>             <source>Interdiscip Perspect Infect Dis</source>             <volume>2011</volume>             <fpage>238743</fpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Welch1">
        <label>21</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Welch</surname><given-names>D</given-names></name><name name-style="western"><surname>Bansal</surname><given-names>S</given-names></name><name name-style="western"><surname>Hunter</surname><given-names>DR</given-names></name></person-group>             <year>2011</year>             <article-title>Statistical inference to advance network models in epidemiology.</article-title>             <source>Epidemics</source>             <volume>3</volume>             <fpage>38</fpage>             <lpage>45</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Aldous1">
        <label>22</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Aldous</surname><given-names>D</given-names></name></person-group>             <year>2001</year>             <article-title>Stochastic models and descriptive statistics for phylogenetic trees, from Yule to today.</article-title>             <source>Stat Sci</source>             <volume>16</volume>             <fpage>23</fpage>             <lpage>34</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Blum1">
        <label>23</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Blum</surname><given-names>MGB</given-names></name><name name-style="western"><surname>François</surname><given-names>O</given-names></name><name name-style="western"><surname>Janson</surname><given-names>S</given-names></name></person-group>             <year>2006</year>             <article-title>The mean, variance and limiting distribution of two statistics sensitive to phylogenetic tree balance.</article-title>             <source>Ann Appl Probab</source>             <volume>16</volume>             <fpage>2195</fpage>             <lpage>2214</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Blum2">
        <label>24</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Blum</surname><given-names>MGB</given-names></name><name name-style="western"><surname>François</surname><given-names>O</given-names></name></person-group>             <year>2006</year>             <article-title>Which random processes describe the tree of life? A large-scale study of phylogenetic tree imbalance.</article-title>             <source>Syst Biol</source>             <volume>55</volume>             <fpage>685</fpage>             <lpage>691</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Blum3">
        <label>25</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Blum</surname><given-names>MGB</given-names></name><name name-style="western"><surname>François</surname><given-names>O</given-names></name></person-group>             <year>2005</year>             <article-title>On statistical tests of phylogenetic tree imbalance: The Sackin and other indices revisited.</article-title>             <source>Math Biosci</source>             <volume>195</volume>             <fpage>141</fpage>             <lpage>153</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Kirkpatrick1">
        <label>26</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Kirkpatrick</surname><given-names>M</given-names></name><name name-style="western"><surname>Slatkin</surname><given-names>M</given-names></name></person-group>             <year>1993</year>             <article-title>Searching for evolutionary patterns in the shape of a phylogenetic tree.</article-title>             <source>Evolution</source>             <volume>47</volume>             <fpage>1171</fpage>             <lpage>1181</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Mooers1">
        <label>27</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Mooers</surname><given-names>AO</given-names></name><name name-style="western"><surname>Heard</surname><given-names>SB</given-names></name></person-group>             <year>1997</year>             <article-title>Inferring evolutionary process from phylogenetic tree shape.</article-title>             <source>Q Rev Biol</source>             <volume>72</volume>             <fpage>31</fpage>             <lpage>54</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Shao1">
        <label>28</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Shao</surname><given-names>KT</given-names></name></person-group>             <year>1990</year>             <article-title>Tree balance.</article-title>             <source>Syst Biol</source>             <volume>39</volume>             <fpage>266</fpage>             <lpage>276</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Erds1">
        <label>29</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Erdös</surname><given-names>P</given-names></name><name name-style="western"><surname>Rényi</surname><given-names>A</given-names></name></person-group>             <year>1959</year>             <article-title>On random graphs, I.</article-title>             <source>Publ Math-Debrecen</source>             <volume>6</volume>             <fpage>290</fpage>             <lpage>297</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Barabsi1">
        <label>30</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Barabási</surname><given-names>AL</given-names></name><name name-style="western"><surname>Albert</surname><given-names>R</given-names></name></person-group>             <year>1999</year>             <article-title>Emergence of scaling in random networks.</article-title>             <source>Science</source>             <volume>286</volume>             <fpage>509</fpage>             <lpage>512</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Watts1">
        <label>31</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Watts</surname><given-names>DJ</given-names></name><name name-style="western"><surname>Strogatz</surname><given-names>SH</given-names></name></person-group>             <year>1998</year>             <article-title>Collective dynamics of ‘small-world’ networks.</article-title>             <source>Nature</source>             <volume>393</volume>             <fpage>440</fpage>             <lpage>442</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Kenah1">
        <label>32</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Kenah</surname><given-names>E</given-names></name><name name-style="western"><surname>Miller</surname><given-names>JC</given-names></name></person-group>             <year>2011</year>             <article-title>Epidemic percolation networks, epidemic outcomes, and interventions.</article-title>             <source>Interdiscip Perspect Infect Dis</source>             <volume>2011</volume>             <fpage>543520</fpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Kouyos1">
        <label>33</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Kouyos</surname><given-names>RD</given-names></name><name name-style="western"><surname>von Wyl</surname><given-names>V</given-names></name><name name-style="western"><surname>Yerly</surname><given-names>S</given-names></name><name name-style="western"><surname>Böni</surname><given-names>J</given-names></name><name name-style="western"><surname>Taffé</surname><given-names>P</given-names></name><etal/></person-group>             <year>2010</year>             <article-title>Molecular epidemiology reveals long-term changes in HIV type 1 subtype B transmission in Switzerland.</article-title>             <source>J Infect Dis</source>             <volume>201</volume>             <fpage>1488</fpage>             <lpage>1497</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Stadler1">
        <label>34</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Stadler</surname><given-names>T</given-names></name><name name-style="western"><surname>Kouyos</surname><given-names>R</given-names></name><name name-style="western"><surname>von Wyl</surname><given-names>V</given-names></name><name name-style="western"><surname>Yerly</surname><given-names>S</given-names></name><name name-style="western"><surname>Böni</surname><given-names>J</given-names></name><etal/></person-group>             <year>2012</year>             <article-title>Estimating the basic reproductive number from viral sequence data.</article-title>             <source>Mol Biol Evol</source>             <volume>29</volume>             <fpage>347</fpage>             <lpage>357</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Bezemer1">
        <label>35</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Bezemer</surname><given-names>D</given-names></name><name name-style="western"><surname>de Wolf</surname><given-names>F</given-names></name><name name-style="western"><surname>Boerlijst</surname><given-names>MC</given-names></name><name name-style="western"><surname>van Sighem</surname><given-names>A</given-names></name><name name-style="western"><surname>Hollingsworth</surname><given-names>TD</given-names></name><etal/></person-group>             <year>2010</year>             <article-title>27 years of the HIV epidemic amongst men having sex with men in the Netherlands: An in depth mathematical model-based analysis.</article-title>             <source>Epidemics</source>             <volume>2</volume>             <fpage>66</fpage>             <lpage>79</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Drummond1">
        <label>36</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Drummond</surname><given-names>A</given-names></name><name name-style="western"><surname>Suchard</surname><given-names>M</given-names></name></person-group>             <year>2008</year>             <article-title>Fully Bayesian tests of neutrality using genealogical summary statistics.</article-title>             <source>BMC Genet</source>             <volume>9</volume>             <fpage>68</fpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Moore1">
        <label>37</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Moore</surname><given-names>C</given-names></name><name name-style="western"><surname>Newman</surname><given-names>MEJ</given-names></name></person-group>             <year>2000</year>             <article-title>Epidemics and percolation in small-world networks.</article-title>             <source>Phys Rev E</source>             <volume>61</volume>             <fpage>5678</fpage>             <lpage>5682</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Drummond2">
        <label>38</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Drummond</surname><given-names>A</given-names></name><name name-style="western"><surname>Ho</surname><given-names>S</given-names></name><name name-style="western"><surname>Phillips</surname><given-names>M</given-names></name><name name-style="western"><surname>Rambaut</surname><given-names>A</given-names></name></person-group>             <year>2006</year>             <article-title>Relaxed phylogenetics and dating with confidence.</article-title>             <source>PLoS Biol</source>             <volume>4</volume>             <fpage>e88</fpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Drummond3">
        <label>39</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Drummond</surname><given-names>A</given-names></name><name name-style="western"><surname>Rambaut</surname><given-names>A</given-names></name></person-group>             <year>2007</year>             <article-title>BEAST: Bayesian evolutionary analysis by sampling trees.</article-title>             <source>BMC Evol Biol</source>             <volume>7</volume>             <fpage>214</fpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Kermack1">
        <label>40</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Kermack</surname><given-names>W</given-names></name><name name-style="western"><surname>McKendrick</surname><given-names>A</given-names></name></person-group>             <year>1927</year>             <article-title>A contribution to the mathematical theory of epidemics. [I.].</article-title>             <source>P R Soc Lond A-Conta</source>             <volume>115</volume>             <fpage>700</fpage>             <lpage>721</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Molloy1">
        <label>41</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Molloy</surname><given-names>M</given-names></name><name name-style="western"><surname>Reed</surname><given-names>B</given-names></name></person-group>             <year>1995</year>             <article-title>A critical point for random graphs with a given degree sequence.</article-title>             <source>Random Struct Algor</source>             <volume>6</volume>             <fpage>161</fpage>             <lpage>180</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Newman2">
        <label>42</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Newman</surname><given-names>MEJ</given-names></name></person-group>             <year>2003</year>             <article-title>The structure and function of complex networks.</article-title>             <source>SIAM Rev</source>             <volume>45</volume>             <fpage>167</fpage>             <lpage>256</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Newman3">
        <label>43</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Newman</surname><given-names>MEJ</given-names></name><name name-style="western"><surname>Strogatz</surname><given-names>SH</given-names></name><name name-style="western"><surname>Watts</surname><given-names>DJ</given-names></name></person-group>             <year>2001</year>             <article-title>Random graphs with arbitrary degree distributions and their applications.</article-title>             <source>Phys Rev E</source>             <volume>64</volume>             <fpage>026118</fpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Gillespie1">
        <label>44</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Gillespie</surname><given-names>DT</given-names></name></person-group>             <year>1977</year>             <article-title>Exact stochastic simulation of coupled chemical reactions.</article-title>             <source>J Phys Chem-US</source>             <volume>81</volume>             <fpage>2340</fpage>             <lpage>2361</lpage>          </element-citation>
      </ref>
      <ref id="pcbi.1002413-Sackin1">
        <label>45</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Sackin</surname><given-names>M</given-names></name></person-group>             <year>1972</year>             <article-title>Good and bad phenograms.</article-title>             <source>Syst Zool</source>             <volume>21</volume>             <fpage>225</fpage>             <lpage>226</lpage>          </element-citation>
      </ref>
    </ref-list>
    
  </back>
</article>