<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article
  PUBLIC "-//NLM//DTD Journal Publishing DTD v3.0 20080202//EN" "http://dtd.nlm.nih.gov/publishing/3.0/journalpublishing3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article" dtd-version="3.0" xml:lang="EN">
  <front>
    <journal-meta><journal-id journal-id-type="nlm-ta">PLoS ONE</journal-id><journal-id journal-id-type="publisher-id">plos</journal-id><journal-id journal-id-type="pmc">plosone</journal-id><!--===== Grouping journal title elements =====--><journal-title-group><journal-title>PLoS ONE</journal-title></journal-title-group><issn pub-type="epub">1932-6203</issn><publisher>
        <publisher-name>Public Library of Science</publisher-name>
        <publisher-loc>San Francisco, USA</publisher-loc>
      </publisher></journal-meta>
    <article-meta><article-id pub-id-type="publisher-id">PONE-D-11-05569</article-id><article-id pub-id-type="doi">10.1371/journal.pone.0020968</article-id><article-categories>
        <subj-group subj-group-type="heading">
          <subject>Research Article</subject>
        </subj-group>
        <subj-group subj-group-type="Discipline-v2">
          <subject>Biology</subject>
          <subj-group>
            <subject>Biochemistry</subject>
            <subj-group>
              <subject>Nucleic acids</subject>
              <subj-group>
                <subject>DNA</subject>
              </subj-group>
            </subj-group>
          </subj-group>
          <subj-group>
            <subject>Genomics</subject>
            <subj-group>
              <subject>Comparative genomics</subject>
              <subject>Genome evolution</subject>
              <subject>Genome sequencing</subject>
            </subj-group>
          </subj-group>
          <subj-group>
            <subject>Microbiology</subject>
            <subj-group>
              <subject>Archaeans</subject>
              <subj-group>
                <subject>Archaeal taxonomy</subject>
                <subject>Archaeal evolution</subject>
              </subj-group>
            </subj-group>
            <subj-group>
              <subject>Microbial evolution</subject>
            </subj-group>
          </subj-group>
        </subj-group>
        <subj-group subj-group-type="Discipline">
          <subject>Genetics and Genomics</subject>
          <subject>Microbiology</subject>
          <subject>Biochemistry</subject>
        </subj-group>
      </article-categories><title-group><article-title><italic>Haloquadratum walsbyi</italic> : Limited Diversity in a Global Pond</article-title><alt-title alt-title-type="running-head">Global Diversity of <italic>Haloquadratum</italic></alt-title></title-group><contrib-group>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Dyall-Smith</surname>
            <given-names>Mike L.</given-names>
          </name>
          <xref ref-type="aff" rid="aff1">
            <sup>1</sup>
          </xref>
          <xref ref-type="corresp" rid="cor1">
            <sup>*</sup>
          </xref>
          <xref ref-type="fn" rid="fn1">
            <sup>¤a</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Pfeiffer</surname>
            <given-names>Friedhelm</given-names>
          </name>
          <xref ref-type="aff" rid="aff1">
            <sup>1</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Klee</surname>
            <given-names>Kathrin</given-names>
          </name>
          <xref ref-type="aff" rid="aff1">
            <sup>1</sup>
          </xref>
          <xref ref-type="fn" rid="fn2">
            <sup>¤b</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Palm</surname>
            <given-names>Peter</given-names>
          </name>
          <xref ref-type="aff" rid="aff1">
            <sup>1</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Gross</surname>
            <given-names>Karin</given-names>
          </name>
          <xref ref-type="aff" rid="aff2">
            <sup>2</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Schuster</surname>
            <given-names>Stephan C.</given-names>
          </name>
          <xref ref-type="aff" rid="aff3">
            <sup>3</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Rampp</surname>
            <given-names>Markus</given-names>
          </name>
          <xref ref-type="aff" rid="aff2">
            <sup>2</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Oesterhelt</surname>
            <given-names>Dieter</given-names>
          </name>
          <xref ref-type="aff" rid="aff1">
            <sup>1</sup>
          </xref>
        </contrib>
      </contrib-group><aff id="aff1"><label>1</label><addr-line>Department of Membrane Biochemistry, Max-Planck-Institute of Biochemistry, Martinsried, Germany</addr-line>       </aff><aff id="aff2"><label>2</label><addr-line>Computing Center (RZG) of the Max-Planck-Society, Max Planck Institute of Plasma Physics, Garching, Germany</addr-line>       </aff><aff id="aff3"><label>3</label><addr-line>Center for Comparative Genomics and Bioinformatics, Penn State University, University Park, Pennsylvania, United States of America</addr-line>       </aff><contrib-group>
        <contrib contrib-type="editor" xlink:type="simple">
          <name name-style="western">
            <surname>Lopez-Garcia</surname>
            <given-names>Purification</given-names>
          </name>
          <role>Editor</role>
          <xref ref-type="aff" rid="edit1"/>
        </contrib>
      </contrib-group><aff id="edit1">Université Paris Sud, France</aff><author-notes>
        <corresp id="cor1">* E-mail: <email xlink:type="simple">mdyall-smith@csu.edu.au</email></corresp>
        <fn fn-type="con">
          <p>Carried out the microbiological and molecular studies: MLD-S. Participated in the design and coordination of the study, sequencing, and drafting of the manuscript: MLD-S. Participated in the design and coordination of the study and drafting of the manuscript: FP. Coordinated bioinformatics analyses: FP MR. Participated in the sequencing: SCS PP KK. Supplied HaloLex tools for the bioinformatic analysis: KG. Conceived the study and participated in its design and coordination: DO. All authors read and approved the final manuscript.</p>
        </fn>
        <fn fn-type="current-aff" id="fn1">
          <label>¤a</label>
          <p>Current address: School of Biomedical Sciences, Charles Sturt University, Wagga Wagga, New South Wales, Australia</p>
        </fn>
        <fn fn-type="current-aff" id="fn2">
          <label>¤b</label>
          <p>Current address: Max-Planck-Institute for Plant Breeding Research, Köln, Germany</p>
        </fn>
      <fn fn-type="conflict">
        <p>The authors have declared that no competing interests exist.</p>
      </fn></author-notes><pub-date pub-type="collection">
        <year>2011</year>
      </pub-date><pub-date pub-type="epub">
        <day>20</day>
        <month>6</month>
        <year>2011</year>
      </pub-date><volume>6</volume><issue>6</issue><elocation-id>e20968</elocation-id><history>
        <date date-type="received">
          <day>28</day>
          <month>3</month>
          <year>2011</year>
        </date>
        <date date-type="accepted">
          <day>14</day>
          <month>5</month>
          <year>2011</year>
        </date>
      </history><!--===== Grouping copyright info into permissions =====--><permissions><copyright-year>2011</copyright-year><copyright-holder>Dyall-Smith et al</copyright-holder><license><license-p>This is an open-access article distributed under the terms of the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p></license></permissions><abstract>
        <sec>
          <title>Background</title>
          <p><italic>Haloquadratum walsbyi</italic> commonly dominates the microbial flora of hypersaline waters. Its cells are extremely fragile squares requiring &gt;14% (w/v) salt for growth, properties that should limit its dispersal and promote geographical isolation and divergence. To assess this, the genome sequences of two isolates recovered from sites at near-maximum distance on Earth were compared.</p>
        </sec>
        <sec>
          <title>Principal Findings</title>
          <p>Both chromosomes are 3.1 Mb in size, and 84% of each sequence was highly similar to the other (98.6% identity), comprising the core sequence. ORFs of this shared sequence were completely syntenic (conserved in genomic orientation and order), without inversion or rearrangement. Strain-specific insertions/deletions could be precisely mapped, often allowing the genetic events to be inferred. Many inferred deletions were associated with short direct repeats (4–20 bp). Deletion-coupled insertions are frequent, producing different sequences at identical positions. In cases where the inserted and deleted sequences are homologous, this leads to variant genes in a common syntenic background (as already described by others). Cas/CRISPR systems are present in C23<sup>T</sup> but have been lost in HBSQ001 except for a few spacer remnants. Numerous types of mobile genetic elements occur in both strains, most of which appear to be active, and with some specifically targeting others. Strain C23<sup>T</sup> carries two ∼6 kb plasmids that show similarity to halovirus His1 and to sequences near halovirus/plasmid gene clusters commonly found in haloarchaea.</p>
        </sec>
        <sec>
          <title>Conclusions</title>
          <p>Deletion-coupled insertions show that <italic>Hqr. walsbyi</italic> evolves by uptake and precise integration of foreign DNA, probably originating from close relatives. Change is also driven by mobile genetic elements but these do not by themselves explain the atypically low gene coding density found in this species. The remarkable genome conservation despite the presence of active systems for genome rearrangement implies both an efficient global dispersal system, and a high selective fitness for this species.</p>
        </sec>
      </abstract><funding-group><funding-statement>This work was funded by the Max-Planck-Society, Germany, to Dieter Oesterhelt, Director of the Department of Membrane Biochemistry, MPI for Biochemistry (<ext-link ext-link-type="uri" xlink:href="http://www.biochem.mpg.de/en/index.html" xlink:type="simple">http://www.biochem.mpg.de/en/index.html</ext-link>). The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</funding-statement></funding-group><counts>
        <page-count count="23"/>
      </counts></article-meta>
  </front>
  <body>
    <sec id="s1">
      <title>Introduction</title>
      <p>First described in 1980 <xref ref-type="bibr" rid="pone.0020968-Walsby1">[1]</xref>, the square haloarchaeon, <italic>Haloquadratum walsbyi,</italic> is commonly the dominant species found in hypersaline waters such as salt lakes and saltern crystallizer ponds <xref ref-type="bibr" rid="pone.0020968-Oren1">[2]</xref>, <xref ref-type="bibr" rid="pone.0020968-Antn1">[3]</xref>. Its cells are highly distinctive, being thin squares or rectangles, usually containing gas vesicles and polyhydroxyalkanoate (PHA) granules <xref ref-type="bibr" rid="pone.0020968-Stoeckenius1">[4]</xref>, <xref ref-type="bibr" rid="pone.0020968-Kessel1">[5]</xref>. It thrives at saturating salt concentrations, where it can represent ≥80% of the microbial population <xref ref-type="bibr" rid="pone.0020968-Oren2">[6]</xref>, and its cytoplasm is completely adapted to function optimally at similarly high levels of potassium chloride. Cell growth requires salt concentrations of at least 14% w/v, or more than 4-fold higher than seawater, and it can also tolerate molar concentrations of Mg<sup>2+</sup>, making it one of a limited number of organisms able to cope at such extremely low water activity <xref ref-type="bibr" rid="pone.0020968-Bolhuis1">[7]</xref>. In fact, it achieves higher cell densities in media with &gt;1 M MgCl<sub>2</sub> <xref ref-type="bibr" rid="pone.0020968-Burns1">[8]</xref>. The genome has a G+C content of 48%, considerably lower than all the other known species of the family <italic>Halobacteriaceae</italic>, which have values of 61–70% <xref ref-type="bibr" rid="pone.0020968-Oren3">[9]</xref>.</p>
      <p>It took until 2004 for these organisms to be cultivated in the laboratory <xref ref-type="bibr" rid="pone.0020968-Bolhuis2">[10]</xref>, <xref ref-type="bibr" rid="pone.0020968-Burns2">[11]</xref>, and two isolates were formally described as a new species in 2007 <xref ref-type="bibr" rid="pone.0020968-Burns1">[8]</xref>. They were recovered from saltern crystallizer ponds, one in Australia (isolate C23<sup>T</sup>) and the other in Spain (isolate HBSQ001), using very different isolation methods. The Australian isolate was obtained as a clonal population after serial-dilution to single cells and incubation for 3 weeks (extinction dilution method), while the Spanish isolate was recovered after serial enrichment over 2 years followed by colony purification on agar plates. Neither group aimed to select a specific strain or sequence type, but rather to isolate any member of the square haloarchaea of Walsby (SHOW group). The isolates had very similar 16S rRNA gene sequences, and a DNA-DNA cross-hybridization similarity of 80% <xref ref-type="bibr" rid="pone.0020968-Burns1">[8]</xref>. One notable difference between the isolates was in the structure of their cell walls. Strain HBSQ001 displayed an atypical triple-layered cell wall whereas C23<sup>T</sup> possessed a conventional two-layered structure consisting of the cell membrane covered by a single, external protein S-layer <xref ref-type="bibr" rid="pone.0020968-Burns1">[8]</xref>.</p>
      <p>The Spanish isolate (HBSQ001) was sequenced in 2006 <xref ref-type="bibr" rid="pone.0020968-Bolhuis1">[7]</xref>, allowing the first description of its overall characteristics and general metabolism <xref ref-type="bibr" rid="pone.0020968-Falb1">[12]</xref>. Its gene density is only 76%, much lower than in other haloarchaea and most prokaryotes, and this was attributed to a plethora of repeat sequences and pseudogenes. The reason for the low gene density is not clear. Features noted as likely to be related to survival up to salt saturation included (a) multiple uptake systems for phosphates and phosphonates, which are limiting nutrients in these environments, (b) halomucin, an extraordinarily long (9,159 aa), secreted protein that probably protects against cell desiccation, and (c) the presence of genes specifying two different bacteriorhodopsins (BopI, BopII).</p>
      <p>Variation within a local population of <italic>Hqr. walsbyi</italic> has been studied by comparing the genome of HBSQ001 with environmental DNA sequences recovered from the same Spanish saltern from which this organism was isolated, i.e., with autochthonous DNA <xref ref-type="bibr" rid="pone.0020968-CuadrosOrellana1">[13]</xref>, <xref ref-type="bibr" rid="pone.0020968-Legault1">[14]</xref>. While divergence within the <italic>Haloquadratum</italic> affiliated population, as measured by the metagenomic 16S rRNA gene sequences, was very low (≤1.6%), comparison of the genomic and metagenomic sequences revealed both highly conserved and hypervariable regions, denoted as ‘genomic islands’. The variable sequences represent a pool of genes shared by some members of the population, the so-called pan-genome <xref ref-type="bibr" rid="pone.0020968-Tettelin1">[15]</xref> which, for this organism, was estimated to be at least another chromosome equivalent (3 Mb), and probably much more <xref ref-type="bibr" rid="pone.0020968-Legault1">[14]</xref>. The world-wide diversity of this organism has also been examined, largely using 16S rRNA gene sequences, and these data also indicated that <italic>Hqr. walsbyi</italic> populations are highly coherent <xref ref-type="bibr" rid="pone.0020968-Oh1">[16]</xref>, with a level of variation ≤2%, a value very close to the 1.6% divergence observed within the Spanish saltern population. Not only was <italic>Hqr. walsbyi</italic> the dominant microbial group at these sites but it also appeared to be the <italic>only</italic> species of this genus. Further, within these datasets, near identical sequences could be found at distant sites. This picture of global conservation contrasts with other, environmentally common genera of haloarchaea, such as <italic>Halorubrum</italic> and <italic>Haloarcula,</italic> that show much higher and more usual levels of divergence in rRNA genes (∼7%). 
The latter genera have many recognised species, and environmental sequence studies indicate many additional species are yet to be isolated. Given this background, it was important to assess the true level of divergence of <italic>Haloquadratum</italic> by comparing the genome sequences of isolates recovered from geographically distant sites.</p>
      <p>While hypersaline waters with over 30% w/v salt typically have high concentrations of prokaryotic cells (∼10<sup>7</sup> ml<sup>−1</sup>), and even higher levels of virus particles (∼10<sup>9</sup> particles ml<sup>−1</sup>), there are few if any grazing ciliates and flagellates <xref ref-type="bibr" rid="pone.0020968-PedrosAlio1">[17]</xref>. Viruses are known to be a significant driving force in the evolution of prokaryotes, and in these environments they are the major predator of haloarchaea. Indeed, viral lysis of <italic>Haloquadratum</italic>-like cells in natural hypersaline water has been observed directly by electron-microscopy <xref ref-type="bibr" rid="pone.0020968-GuixaBoixareu1">[18]</xref>. A recently recognised prokaryotic defence mechanism against foreign DNA, usually viruses and plasmids, is the CRISPR (clustered regularly interspaced short palindromic repeats) system, which allows cells to specifically recognise and destroy target sequences, and in many respects parallels the function of the RNAi system of eukaryotes <xref ref-type="bibr" rid="pone.0020968-Karginov1">[19]</xref>. CRISPR systems are particularly common in Archaea. The dynamic nature of CRISPR arrays means they show high sequence variability between members of the same species, and so can be used to type strains, or to assess the prevalent viruses at particular times, or between geographically distant sites <xref ref-type="bibr" rid="pone.0020968-Held1">[20]</xref>.</p>
      <p>About 14 genome sequences of haloarchaea are currently available (March 2011), and this number should expand rapidly to over 100 (<ext-link ext-link-type="uri" xlink:href="http://www.genomesonline.org/gold" xlink:type="simple">http://www.genomesonline.org/gold</ext-link>). However, the only comparison of closely related haloarchaeal species to date has been that of <italic>Halobacterium salinarum</italic> strains R1 and NRC-1, and these genomes are so closely related that they provided only very limited information on their mode of divergence <xref ref-type="bibr" rid="pone.0020968-Pfeiffer1">[21]</xref>. In the current study, the genome sequence of the type strain of <italic>Hqr. walsbyi</italic>, isolate C23<sup>T</sup>, was determined and compared to a geographically distant isolate, HBSQ001. Overall, the two isolates are much less divergent than expected given the enormous distance between the sites of isolation and their very different modes of isolation. However, this high similarity enabled their differences to be mapped precisely. Genomic comparison, along with evidence from environmental DNA sequences, points to a rapid, global dispersal system for <italic>Hqr. walsbyi</italic>, acting to homogenize populations at distant locations. By inference, the predicted seeding by global dispersal must also lead to successful domination of the microbial populations in hypersaline waters around the world.</p>
    </sec>
    <sec id="s2">
      <title>Results and Discussion</title>
      <sec id="s2a">
        <title>Sequence of the <italic>Hqr. walsbyi</italic> C23<sup>T</sup> genome</title>
        <p>The complete sequence was achieved using the previously determined sequence of HBSQ001 as a scaffold to arrange 220 separate contigs derived from 454 sequence reads. Contig gaps were then spanned by PCRs to close the genome. The general features of the circular chromosome, and three plasmids carried by this strain (EMBL accessions FR746099–FR746102), are presented in <xref ref-type="table" rid="pone-0020968-t001">Table 1</xref>. For comparison, the corresponding data of the Spanish strain HBSQ001 are also included. The chromosomes of both are similar in size (∼3.1 Mb) and %G+C content (47.8%). Both have two rRNA operons and 45 tRNA genes, and carry a similar number of predicted ORFs (C23<sup>T</sup>: 2,894 and HBSQ001: 2,819). Curiously, the gene density is relatively low (79%) and there are over 300 pseudogenes (C23<sup>T</sup>, 337; HBSQ001, 314), consisting largely of degraded transposases and conserved hypothetical proteins. The strains differ in their plasmids, with C23<sup>T</sup> carrying one large plasmid (PL100, 100 kb) and two small plasmids of around 6 kb (PL6A, PL6B) while HBSQ001 contains a single, unrelated 47 kb plasmid (PL47). Analyses of tetra-nucleotide frequencies show that the sequences CTAG, GGCC and AGCT are strongly avoided on the main chromosome. GGCC is also avoided on the large plasmid and is absent in the small plasmids (<xref ref-type="supplementary-material" rid="pone.0020968.s002">Table S1</xref>).</p>
        <table-wrap id="pone-0020968-t001" position="float"><object-id pub-id-type="doi">10.1371/journal.pone.0020968.t001</object-id><label>Table 1</label><caption>
            <title>General features of <italic>Hqr. walsbyi</italic> C23<sup>T</sup> compared with those of strain HBSQ001.</title>
          </caption><!--===== Grouping alternate versions of objects =====--><alternatives><graphic id="pone-0020968-t001-1" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0020968.t001" xlink:type="simple"/><table>
            <colgroup span="1">
              <col align="left" span="1"/>
              <col align="center" span="1"/>
              <col align="center" span="1"/>
              <col align="center" span="1"/>
              <col align="center" span="1"/>
              <col align="center" span="1"/>
              <col align="center" span="1"/>
              <col align="center" span="1"/>
              <col align="center" span="1"/>
            </colgroup>
            <thead>
              <tr>
                <td align="left" colspan="1" rowspan="1"/>
                <td align="left" colspan="4" rowspan="1">C23<sup>T</sup></td>
                <td align="left" colspan="2" rowspan="1">HBSQ001<xref ref-type="table-fn" rid="nt101">a</xref></td>
                <td align="left" colspan="2" rowspan="1">Totals</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">Feature</td>
                <td align="left" colspan="1" rowspan="1">Chromosome</td>
                <td align="left" colspan="1" rowspan="1">PL100</td>
                <td align="left" colspan="1" rowspan="1">PL6A</td>
                <td align="left" colspan="1" rowspan="1">PL6B</td>
                <td align="left" colspan="1" rowspan="1">Chromosome</td>
                <td align="left" colspan="1" rowspan="1">PL47</td>
                <td align="left" colspan="1" rowspan="1">C23<sup>T</sup></td>
                <td align="left" colspan="1" rowspan="1">HBSQ001</td>
              </tr>
            </thead>
            <tbody>
              <tr>
                <td align="left" colspan="1" rowspan="1">Length (bp)</td>
                <td align="left" colspan="1" rowspan="1">3,148,033</td>
                <td align="left" colspan="1" rowspan="1">100,258</td>
                <td align="left" colspan="1" rowspan="1">6,129</td>
                <td align="left" colspan="1" rowspan="1">6,056</td>
                <td align="left" colspan="1" rowspan="1">3,132,494</td>
                <td align="left" colspan="1" rowspan="1">46,867</td>
                <td align="left" colspan="1" rowspan="1">3,260,476</td>
                <td align="left" colspan="1" rowspan="1">3,179,361</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">G+C content (%)</td>
                <td align="left" colspan="1" rowspan="1">47.8</td>
                <td align="left" colspan="1" rowspan="1">43.9</td>
                <td align="left" colspan="1" rowspan="1">51.1</td>
                <td align="left" colspan="1" rowspan="1">52.0</td>
                <td align="left" colspan="1" rowspan="1">47.8</td>
                <td align="left" colspan="1" rowspan="1">47.7</td>
                <td align="left" colspan="1" rowspan="1"/>
                <td align="left" colspan="1" rowspan="1"/>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">% coding (proteins/RNAs)</td>
                <td align="left" colspan="1" rowspan="1">79.3</td>
                <td align="left" colspan="1" rowspan="1">70.1</td>
                <td align="left" colspan="1" rowspan="1">78.2</td>
                <td align="left" colspan="1" rowspan="1">78.5</td>
                <td align="left" colspan="1" rowspan="1">78.2</td>
                <td align="left" colspan="1" rowspan="1">68.8</td>
                <td align="left" colspan="1" rowspan="1"/>
                <td align="left" colspan="1" rowspan="1"/>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">Gene distance (Avge)</td>
                <td align="left" colspan="1" rowspan="1">227</td>
                <td align="left" colspan="1" rowspan="1">336</td>
                <td align="left" colspan="1" rowspan="1">176</td>
                <td align="left" colspan="1" rowspan="1">168</td>
                <td align="left" colspan="1" rowspan="1">240</td>
                <td align="left" colspan="1" rowspan="1">393</td>
                <td align="left" colspan="1" rowspan="1"/>
                <td align="left" colspan="1" rowspan="1"/>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">Predicted ORFs</td>
                <td align="left" colspan="1" rowspan="1">2,894</td>
                <td align="left" colspan="1" rowspan="1">83</td>
                <td align="left" colspan="1" rowspan="1">6</td>
                <td align="left" colspan="1" rowspan="1">6</td>
                <td align="left" colspan="1" rowspan="1">2,819</td>
                <td align="left" colspan="1" rowspan="1">38</td>
                <td align="left" colspan="1" rowspan="1">2,989</td>
                <td align="left" colspan="1" rowspan="1">2,857</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">Pseudogenes</td>
                <td align="left" colspan="1" rowspan="1">323</td>
                <td align="left" colspan="1" rowspan="1">14</td>
                <td align="left" colspan="1" rowspan="1">0</td>
                <td align="left" colspan="1" rowspan="1">0</td>
                <td align="left" colspan="1" rowspan="1">314</td>
                <td align="left" colspan="1" rowspan="1">0</td>
                <td align="left" colspan="1" rowspan="1">337</td>
                <td align="left" colspan="1" rowspan="1">314</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">rRNA operons (16S, 23S, 5S)</td>
                <td align="left" colspan="1" rowspan="1">2</td>
                <td align="left" colspan="1" rowspan="1">0</td>
                <td align="left" colspan="1" rowspan="1">0</td>
                <td align="left" colspan="1" rowspan="1">0</td>
                <td align="left" colspan="1" rowspan="1">2</td>
                <td align="left" colspan="1" rowspan="1">0</td>
                <td align="left" colspan="1" rowspan="1">2</td>
                <td align="left" colspan="1" rowspan="1">2</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">tRNA genes</td>
                <td align="left" colspan="1" rowspan="1">45</td>
                <td align="left" colspan="1" rowspan="1">0</td>
                <td align="left" colspan="1" rowspan="1">0</td>
                <td align="left" colspan="1" rowspan="1">0</td>
                <td align="left" colspan="1" rowspan="1">45</td>
                <td align="left" colspan="1" rowspan="1">0</td>
                <td align="left" colspan="1" rowspan="1">45</td>
                <td align="left" colspan="1" rowspan="1">45</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">Other RNAs (7S, RNAseP)</td>
                <td align="left" colspan="1" rowspan="1">2</td>
                <td align="left" colspan="1" rowspan="1">0</td>
                <td align="left" colspan="1" rowspan="1">0</td>
                <td align="left" colspan="1" rowspan="1">0</td>
                <td align="left" colspan="1" rowspan="1">2</td>
                <td align="left" colspan="1" rowspan="1">0</td>
                <td align="left" colspan="1" rowspan="1">2</td>
                <td align="left" colspan="1" rowspan="1">2</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">Avge pI of proteins</td>
                <td align="left" colspan="1" rowspan="1">5.1</td>
                <td align="left" colspan="1" rowspan="1">5.0</td>
                <td align="left" colspan="1" rowspan="1">4.6</td>
                <td align="left" colspan="1" rowspan="1">4.6</td>
                <td align="left" colspan="1" rowspan="1">5.1</td>
                <td align="left" colspan="1" rowspan="1">5.2</td>
                <td align="left" colspan="1" rowspan="1">5.1</td>
                <td align="left" colspan="1" rowspan="1">5.1</td>
              </tr>
            </tbody>
          </table></alternatives><table-wrap-foot>
            <fn id="nt101">
              <label>a</label>
              <p>The data for HBSQ001 have been updated from the previous publication. The number of ORFs is reduced because pseudogenes are now counted as a single ORF even if they consist of several fragments.</p>
            </fn>
          </table-wrap-foot></table-wrap>
      </sec>
      <sec id="s2b">
        <title>General organization of the chromosome of strain C23<sup>T</sup></title>
        <p><xref ref-type="fig" rid="pone-0020968-g001">Figure 1</xref> presents the results of several global analyses of the main chromosome of C23<sup>T</sup>. Major deviations from the average %G+C content (topmost plot) correlate closely with changes in tetramer frequency, as shown by the intense vertical bands in the TETRA plot below (third level). The second level graph shows the distribution of pseudogenes derived from non-transposase ORFs, and these are clearly associated with many of the variant regions identified in the adjacent TETRA and %G+C plots. Bacterial genomes often show large-scale organizational patterns, such as a systematic bias in the nucleotide composition of their leading and lagging strands, preferential placement of ORFs on the leading strand, and highly expressed genes close to the replication origin <xref ref-type="bibr" rid="pone.0020968-Shimizu1">[22]</xref>, <xref ref-type="bibr" rid="pone.0020968-VieiraSilva1">[23]</xref>. In such cases, a plot of cumulative GC-skew versus genome position can show a simple, geometric pattern where the replication origin and the terminus occur near major inflections <xref ref-type="bibr" rid="pone.0020968-Lobry1">[24]</xref>, <xref ref-type="bibr" rid="pone.0020968-Lopez1">[25]</xref>. In general, statistical deviations are much weaker in archaea so that cumulative GC-skew plots do not give a simple pattern (fifth level of <xref ref-type="fig" rid="pone-0020968-g001">Figure 1</xref>), nor does the GC-profile graph, a type of cumulative GC-skew that is more sensitive to local changes in %G+C content <xref ref-type="bibr" rid="pone.0020968-Gao1">[26]</xref>, shown just above it. However, in comparing the plots from the two strains (see later) one can distinguish between strong local deviations due to insertion of foreign DNA and weak positional deviations related to replication origins. Strong changes in the GC-profile correlate well with significant alterations in %G+C and tetramer composition. 
These atypical genome regions represent a mixture of unusual genomic features, described in detail below. Two prominent features are, (1) near the left end of the %G+C panel a distinct peak of higher %G+C, labeled <italic>hmuI</italic> (corresponding to a very long ORF encoding the halomucin gene with a highly biased codon usage) and, (2) a peak of lower %G+C at around 1.6 Mb that corresponds to a prophage, integrated into a tRNA gene.</p>
        <fig id="pone-0020968-g001" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pone.0020968.g001</object-id>
          <label>Figure 1</label>
          <caption>
            <title>Features of the strain C23<sup>T</sup> chromosome.</title>
            <p>The constant horizontal axis in all cases is the genome from left to right (first to last base of deposited sequence), with a scale given in Mbp. From top to bottom are plots of: (a) %G+C if the deviation for a 1 kb window is more than 2.5 SD from the average, (b) protein-coding pseudogenes (vertical triangles), excluding those of transposases, (c) variation in tetramer nucleotide composition (TETRA), where darker colors indicate more prominent deviation, (d) GC-profile, (e) cumulative GC-skew, (f) positions and orientations of the following gene categories: CDC6, <italic>orc1</italic>/<italic>cdc6</italic> homologues; tRNA, transfer RNA genes; rRNA, ribosomal RNA operons; r-Prot, ribosomal protein genes; RNAP, RNA polymerase subunit genes; CRISPR, loci of clustered regularly interspaced short palindromic repeats. Smaller, unfilled arrowheads in the CDC6 line represent the positions of <italic>cdc6</italic> pseudogenes. DV6 (divergent region 6, see <xref ref-type="fig" rid="pone-0020968-g005">Figure 5</xref>) is indicated below the cumulative GC-skew plot. Vertical grey-shaded stripes mark correlating features.</p>
          </caption>
          <graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0020968.g001" xlink:type="simple"/>
        </fig>
        <p>Although the GC-skew plots were not able to directly identify likely replication origins, archaeal origin-of-replication sequences (<italic>ori</italic>) typically occur adjacent to Cdc6/Orc genes <xref ref-type="bibr" rid="pone.0020968-Myllykallio1">[27]</xref>, <xref ref-type="bibr" rid="pone.0020968-Coker1">[28]</xref>, <xref ref-type="bibr" rid="pone.0020968-Norais1">[29]</xref>. Haloarchaea can carry up to 17 copies of such genes <xref ref-type="bibr" rid="pone.0020968-Barry1">[30]</xref> but the number of origins is usually much lower <xref ref-type="bibr" rid="pone.0020968-Norais1">[29]</xref>. Strain C23<sup>T</sup> possesses nine chromosomal Orc/Cdc6 genes, five of which appear to be complete (solid black arrowheads, CDC6 plot in <xref ref-type="fig" rid="pone-0020968-g001">Figure 1</xref>), and four are pseudogenes (smaller, unfilled arrowheads). In addition, two other Orc/Cdc6 genes are carried on the large plasmid, PL100 (see below).</p>
        <p>The protein sequence of the first chromosomal Cdc6 ORF, Hqrw_1001, is most closely related to HVO_0001 of <italic>Hfx. volcanii</italic> and VNG2411G/OE4380F of <italic>Hbt. salinarum</italic>, both of which lie next to the main replication origin, <italic>oriC1</italic> <xref ref-type="bibr" rid="pone.0020968-Coker1">[28]</xref>. The ORFs surrounding Hqrw_1001 are also very similar to those found near <italic>oriC1</italic> in other haloarchaea <xref ref-type="bibr" rid="pone.0020968-Pfeiffer1">[21]</xref>, <xref ref-type="bibr" rid="pone.0020968-Coker1">[28]</xref>, <xref ref-type="bibr" rid="pone.0020968-Norais1">[29]</xref>. Upstream of Hqrw_1001 is the probable <italic>oriC1</italic> of <italic>Hqr. walsbyi</italic> C23<sup>T</sup>, characterized by sequence elements typical of replication origins of haloarchaea (<xref ref-type="supplementary-material" rid="pone.0020968.s001">Figure S1</xref>), including long inverted repeats surrounding an AT-rich sequence (potential DNA unwinding domain, DUE) <xref ref-type="bibr" rid="pone.0020968-Myllykallio1">[27]</xref>, <xref ref-type="bibr" rid="pone.0020968-Coker1">[28]</xref>, <xref ref-type="bibr" rid="pone.0020968-Berquist1">[31]</xref>. Apart from Hqrw_1001, only two other complete homologues have sufficient, non-coding upstream sequences to contain potential further origins, Hqrw_3381 and Hqrw_3385. These lie very close to each other, are inward facing, and have <italic>ori</italic>-like sequence motifs nearby (<xref ref-type="supplementary-material" rid="pone.0020968.s001">Figure S1</xref>). These may represent one or two additional origins, and have been labeled <italic>oriC2</italic> and <italic>oriC3</italic> in <xref ref-type="fig" rid="pone-0020968-g001">Figure 1</xref>. 
As shown in the fifth and sixth panels of <xref ref-type="fig" rid="pone-0020968-g001">Figure 1</xref>, the predicted replication origins are near high points in the cumulative GC-skew graph, while the minimum (∼1.2 Mb) occurs almost mid-way between <italic>oriC1</italic> and <italic>oriC2</italic>/<italic>oriC3</italic>, and is likely to include a site or region where replication terminates. Multiple replication origins are common in Archaea <xref ref-type="bibr" rid="pone.0020968-Coker1">[28]</xref>, <xref ref-type="bibr" rid="pone.0020968-Robinson1">[32]</xref>.</p>
        <p>All 45 tRNA genes are located on the main chromosome. Rather than an even distribution, they show some tendency to cluster near the predicted replication origins, and to avoid the region furthest away from the origins, where replication is likely to terminate (1.1–1.6 Mb). In contrast to the relatively relaxed organization of tRNA genes in <italic>Haloquadratum</italic>, thermophilic Archaea such as <italic>Sulfolobus</italic> display a strong clustering of tRNA genes <xref ref-type="bibr" rid="pone.0020968-Reno1">[33]</xref>. There are two rRNA operons in <italic>Haloquadratum</italic>, the first (at ∼0.07 Mb) faces away and is very close to <italic>oriC1</italic>. The second (at ∼1.8 Mb) is almost diametrically opposite the first on the circular chromosome but is some distance from the potential <italic>oriC2/oriC3</italic> at ∼2.4 Mb, again facing away. The two 16S rRNA genes differ by 3 bases while the corresponding 5S and 23S rRNA genes are identical. The next two lower panels in <xref ref-type="fig" rid="pone-0020968-g001">Figure 1</xref> show the positions of protein-coding genes that are likely to be highly expressed; those for ribosomal proteins and RNA polymerase subunits. Their distribution is similar to the tRNA genes. The three CRISPR loci (crispr-1, -2 and -3) are indicated in the lowest panel of <xref ref-type="fig" rid="pone-0020968-g001">Figure 1</xref>. They occur within extended genomic regions of distinct tetra-nucleotide frequency, indicating foreign DNA. Overall, the genome appears to show some degree of global modeling, oriented around the predicted replication origins, but disturbances occur in regions likely to represent insertions of foreign DNA.</p>
      </sec>
      <sec id="s2c">
        <title>tRNAs and codon usage</title>
        <p><italic>Haloquadratum</italic> is unusual in having a genome with a %G+C that is about 20 percentage points lower than all other genera within the family <italic>Halobacteriaceae</italic>. The most parsimonious explanation is that <italic>Haloquadratum</italic> has evolved from the higher level down to its current value. As expected from such a change, <italic>Hqr. walsbyi</italic> shows a strong bias towards codons with A or T in the 3<sup>rd</sup> position, relative to the codon usage of other haloarchaea (ca. 60% A+T <italic>vs</italic> ca. 20%), while the bias is only slight for the 1<sup>st</sup> and 2<sup>nd</sup> position <xref ref-type="bibr" rid="pone.0020968-Bolhuis1">[7]</xref>. The strength of this bias indicates a rapid evolutionary drift towards a lower GC content. Due to this strong bias, ten different amino acids in this organism have NNT as the preferred codon in their synonymous codon sets (<xref ref-type="supplementary-material" rid="pone.0020968.s003">Table S2</xref>) <xref ref-type="bibr" rid="pone.0020968-Bolhuis1">[7]</xref>. However, as noted in the literature of codon-anticodon interactions, tRNA anticodons with A in the first position (which decode NNT codons), are generally uncommon, and in <italic>Haloquadratum</italic> there are <underline>no</underline> such tRNAs present in the genome. This means that despite a preference for NNT codons, all of these must be decoded by tRNA anticodons using G:U base-pairing, highlighting the view that “the anticodon-codon wobble base pair of a G<sub>34</sub> with a U3 … is almost isomorphic/isosteric to a Watson-Crick base pair.” <xref ref-type="bibr" rid="pone.0020968-Agris1">[34]</xref>. tRNA base modifications also determine specificity, and have been examined best in <italic>Haloferax volcanii</italic> <xref ref-type="bibr" rid="pone.0020968-Grosjean1">[35]</xref>, but not yet in <italic>Haloquadratum</italic>.</p>
        <p>Two tRNAs contain introns (tRNA-Trp, tRNA-Met2), while tRNA-Ile1 has a CAT anticodon that is modified <xref ref-type="bibr" rid="pone.0020968-Ikeuchi1">[36]</xref>, enabling it to switch its specificity and act as a TAT anticodon, and decode Ile (ATA) codons. The genome also contains two partial tRNA genes that are associated with prophages (see below).</p>
      </sec>
      <sec id="s2d">
        <title>Plasmids</title>
        <p>Plasmid extracts of <italic>Hqr. walsbyi</italic> C23<sup>T</sup> cells revealed a small, 6 kb multi-copy plasmid (∼30 copies/genome equivalent) designated PL6, and a much larger, low copy-number plasmid, PL100 (<xref ref-type="fig" rid="pone-0020968-g002">Figure 2A and B</xref>). Panel A shows a typical gel profile of un-restricted plasmid DNA, with the smaller plasmid band displaying super-coiled, open-circular and dimeric forms. Loading more plasmid extract (panel B) revealed the large plasmid, PL100, running above the fragmented chromosomal DNA band. Plasmid preparations from the previously sequenced Spanish isolate, HBSQ001, and from a novel, Australian isolate of <italic>Haloquadratum</italic>, strain Bajool9 <xref ref-type="bibr" rid="pone.0020968-Oh1">[16]</xref>, were also included in Panel A. The Spanish strain lacks small plasmids and its large 47 kb plasmid (PL47) is not visible in this gel. The Bajool9 isolate was recovered from a saltern crystallizer in Bajool, Queensland, Australia <xref ref-type="bibr" rid="pone.0020968-Oh1">[16]</xref>, some 1700 km north of the crystallizer from which C23<sup>T</sup> was isolated. It also harbours a ∼6 kb plasmid, as well as another, much smaller plasmid (faintly visible at the bottom of the lane, running between the 1 and 2 kb size markers).</p>
        <fig id="pone-0020968-g002" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pone.0020968.g002</object-id>
          <label>Figure 2</label>
          <caption>
            <title>Plasmids of strains C23<sup>T</sup>, HBSQ001, and Bajool9.</title>
            <p>Plasmid preparations from the three strains of <italic>Haloquadratum</italic> were separated by electrophoresis on 1% agarose gels and stained with ethidium bromide. Panel A: the plasmid bands labeled PL6 are super-coiled (sc), open-circular (oc) and dimeric forms of two, closely related, ∼6 kb plasmids (see text for details). The Bajool9 isolate of <italic>Haloquadratum</italic> <xref ref-type="bibr" rid="pone.0020968-Oh1">[16]</xref> is also seen to carry a similarly sized plasmid as well as a much smaller one, running near the bottom of the gel. The position of contaminating, sheared chromosomal DNA is indicated at the right of each panel (chr). Panel B: more of the plasmid preparation from isolate C23<sup>T</sup> has been loaded, revealing the large plasmid, PL100. In both panels, MW indicates the 1 kb DNA size ladder (Invitrogen), with lengths indicated in kb at the left.</p>
          </caption>
          <graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0020968.g002" xlink:type="simple"/>
        </fig>
      </sec>
      <sec id="s2e">
        <title>Sequences of the small plasmids of strain C23<sup>T</sup></title>
        <p>Genomic sequencing revealed that the PL6 plasmid band observed in C23<sup>T</sup> consists of two closely related plasmids, designated PL6A and PL6B. These were in approximately equal proportions in the population, as judged by restriction digests and sequencing reads (data not shown). Their %G+C is significantly higher than that of the chromosome (51–52% versus 47.8%). They show high sequence identity, particularly over the first two ORFs, but this drops sharply at around 3.2 kb (arrowed in <xref ref-type="fig" rid="pone-0020968-g003">Figure 3</xref>), following immediately after a motif consisting of two direct repeats of the sequence ACAGATTA bordered by an inverted repeat. Both plasmids have six predicted ORFs that show a similar organizational pattern. The corresponding ORFs between plasmids are predicted to code for proteins of similar size and amino acid sequence (<xref ref-type="fig" rid="pone-0020968-g003">Figure 3</xref>).</p>
        <fig id="pone-0020968-g003" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pone.0020968.g003</object-id>
          <label>Figure 3</label>
          <caption>
            <title>Comparison of circular plasmids PL6A (6,129 bp) and PL6B (6,056 bp).</title>
            <p>Predicted ORFs are labeled with their locus tags, and arrows represent their orientations and lengths. Nucleotide positions are given above each plasmid. Nucleotide similarity is indicated by the bar chart below the alignment (bar height and darkness of colour indicate the level of nucleotide identity, with solid black indicating 100% identity). The vertically oriented arrowhead at around 3.2 kb indicates a sharp break in homology. Percentage identity values of the corresponding protein sequences (in red) are given between ORFs in the alignment. H-T-H, helix-turn-helix protein domain. TM, transmembrane domain. Solid black arrowheads represent short repeat sequences (see text). The lowest panel shows the cumulative CT-skew plots of both sequences.</p>
          </caption>
          <graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0020968.g003" xlink:type="simple"/>
        </fig>
        <p>The six annotated ORFs of PL6A or PL6B show few protein motifs that could give clues to function. None show significant homology to any of the known Rep, Cdc6, MCM helicases or DNA polymerases, nor are there any detectable <italic>oriC1</italic>-like origin-of-replication-box (ORB) motifs, so the actual mode of plasmid replication remains uncertain. However, archaeal plasmids are known to display considerable variation in their replication systems <xref ref-type="bibr" rid="pone.0020968-Greve1">[37]</xref>, <xref ref-type="bibr" rid="pone.0020968-Soler1">[38]</xref>. Helix-turn-helix motifs, indicative of DNA binding, are predicted for Hqrw_6001, 6002 and 6005 (and the corresponding PL6B ORFs), and the latter protein also contains a P-loop ATPase-like motif (see also below). Intriguingly, Hqrw_6007 and Hqrw_7007 were detected by mass spectrometry in purified cell membrane preparations of C23<sup>T</sup> (<xref ref-type="supplementary-material" rid="pone.0020968.s004">Table S3</xref>), a cellular location consistent with their predicted C-terminal membrane anchors. The N-termini of these proteins are similar to members of the CopG/MetJ family of repressors, indicating probable binding to target sites on PL6. If so, the bifunctional properties of these proteins suggest they could tether the PL6 plasmids to the cell membrane. Repeats and inverted repeat sequences occur within the two long intergenic regions found between Hqrw_6007 and Hqrw_6001, and between Hqrw_6002 and Hqrw_6004. A long, inverted repeat occurs in both plasmids at a similar position (nt 107–123, PL6A), and is flanked by ORFs with predicted DNA binding motifs. Cumulative nucleotide skew plots that include thymidine show a strong inflection in the intergenic sequence at about 3.1 kb (a highly conserved region), with a maximum in the intergenic sequence between the sixth and first ORFs (<xref ref-type="fig" rid="pone-0020968-g003">Figure 3</xref>). 
The arrangement of these conserved repeats at almost diametrically opposite sites (0.1 and ∼3.2 kb), and their correlation with inflections in nucleotide skew, suggest they may represent replication origins and termini (assuming a theta mode of replication).</p>
        <p>The nucleotide sequences of the PL6 plasmids were not similar to the main chromosome, or to PL100 of C23<sup>T</sup>, or to the genome of HBSQ001, or other reported haloarchaeal plasmids. Partial sequencing of the 6 kb plasmid in the Bajool9 strain revealed it to be similar in sequence and gene organization to PL6A or PL6B (85% nt identity, data not shown). BLASTN searches of the CAMERA metagenomic database gave numerous matches to DNA sequences recovered from a saltern crystallizer pond in the USA (<xref ref-type="table" rid="pone-0020968-t002">Table 2</xref>). Hundreds of matches were found in the larger dataset of environmental sequences recovered from Lake Tyrrell, a hypersaline lake in Australia (<xref ref-type="table" rid="pone-0020968-t002">Table 2</xref>). Sequence reads from this dataset could be retrieved that showed high nucleotide identity to PL6, and these could be assembled into contigs of up to 6 kb that displayed co-linear sequence similarity to these plasmids (data not shown). While no matches to metagenomic data from the Santa Pola saltern in Spain were observed, recently available sequences from a viral metagenome of the same saltern showed several TBLASTX matches to PL6 ORFs Hqrw_6005 and Hqrw_7005 (see later). The presence of <italic>Hqr. walsbyi</italic> 16S rRNA gene sequences in the metagenomic data from the three sites confirmed the presence of this species in all three datasets. The data indicate that PL6-like plasmids are widely distributed.</p>
        <table-wrap id="pone-0020968-t002" position="float"><object-id pub-id-type="doi">10.1371/journal.pone.0020968.t002</object-id><label>Table 2</label><caption>
            <title>PL6 plasmid related sequences in metagenomic data.</title>
          </caption><!--===== Grouping alternate versions of objects =====--><alternatives><graphic id="pone-0020968-t002-2" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0020968.t002" xlink:type="simple"/><table>
            <colgroup span="1">
              <col align="left" span="1"/>
              <col align="center" span="1"/>
              <col align="center" span="1"/>
              <col align="center" span="1"/>
            </colgroup>
            <thead>
              <tr>
                <td align="left" colspan="1" rowspan="1">Sample (site)</td>
                <td align="left" colspan="1" rowspan="1">Accession/Project<xref ref-type="table-fn" rid="nt102">a</xref></td>
                <td align="left" colspan="1" rowspan="1">Reference</td>
                <td align="left" colspan="1" rowspan="1">BLASTN hits<xref ref-type="table-fn" rid="nt103">b</xref></td>
              </tr>
            </thead>
            <tbody>
              <tr>
                <td align="left" colspan="1" rowspan="1">Saltern metagenome (San Diego Bay, USA)</td>
                <td align="left" colspan="1" rowspan="1">CAM_PROJ_SalternMetagenome<xref ref-type="table-fn" rid="nt102">a</xref></td>
                <td align="left" colspan="1" rowspan="1">
                  <xref ref-type="bibr" rid="pone.0020968-Dinsdale1">[79]</xref>
                </td>
                <td align="left" colspan="1" rowspan="1">11 (PL6A), 16 (PL6B)</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">Saltern metagenome (Santa Pola, Alicante, Spain)</td>
                <td align="left" colspan="1" rowspan="1">Metagenome of Marine NaCl-Saturated Brine<xref ref-type="table-fn" rid="nt102">a</xref></td>
                <td align="left" colspan="1" rowspan="1">
                  <xref ref-type="bibr" rid="pone.0020968-Legault1">[14]</xref>
                </td>
                <td align="left" colspan="1" rowspan="1">0 (PL6A, PL6B)</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">Metavirome (Santa Pola, Alicante, Spain)</td>
                <td align="left" colspan="1" rowspan="1">GU735174, GU735304, GU735358, GU735225, GU735307, GU735310<xref ref-type="table-fn" rid="nt104">c</xref></td>
                <td align="left" colspan="1" rowspan="1">
                  <xref ref-type="bibr" rid="pone.0020968-Santos1">[51]</xref>
                </td>
                <td align="left" colspan="1" rowspan="1">TBLASTX, E ≤10<sup>−6</sup></td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">Salt Lake metagenome (Lake Tyrrell, Victoria, Australia)</td>
                <td align="left" colspan="1" rowspan="1">JCVI<xref ref-type="table-fn" rid="nt105">d</xref> (GS–84, 2005)</td>
                <td align="left" colspan="1" rowspan="1">JCVI, unpublished data</td>
                <td align="left" colspan="1" rowspan="1">719 (PL6A), 644 (PL6B)</td>
              </tr>
            </tbody>
          </table></alternatives><table-wrap-foot>
            <fn id="nt102">
              <label>a</label>
              <p>CAMERA database, <ext-link ext-link-type="uri" xlink:href="http://camera.calit2.net/about-camera/full-datasets" xlink:type="simple">http://camera.calit2.net/about-camera/full-datasets</ext-link>.</p>
            </fn>
            <fn id="nt103">
              <label>b</label>
              <p>matches with expect values better than 10<sup>−10</sup>.</p>
            </fn>
            <fn id="nt104">
              <label>c</label>
              <p>All six matching sequences were to Hqrw_6005/Hqrw_7005.</p>
            </fn>
            <fn id="nt105">
              <label>d</label>
              <p>JCVI, J. Craig Venter Institute, USA. Data to be released in 2011 (personal communication, Matt Lewis, JCVI).</p>
            </fn>
          </table-wrap-foot></table-wrap>
        <p>Comparing PL6 to the genome sequences of other microorganisms, the second ORF (Hqrw_6002/7002) gave significant matches to numerous archaeal homologues. In their genomic contexts, these homologues are frequently adjacent to a previously described group of well-conserved halovirus genes, initially identified from the sequence of halovirus His2 <xref ref-type="bibr" rid="pone.0020968-Bath1">[39]</xref>. A block of four genes including that for the major virus capsid protein of His2 is found in many haloarchaeal genomes as well as pHK2, a small haloarchaeal plasmid. Although these genomic homologues do not appear to be His2-like prophages <xref ref-type="bibr" rid="pone.0020968-Bath1">[39]</xref> their distribution and conservation somehow relates haloviruses and haloarchaeal plasmid genes at specific sites of genomic integration <xref ref-type="bibr" rid="pone.0020968-Bath1">[39]</xref>, <xref ref-type="bibr" rid="pone.0020968-Holmes1">[40]</xref>, <xref ref-type="bibr" rid="pone.0020968-Holmes2">[41]</xref>. Two recently described pleomorphic haloviruses, HRPV-1 and HHPV-1, also contain a similar block of homologues related to His2, plasmid pHK2 and the same genomic loci <xref ref-type="bibr" rid="pone.0020968-Pietila1">[42]</xref>, <xref ref-type="bibr" rid="pone.0020968-Roine1">[43]</xref>, further consolidating this link. A summary of the relationships between PL6, the three haloviruses, and the related loci from six different genera of haloarchaea is presented in <xref ref-type="fig" rid="pone-0020968-g004">Figure 4</xref>. In most cases, homologues of PL6 ORF Hqrw_6002 are found either immediately adjacent to the His2-related gene cluster or separated by a small number of intervening genes. In the case of <italic>Hfx. volcanii</italic>, the intervening genes are three consecutive insertion sequences. 
<italic>Archaeoglobus</italic>, which is not a member of the family Halobacteriaceae but belongs to the same archaeal phylum (Euryarchaeota), also contains a His2 VP1 homologue, which is near a virus integrase. The proximity, consistent orientation, and wide phylogenetic distribution of this gene arrangement speak of an important and complex network of DNA cross-talk; one that spans many of the known genera of haloarchaea and involves a diverse group of plasmids and haloviruses. In some respects, these appear similar to Mavericks, a class of mobile elements found in Eukaryotes <xref ref-type="bibr" rid="pone.0020968-Pritham1">[44]</xref>.</p>
        <fig id="pone-0020968-g004" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pone.0020968.g004</object-id>
          <label>Figure 4</label>
          <caption>
            <title>Plasmid PL6-associated virus genes in haloarchaea.</title>
            <p>Alignment of genomic regions of <italic>Haloarcula marismortui</italic> (Hmar), <italic>Haloferax volcanii</italic> (Hvol), <italic>Halomicrobium mukohataei</italic> (Hmuk), <italic>Halorhabdus utahensis</italic> (Huta), <italic>Natronomonas pharaonis</italic> (Napha), <italic>Archaeoglobus fulgidus</italic> (Archaeoglobus) and <italic>Natrialba magadii</italic> (Nmag) with the corresponding genes of haloviruses His2, HRPV-1 and plasmid pHK2 (top) and <italic>Hqr. walsbyi</italic> C23<sup>T</sup> plasmid PL6A (bottom). Locus tags or ORF numbers are given for most ORFs. Homologues of the capsid protein genes of His2 are colour coded (red, green, blue and grey). Homologues of PL6 ORF2 genes are coloured yellow. Integrase (Int) and transposase (Tnp) genes are coloured black. Dashed lines indicate related genes (other than those colour coded) that occur between two or more genomes. Asterisks within the His2 VP1 ORF and its homologues indicate a predicted signal sequence. Genomic position scales are indicated for each genome in kilobase pairs.</p>
          </caption>
          <graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0020968.g004" xlink:type="simple"/>
        </fig>
        <p>The PL6 homologues Hqrw_6005 and Hqrw_7005 were specifically related to halovirus His1 protein His1V_gp16, and to an ORF of <italic>Nmn. pharaonis</italic> (NP3284A). The latter ORF occurs within a 13 kb region that also includes a CRISPR system, and which has been inserted into a chromosome-integrated copy of the <italic>Nmn. pharaonis</italic> plasmid PL23. All these homologues possess a conserved P-loop ATPase motif. Halovirus His1 is the type species of the Salterprovirus group, to which halovirus His2 also belongs <xref ref-type="bibr" rid="pone.0020968-Bath1">[39]</xref>, so the relationship observed in this study between PL6, His1, and genomic loci carrying His2-related genes provides further support for a virus link between these elements. Indeed, there is a strong possibility that PL6 is a provirus.</p>
        <p>We propose that this family of archaeal genomic loci that include halovirus, and now PL6 homologues, be referred to as <underline>vi</underline>rus and <underline>p</underline>lasmid <underline>r</underline>elated <underline>e</underline>lement<underline>s</underline> (ViPREs). Understanding the mechanisms, significance and impacts of ViPREs on the evolution of haloarchaea (and perhaps other <italic>Euryarchaeota</italic>) and their viruses is clearly a high priority.</p>
      </sec>
      <sec id="s2f">
        <title>Plasmid PL100</title>
        <p>The large plasmid of C23<sup>T</sup> is 100,258 bp in length, 4 percentage points lower in G+C than the main chromosome (43.9%) and predicted to code for 83 ORFs. It is more than twice the size of the 47 kb plasmid in HBSQ001 (PL47), and although the two plasmids appear to be largely unrelated they both carry a preponderance of hypothetical or conserved hypothetical proteins without discernable function. ORFs with assigned function on PL100 include those involved in plasmid maintenance or replication, restriction endonucleases, methylases and helicases. Two <italic>cdc6</italic> gene homologues, Hqrw_5030 and Hqrw_5083, are situated approximately opposite each other on the plasmid, and their upstream regions possess ORB-like repeat motifs consistent with archaeal replication origins (<xref ref-type="supplementary-material" rid="pone.0020968.s001">Figure S1</xref>, OriP1 and OriP2). Nucleotide skew analyses indicated only small inflections near both, but as there are no RepH homologues, replication is most likely initiated near these <italic>cdc6</italic> genes.</p>
        <p>Plasmid PL100 was unstable in laboratory cultures of C23<sup>T</sup>, and was lost from the culture used to prepare DNA for genome sequencing. Some sequences of PL100 had been recovered from an earlier, preliminary genome sequencing effort, and this allowed PCR primers to be designed to detect the presence of the plasmid. Screening cultures for successful PCR amplification identified one (out of 6) that had retained the plasmid, and DNA from this source was used to PCR amplify across contig gaps and complete the sequence (see <xref ref-type="sec" rid="s3">Materials and Methods</xref>). From these observations, PL100 is readily dispensable, at least under laboratory conditions.</p>
      </sec>
      <sec id="s2g">
        <title>Comparative overview of the chromosomes of C23<sup>T</sup> and HBSQ001</title>
        <p>The close relationship of C23<sup>T</sup> and HBSQ001 is clearly seen in a nucleotide alignment of the two chromosomes (<xref ref-type="fig" rid="pone-0020968-g005">Figure 5</xref>), where only minor shifts from a straight line due to indels occur; up-shifts indicating insertions in HBSQ001 and downshifts for insertions in C23<sup>T</sup>. There are no large-scale rearrangements – the two nucleotide sequences are co-linear, sharing 98.6% nucleotide similarity. Panel C is a different representation of the aligned chromosomes using the Artemis Comparison Tool (ACT) that shows the pattern of variation between the chromosomes. Together with panel A, the two graphs present a typical pattern of intra-species variation seen in many prokaryotes: a backbone core sequence with high similarity that is interspersed with strain-specific indels.</p>
        <fig id="pone-0020968-g005" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pone.0020968.g005</object-id>
          <label>Figure 5</label>
          <caption>
            <title>Comparison of the chromosomes of <italic>Hqr. walsbyi</italic> strains C23<sup>T</sup> and HBSQ001.</title>
            <p>Panel A: Mummer nucleotide alignment, where dots indicate similar sequences in the same orientation (red), or reverse orientation (blue), shared by the two strains. Panels B and D: Tetra-nucleotide variation (TETRA) along the chromosomal sequences of each strain (labeled). Several divergent regions in each plot are labeled in both panels (see text). The %G+C deviation (if a 1 kb window is more than 2.5 SD from the average) is given for HBSQ001 immediately below the TETRA plot of panel D. Panel C: comparison of the chromosomal sequences using the Artemis comparison tool (ACT). Red lines indicate regions of high nucleotide similarity; white regions indicate lack of similarity. Above this plot regions of major divergence (DV 1-12) are indicated (and also on the Mummer plot in panel A), while below the previously described genomic islands (GI 1-4 <xref ref-type="bibr" rid="pone.0020968-CuadrosOrellana1">[13]</xref>) are indicated. Panel E: Cumulative GC-skew plots for the chromosomes of C23<sup>T</sup> (red) and HBSQ001 (black). The boxed region extending upwards to panel D outlines the major drop in the cumulative skew plot of HBSQ001.</p>
          </caption>
          <graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0020968.g005" xlink:type="simple"/>
        </fig>
        <p>Twelve prominent regions of strain-specific variation (divergent regions DV1 - DV12) are indicated above the ACT alignment. Below the alignment, the positions of genomic islands (GI 1-4) of HBSQ001, as described previously by Cuadros-Orellana <italic>et al.</italic> <xref ref-type="bibr" rid="pone.0020968-CuadrosOrellana1">[13]</xref>, are shown. Genomic islands are regions of high variability that were identified when the HBSQ001 genome was compared to autochthonous, metagenomic DNA sequences. The DV and GI sequences are discussed in more detail below, but it is clear that the GIs also correspond to regions of major difference between HBSQ001 and C23<sup>T</sup> seen in the current study, i.e. GI 1  =  DV2, GI 2  =  DV6, GI 3  =  DV11, GI 4  =  DV12. Panels B and D plot the variation in tetramer frequency of the two strains, and show that some regions of divergent sequence composition are common to both (such as the halomucin genes at around 0.6 Mb, DV1) while others are strain-specific, often coinciding with large indels and indicative of foreign DNA insertions. Panel E compares the cumulative GC-skew plots of both chromosomes, which show major differences at positions corresponding to large indels. The dramatic drop in the HBSQ001 plot at around 2.6 Mb is due to a strain-specific integration of a prophage (see below). The prophage provides HBSQ001 with two additional <italic>cdc6</italic> genes compared to C23<sup>T</sup>, but these are closely spaced and outward facing, and are not associated with nearby <italic>ori</italic>-like sequence motifs. The replication origins used by HBSQ001 are probably the same as the corresponding sequences in C23<sup>T</sup>, as these are conserved, but the <italic>cdc6</italic> gene (HQ2952A) corresponding to that near <italic>ori</italic>C2 of C23<sup>T</sup> has been inactivated by an insertion. 
The adjacent <italic>ori</italic> could still function, as replication origins in haloarchaea can use Cdc6 proteins synthesized from genes other than the adjacent copy <xref ref-type="bibr" rid="pone.0020968-Norais1">[29]</xref>.</p>
      </sec>
      <sec id="s2h">
        <title>Systematic comparison of the chromosomes of C23<sup>T</sup> and HBSQ001</title>
        <p>The close similarity of the two strains enabled a systematic and precise analysis of all the differences between their chromosomes. A custom PERL-based script (as outlined in <xref ref-type="sec" rid="s3">Materials and Methods</xref>) was applied that could perform a base-by-base comparison of very similar sequences while being tolerant to insertions and deletions of any size as well as large sequence duplications. Trivial differences between the strains were excluded (e.g. point mutations), based on criteria defined in <xref ref-type="sec" rid="s3">Materials and Methods</xref>. Although the constraints were relatively relaxed, the resulting set contained only 512 non-trivial differences, emphasizing the very close relationship of the two strains. These differences were manually inspected to eliminate simple cases of regions with enhanced sequence divergence, and this left a total of 360 differences, representing the final set of strain-specific sequences (<xref ref-type="supplementary-material" rid="pone.0020968.s005">Tables S4</xref> and <xref ref-type="supplementary-material" rid="pone.0020968.s006">S5</xref>).</p>
        <p>The remainder of the two chromosomes defines the common shared sequences of the two strains, comprising about 84% of each genome. These sequences are 98.6% identical at the DNA level and are absolutely synteneic. They do not show even a single genome rearrangement (inversion or transposition). Sequence identity remains high even in intergenic regions (which represent a significant proportion of the genome in <italic>Haloquadratum</italic>) and in genes that are not well conserved among haloarchaea. Therefore, we prefer the term “shared sequence” instead of “core sequence” as the latter implies restriction of the analysis to only the well-conserved sequences of both genomes.</p>
        <p>In other studies comparing strains of the same prokaryotic species, core sequence similarities of 98–99% are commonly observed but with intergenic regions excluded. Also, these genomes usually show inversions and rearrangements. For example, in <italic>E. coli</italic>, a recent study of 61 genomes found that they can vary in size by more than 1 Mb (4.6–5.7 Mb), that gene order is not strongly conserved (i.e. extensive inversions/rearrangements), and that any particular <italic>E. coli</italic> genome contains only about 1/5th of the core genes of the species, while the remaining 4/5ths sample a pan-genome estimated to contain about 15,000 gene families <xref ref-type="bibr" rid="pone.0020968-Lukjancenko1">[45]</xref>. In the current study, the complete lack of genome rearrangements together with a 98.6% sequence identity, even when intergenic regions are included (which amount to more than 20% of the sequence), indicate a remarkable level of similarity between the two strains of <italic>Haloquadratum walsbyi</italic>, particularly given that they originate from samples taken at near-maximal distance on Earth.</p>
        <p>Strain-specific sequences contribute 16% of the genetic material in each strain, and include numerous integration and excision events. It is an apparent paradox that so many of these events have occurred while the shared sequences do not even show a single large-scale genome rearrangement. The availability of such highly conserved genome sequences, with synteneic shared sequences, provided an outstanding opportunity to examine, in fine detail, the differences between these strains. This revealed two unsuspected biological processes that have moulded these genomes: (a) repeat-mediated deletions and (b) deletion-coupled insertions. These processes, which are described in detail below, were able to be discerned with such clarity because (i), the positions of insertions and deletions could be determined unambiguously to single-base resolution, and (ii), the number of individual cases is large enough to exclude the possibility that they were random events.</p>
      </sec>
      <sec id="s2i">
        <title>Classifying strain-specific sequences</title>
        <p>Each of the 360 strain-specific sequences represents some type of insertion or deletion (indel), which could be classified into one of several categories (<xref ref-type="fig" rid="pone-0020968-g006">Figure 6</xref>, <xref ref-type="supplementary-material" rid="pone.0020968.s005">Table S4</xref>). About half of the indels represent mobile genetic elements (see below) while the other half are unrelated to known mobile genetic elements. The smallest indels are just longer than 20 bp (the lower cut-off size for manual inspection) while the largest is more than 100 kb (in DV12). Some of the short indels were sub-classified as “polyrepeats”, and consist of a short (e.g. 6-mer) repeated sequence (allowing for some variability), where the number of copies differs between the two strains. For polyrepeats, an exact point of insertion/deletion cannot be defined. Several polyrepeat indels are located within coding regions, where the proteins from the two strains have e.g. dipeptide repeats of different length but with the reading frame conserved.</p>
        <fig id="pone-0020968-g006" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pone.0020968.g006</object-id>
          <label>Figure 6</label>
          <caption>
            <title>Categories of strain-specific regions, plotted according to the relative numbers in each type.</title>
            <p>Pie chart showing the types of strain-specific regions in both strains. Blue: mobile genetic elements, which includes transposons and transposase-free mobile elements (MITEs, PATEs, and SMRs). Green: insertions and deletions (indels). Red: Deletion-Coupled Insertions (DCI). Long indels and DCIs (&gt; 1.5 kb) are filled colour sectors while medium (0.15–1.5 kb) or short (20–150 bp) sequences in these categories are dotted. Numbers at the outer edge of each sector are the numbers of cases in each category. Data from <xref ref-type="supplementary-material" rid="pone.0020968.s005">Table S4</xref>.</p>
          </caption>
          <graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0020968.g006" xlink:type="simple"/>
        </fig>
      </sec>
      <sec id="s2j">
        <title>Deletion-coupled insertions occur at identical positions in the shared sequence</title>
        <p>An unexpected observation was that the majority of the insertions are coupled to deletions such that the two strains possess unrelated DNA sequences integrated at <underline>exactly</underline> the same position within the chromosome. Deletion-coupled insertions differ from “hot-spots of integration” where insertions and deletions usually occur within a short distance but commonly at <underline>distinct</underline> positions. Of several possible explanations for this phenomenon, we favour a process where insertion and deletion are mechanistically coupled, so resulting in unrelated sequences at exactly the same position.</p>
        <p>A random process can be excluded for statistical reasons, as 64 independent deletion-coupled insertions were identified (<xref ref-type="fig" rid="pone-0020968-g006">Figure 6</xref>, <xref ref-type="supplementary-material" rid="pone.0020968.s005">Table S4</xref>), and these showed no evidence of sequence conservation that could be interpreted as a targeting. The deletion-coupled insertions are also unlike mobile, site-specific pathogenicity islands (PI) found in bacteria <xref ref-type="bibr" rid="pone.0020968-Kers1">[46]</xref>, as those occurring at the same site are usually closely related and characteristically produce direct repeats upon integration <xref ref-type="bibr" rid="pone.0020968-Kers1">[46]</xref>, <xref ref-type="bibr" rid="pone.0020968-vanSchaik1">[47]</xref>. The results of the current study indicate that deletion-coupled insertions originate by an as yet undescribed mechanism that couples the insertion of foreign DNA to the simultaneous loss of chromosomal genes, an apparently risky strategy for individual cells as it removes part of their own genome. Our data imply that the majority of the insertions of foreign genetic material are coupled to a deletion.</p>
        <p>Insertions and associated deletions varied widely in size, with the smallest described being just over 20 bp, the applied minimal cut-off (although even shorter replacements were observed). Both the deleted sequence and the replacing inserted sequence may be long (16–43 kb), while the longest example was 104 kb (in HBSQ001), corresponding to a 429 bp unrelated sequence in C23<sup>T</sup>. Some are likely to have a major biological impact, for example, the exchange of the S-layer glycoprotein in HBSQ001 occurs on a 43.7 kb <italic>vs</italic> 8.2 kb deletion-coupled insertion region (DV2) that has been reported as genomic island GI1 <xref ref-type="bibr" rid="pone.0020968-Legault1">[14]</xref>. In this case, the deleted/inserted genes are homologous and thus deletion-coupled insertion results in what has been described as divergent genes in a conserved synteneic context <xref ref-type="bibr" rid="pone.0020968-CuadrosOrellana1">[13]</xref>. The exchange of the cell surface glycoprotein may be responsible for the observed morphological difference between the cell walls of the two strains <xref ref-type="bibr" rid="pone.0020968-Burns1">[8]</xref>. The Cas/CRISPR systems for virus/plasmid defence, which are exclusively found in C23<sup>T</sup>, are also located on such deletion-coupled insertions (DV3 and DV6).</p>
      </sec>
      <sec id="s2k">
        <title>A novel, repeat-mediated deletion mechanism</title>
        <p>Another unexpected finding are differences that we interpret as resulting from repeat-mediated deletion events, where a sequence of usually &lt;20 bp in one strain is found directly repeated at both ends of a strain-specific sequence while only a single copy of the repeat exists in the corresponding position in the other strain (<xref ref-type="supplementary-material" rid="pone.0020968.s007">Table S6</xref>). In the analyzed set, there are 16 independent cases of such indels, with direct repeats ranging from 10–19 bp, and an additional 11 cases involving repeats ranging from 7–9 bp. There are also examples where the repeats are 6-mers (3), 5-mers (5) and 4-mers (6). Although short direct repeats such as 4-mers may occur by chance, less than one case (0.4) would be expected in a set of 99 indels.</p>
        <p>A sequence that occurs in only one strain represents an “indel” as there is no <italic>a priori</italic> information to indicate whether it is due to an insertion in one strain or a deletion in the other unless a distinction can be made based on additional information, such as gene truncation, preservation of conserved gene clusters, or tetra-nucleotide analysis. In nearly all repeat-associated indels (10 out of 11) where a distinction between deletion and insertion was possible, a deletion was identified as the cause.</p>
        <p>Repeat-mediated deletion events appear to be common among copies of the small mobile element HqIRS46, which contains a direct repeat of 22 bp length (<xref ref-type="supplementary-material" rid="pone.0020968.s007">Table S6</xref>). Repeat-mediated deletion shortens the element of 390 bp to concatenated terminal sequences of 52 bp.</p>
        <p>In a comparison of the genomes of <italic>Hbt. salinarum</italic> strains NRC-1 and R1, three indels were noted <xref ref-type="bibr" rid="pone.0020968-Pfeiffer1">[21]</xref> that probably also represent repeat-mediated deletions. One was a precise deletion of a copper-binding domain of the HcpB protein in NRC-1 (via a 32 bp direct repeat). The second was a 133 bp deletion in the promoter of the rRNA operon in R1 (via a 27 bp direct repeat). The third case, a 10007 bp strain-specific sequence (flanked by an 8 bp repeat) present only in NRC-1, was originally believed to be an insertion in the latter strain. Subsequent low pass sequencing of other <italic>Halobacterium</italic> strains has shown that this is a deletion in R1 (F. Pfeiffer, D. Oesterhelt, unpublished results). These examples of repeat-mediated deletions extend this biological principle to a different genus within the <italic>Halobacteriaceae</italic>, suggesting that they are widespread and possibly much more common than previously realised.</p>
        <p>These types of events are reminiscent of the repair of double-strand breaks (DSB) by a pathway described as micro-homology-mediated end-joining (MMEJ), a process that has been well studied in eukaryotes, and more recently in <italic>Archaea</italic> <xref ref-type="bibr" rid="pone.0020968-Delmas1">[48]</xref>, <xref ref-type="bibr" rid="pone.0020968-McVey1">[49]</xref>. However, we think that the observed cases of repeat-mediated deletion in the present study (and in <italic>Halobacterium</italic>) have occurred independently of double-strand breaks, as these would be expected to occur at random positions within a genome if generated by radiation or chemical radicals. It is extremely unlikely that the 4 observed core deletions in the small mobile element HqIRS46 resulted from 4 independent double-strand breaks that, by chance, have all occurred within this repeat. Also, it is quite unlikely that double-strand breaks in <italic>Halobacterium</italic> have – by chance – occurred between the prominent, closely-spaced repeats which occur in the copper-binding domain of halocyanin and in the rRNA promoter region.</p>
        <p>The core sequences removed after repeat-mediated deletion events show major size variations, with documented examples ranging from 10 bp to 34 kb (<xref ref-type="supplementary-material" rid="pone.0020968.s007">Table S6</xref>). Even shorter core deletions have been observed but the resulting events are below our length cut-off of 20 bp (for the sum of core and repeat length). In several cases, the strain not affected by the deletion shows slight sequence variation in the “direct repeat”. It can be assumed that the two copies have been mutated to identity in the other strain prior to the deletion event. Besides the deletion in HqIRS46, there are only four repeat-mediated deletions where the direct repeats are longer than 20 bp (22, 43, 139, 734 bp, respectively). In two cases (43 bp and 22 bp), this is a partial tRNA duplication related to prophage integration (see below).</p>
        <p>We propose that repeat-mediated deletions represent an, as yet, undescribed process that contributes to genetic variability. The significance of the present study is that these events have been so clearly documented in natural isolates rather than in experimentally mutagenised systems. The large number of events must reflect a process that is more common than anticipated. The observed similarity between repeat-mediated deletions described here and the MMEJ mechanism of double-strand break repair may indicate that MMEJ may make use of components that are involved in the repeat-mediated deletion process.</p>
        <p>It should be emphasized that one prominent source for direct repeats are target duplications associated with transposon insertion. The described repeat-mediated deletion would be an efficient mechanism for trace-free removal of integrated transposons and thus may represent a defence system against such selfish DNA elements. In this context, the high reversion rate of transposon-triggered mutations in <italic>Halobacterium</italic> may be significant <xref ref-type="bibr" rid="pone.0020968-Pfeifer1">[50]</xref>. Upon exit from a donor site, a transposon may leave a double-strand break (which unless repaired would lead to loss of the chromosome) or leave the target duplication as a footprint. In the latter case, most <italic>Halobacterium</italic> transposons would introduce a frame-shift, as target duplications of 5, 8, and 10 bp are most common. Thus, regain of function would be unlikely. The high reversion rate of transposon-triggered mutations can be nicely explained by the proposed mechanism of repeat-mediated deletions.</p>
      </sec>
      <sec id="s2l">
        <title>Cas/CRISPR systems</title>
        <p>In prokaryotes, Cas/CRISPR systems provide a sequence-specific defense barrier against incoming foreign DNA such as viruses or plasmids, and the genes involved often display a high rate of change <xref ref-type="bibr" rid="pone.0020968-Karginov1">[19]</xref>. The two <italic>Haloquadratum</italic> strains differ radically in their Cas/CRISPR systems, as summarized in <xref ref-type="fig" rid="pone-0020968-g007">Figure 7</xref>. Strain C23<sup>T</sup> carries two complete sets of Cas genes, one preceding the crispr-1 locus, and a second, distinct set located between the flanking crispr-2 and crispr-3 loci. The DR sequence found in crispr-1 differs from that present in the other two CRISPR loci, consistent with their distinct Cas genes. There are 85 spacers spread across the three loci but crispr-1 has duplicates of two spacers, and crisprs-2 and -3 share an identical spacer, making a total of 82 unique spacers. Strain HBSQ001 has no Cas genes and only 5 complete spacers. A MITE (HqIRS37) has inserted within the spacer between the third and fourth copy of the DR (counting from the leader sequence end). The residual HBSQ001 crispr locus is closely related to crispr-2 of C23<sup>T</sup> as they share the same leader and DR sequences (labeled DR2 in <xref ref-type="fig" rid="pone-0020968-g007">Figure 7</xref>), but the lack of Cas genes in HBSQ001 means that the CRISPR system in that strain is non-functional.</p>
        <fig id="pone-0020968-g007" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pone.0020968.g007</object-id>
          <label>Figure 7</label>
          <caption>
            <title>CRISPR systems of <italic>Hqr. walsbyi</italic> strains C23<sup>T</sup> and HBSQ001.</title>
            <p>Three CRISPR loci are present in C23<sup>T</sup>, associated with two separate groups of Cas genes (coloured yellow and pink). No Cas genes, and only one, residual CRISPR with 5 spacers, are found in HBSQ001. The DR (direct repeat) sequences are shown beneath the CRISPRs. Shading between the leader sequence of CRISPR-2 of C23<sup>T</sup> and CRISPR-1 of HBSQ001 indicate that they are nearly identical in sequence. The relative position and orientation of ORF HB2151A is shown along with an arrow indicating where it matches (exactly) a spacer sequence in CRISPR-3 of C23<sup>T</sup>. Similarly, the spacer sequences in HBSQ001 that are closely similar to sequences in C23<sup>T</sup> are indicated by arrows, labeled spacer-related sequences, which point to their matching locations in the C23<sup>T</sup> genome. Nucleotide positions are given beneath the CRISPRs and details of the matching spacer sequences are given in <xref ref-type="supplementary-material" rid="pone.0020968.s008">Table S7</xref>.</p>
          </caption>
          <graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0020968.g007" xlink:type="simple"/>
        </fig>
        <p>The first CRISPR locus in C23<sup>T</sup> has no homologue in HBSQ001, as it is located on a long deletion-coupled insertion between the two strains (25.4 kb <italic>vs</italic> 6.6 kb), representing one of the variable regions (DV3, <xref ref-type="table" rid="pone-0020968-t003">Table 3</xref>; #88 of <xref ref-type="supplementary-material" rid="pone.0020968.s006">Table S5</xref>). The segment carrying crispr-2 and -3 is also part of a long deletion-coupled insertion between the strains (15.5 kb <italic>vs</italic>. 5.2 kb, #294), one of 21 strain-specific sequences that constitute hypervariable region DV6 (genomic island GI 2).</p>
        <table-wrap id="pone-0020968-t003" position="float"><object-id pub-id-type="doi">10.1371/journal.pone.0020968.t003</object-id><label>Table 3</label><caption>
            <title>Prominent strain-specific chromosomal regions of strains C23<sup>T</sup> and HBSQ001.</title>
          </caption><!--===== Grouping alternate versions of objects =====--><alternatives><graphic id="pone-0020968-t003-3" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0020968.t003" xlink:type="simple"/><table>
            <colgroup span="1">
              <col align="left" span="1"/>
              <col align="center" span="1"/>
              <col align="center" span="1"/>
              <col align="center" span="1"/>
              <col align="center" span="1"/>
              <col align="center" span="1"/>
              <col align="center" span="1"/>
            </colgroup>
            <thead>
              <tr>
                <td align="left" colspan="1" rowspan="1">DV<xref ref-type="table-fn" rid="nt106">a</xref></td>
                <td align="left" colspan="1" rowspan="1">GI<xref ref-type="table-fn" rid="nt107">b</xref></td>
                <td align="left" colspan="1" rowspan="1">Region<xref ref-type="table-fn" rid="nt108">c</xref> #</td>
                <td align="left" colspan="1" rowspan="1">Pos. C23<sup>T</sup> (nt)</td>
                <td align="left" colspan="1" rowspan="1">Pos. HBSQ001 (nt)</td>
                <td align="left" colspan="1" rowspan="1">%G+C (C23<sup>T</sup>/HBSQ001)</td>
                <td align="left" colspan="1" rowspan="1">Description</td>
              </tr>
            </thead>
            <tbody>
              <tr>
                <td align="left" colspan="1" rowspan="1">DV1</td>
                <td align="left" colspan="1" rowspan="1"/>
                <td align="left" colspan="1" rowspan="1">#24</td>
                <td align="left" colspan="1" rowspan="1">108448 – 129921</td>
                <td align="left" colspan="1" rowspan="1">104716 – 130155</td>
                <td align="left" colspan="1" rowspan="1">57.7/57.9</td>
                <td align="left" colspan="1" rowspan="1">the <italic>hmuI</italic> gene excluding the N-terminal and C-terminal regions which are common to both strains.</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">DV2</td>
                <td align="left" colspan="1" rowspan="1">GI 1</td>
                <td align="left" colspan="1" rowspan="1">#50</td>
                <td align="left" colspan="1" rowspan="1">260966 – 269209</td>
                <td align="left" colspan="1" rowspan="1">257293 – 301070 (257397–302834)</td>
                <td align="left" colspan="1" rowspan="1">46.4/48.6</td>
                <td align="left" colspan="1" rowspan="1">a 43.7 kb <italic>vs</italic> 8.2 kb deletion-coupled insertion (DCI); codes for S-layer glycoprotein and other cell-surface glycoproteins.</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">DV3</td>
                <td align="left" colspan="1" rowspan="1"/>
                <td align="left" colspan="1" rowspan="1">#88</td>
                <td align="left" colspan="1" rowspan="1">392540 – 417971</td>
                <td align="left" colspan="1" rowspan="1">421964 – 428610</td>
                <td align="left" colspan="1" rowspan="1">48.5/47.8</td>
                <td align="left" colspan="1" rowspan="1">a 25.4 kb <italic>vs</italic> 6.6 kb DCI<xref ref-type="table-fn" rid="nt109">d</xref>; contains a CRISPR in C23<sup>T</sup> and a transcription unit coding for a probable cell surface structure in HBSQ001.</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">DV4</td>
                <td align="left" colspan="1" rowspan="1"/>
                <td align="left" colspan="1" rowspan="1">#120</td>
                <td align="left" colspan="1" rowspan="1">560071 – 588686</td>
                <td align="left" colspan="1" rowspan="1">-</td>
                <td align="left" colspan="1" rowspan="1">47.3/-</td>
                <td align="left" colspan="1" rowspan="1">28.6 kb deletion in HBSQ001.</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">DV5</td>
                <td align="left" colspan="1" rowspan="1"/>
                <td align="left" colspan="1" rowspan="1">#162</td>
                <td align="left" colspan="1" rowspan="1">746237 – 757917</td>
                <td align="left" colspan="1" rowspan="1">727486 – 738373</td>
                <td align="left" colspan="1" rowspan="1">49.8/46.5</td>
                <td align="left" colspan="1" rowspan="1">11.6 kb <italic>vs</italic> 10.8 kb DCI</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">DV6</td>
                <td align="left" colspan="1" rowspan="1">GI 2</td>
                <td align="left" colspan="1" rowspan="1">#264–#314</td>
                <td align="left" colspan="1" rowspan="1">1268551 –1472326</td>
                <td align="left" colspan="1" rowspan="1">1269154 – 1454284 (1272280–1457646)</td>
                <td align="left" colspan="1" rowspan="1">50.0/50.0</td>
                <td align="left" colspan="1" rowspan="1">Hypervariable region of ∼180 kb in which a total of 21 independent insertion/deletion events were detected. Includes DCI #292 which covers the crispr-2 and crispr-3 loci of C23<sup>T</sup>.</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">DV7</td>
                <td align="left" colspan="1" rowspan="1"/>
                <td align="left" colspan="1" rowspan="1">#370</td>
                <td align="left" colspan="1" rowspan="1">1615793–1635538</td>
                <td align="left" colspan="1" rowspan="1">-</td>
                <td align="left" colspan="1" rowspan="1">44.5/-</td>
                <td align="left" colspan="1" rowspan="1">prophage integration in C23<sup>T</sup></td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">DV8</td>
                <td align="left" colspan="1" rowspan="1"/>
                <td align="left" colspan="1" rowspan="1">#388</td>
                <td align="left" colspan="1" rowspan="1">1695969–1730200</td>
                <td align="left" colspan="1" rowspan="1">-</td>
                <td align="left" colspan="1" rowspan="1">45.0/-</td>
                <td align="left" colspan="1" rowspan="1">repeat-triggered deletion of 34 kb in HBSQ001</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">DV9</td>
                <td align="left" colspan="1" rowspan="1"/>
                <td align="left" colspan="1" rowspan="1">#480</td>
                <td align="left" colspan="1" rowspan="1">2046794–2069450</td>
                <td align="left" colspan="1" rowspan="1">1976050–1989452</td>
                <td align="left" colspan="1" rowspan="1">50.0/47.6</td>
                <td align="left" colspan="1" rowspan="1">22.6 kb vs 13.4 kb DCI</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">DV10</td>
                <td align="left" colspan="1" rowspan="1"/>
                <td align="left" colspan="1" rowspan="1">#578</td>
                <td align="left" colspan="1" rowspan="1">2411939–2454921</td>
                <td align="left" colspan="1" rowspan="1">2275947–2275978</td>
                <td align="left" colspan="1" rowspan="1">48.6/-</td>
                <td align="left" colspan="1" rowspan="1">42.9 kb insertion associated with loss of 32 bp. The C23<sup>T</sup> sequence contains many transposons and a phage integrase, and is possibly an integrative plasmid or a defective prophage. Four of the transposons from DV10 appear to have spread to other genome regions (i.e. ‘outbreaks’).</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">DV11</td>
                <td align="left" colspan="1" rowspan="1">GI 3</td>
                <td align="left" colspan="1" rowspan="1">#658</td>
                <td align="left" colspan="1" rowspan="1">-</td>
                <td align="left" colspan="1" rowspan="1">2602626–2655217 (2602766–2661746)</td>
                <td align="left" colspan="1" rowspan="1">-/51.2</td>
                <td align="left" colspan="1" rowspan="1">a 52 kb strain-specific insertion in HBSQ001; targets a tRNA and contains an adjacent phage integrase; it is uncertain whether this is an integrative plasmid or a prophage</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">DV12</td>
                <td align="left" colspan="1" rowspan="1">GI 4</td>
                <td align="left" colspan="1" rowspan="1">#682–#704</td>
                <td align="left" colspan="1" rowspan="1">2952488–3024116</td>
                <td align="left" colspan="1" rowspan="1">2798883–3008134 (2799525–3012525)</td>
                <td align="left" colspan="1" rowspan="1">50.0/49.4</td>
                <td align="left" colspan="1" rowspan="1">two major strain-specific insertions over this region, one of which is over 100 kb in length</td>
              </tr>
            </tbody>
          </table></alternatives><table-wrap-foot>
            <fn id="nt106">
              <label>a</label>
              <p><bold>DV</bold>, major divergent regions between the strains are numbered 1–12 and the corresponding positions in both strains given.</p>
            </fn>
            <fn id="nt107">
              <label>b</label>
              <p><bold>GI</bold>, genomic islands, as assigned by <xref ref-type="bibr" rid="pone.0020968-CuadrosOrellana1">[13]</xref>. Positions are according to the strain comparison analysis. In the cases of assigned genomic islands, the positions for HBSQ001 from <xref ref-type="bibr" rid="pone.0020968-CuadrosOrellana1">[13]</xref> are also included in parentheses.</p>
            </fn>
            <fn id="nt108">
              <label>c</label>
              <p>Region # refers to the numbered common and strain-specific sequences listed in <xref ref-type="supplementary-material" rid="pone.0020968.s006">Table S5</xref>.</p>
            </fn>
            <fn id="nt109">
              <label>d</label>
              <p><bold>DCI</bold>, deletion-coupled insertion.</p>
            </fn>
          </table-wrap-foot></table-wrap>
        <p>A summary of the significant sequence matches between CRISPR spacer sequences and haloarchaeal or halovirus sequences is given in <xref ref-type="supplementary-material" rid="pone.0020968.s008">Table S7</xref>. None of the CRISPR spacers exactly matched known haloviruses or haloarchaeal plasmids, but a less stringent search against the PL6 plasmids revealed one C23<sup>T</sup> spacer that gave an 80% match (29/36 nt) to a sequence within ORF Hqrw_6004 of plasmid PL6A (crispr-2, number 17, <xref ref-type="supplementary-material" rid="pone.0020968.s008">Table S7</xref>), and the predicted amino acid sequence of this spacer and the corresponding section of the PL6A ORF are nearly identical (11/12 aa). This is not surprising given the widespread carriage of PL6-like plasmids in <italic>Haloquadratum</italic>. Twelve C23<sup>T</sup> spacers were found to closely match metavirome (virus metagenome) sequences from the Santa Pola saltern, Spain <xref ref-type="bibr" rid="pone.0020968-Santos1">[51]</xref>. Four of these spacers perfectly matched metavirome target sequences. On the other hand, 4 of the metavirome contigs were targeted by more than one spacer, suggesting they belong to viruses that frequently interact with <italic>Haloquadratum</italic>. The close sequence relationships found between C23<sup>T</sup> CRISPR spacer sequences and the Spanish saltern metavirome provide persuasive evidence that the dominant virus types detected in the saltern from which HBSQ001 was isolated are also present in Australia.</p>
        <p>Although no spacer sequences were shared by the two strains, four spacers matched sequences adjacent to the CRISPRs of the <underline>other</underline> strain (allowing for 1–2 mutations; <xref ref-type="fig" rid="pone-0020968-g007">Figure 7</xref>, <xref ref-type="supplementary-material" rid="pone.0020968.s008">Table S7</xref>). These spacer sequences target two Cas genes and a predicted endonuclease. Three spacers of HBSQ001 closely match sites within the block of Cas genes found between the crispr-2 and crispr-3 loci of C23<sup>T</sup>. One of these matching sequences was found after reconstructing the ancestral HBSQ001 spacer by removing the inserted HqIRS37 MITE (<xref ref-type="supplementary-material" rid="pone.0020968.s008">Table S7</xref>). On the other hand, one spacer of C23<sup>T</sup> (crispr3/spacer20) exactly matches a 35 nt sequence in HBSQ001 that is found within an ORF (HQ2151) located just 1.2 kb away from the CRISPR remnant of that strain. The HQ2151 protein is predicted to be an HNH restriction endonuclease (COG3440), genes for which are often carried by bacteriophages, haloviruses <xref ref-type="bibr" rid="pone.0020968-Tang1">[52]</xref> and plasmids. Spacer sequences matching integrated viruses or plasmids in other strains of the same species have been previously reported, e.g. in <italic>Sulfolobus islandicus</italic> <xref ref-type="bibr" rid="pone.0020968-Held1">[20]</xref>, consistent with the immunity function of CRISPRs.</p>
        <p>It can be assumed that the crispr3/spacer20 sequence in C23<sup>T</sup> was collected precisely to defend against incoming mobile elements carrying the HQ2151A endonuclease, so the presence of this gene in the Spanish isolate probably represents a successful invasion by such an element despite the CRISPR protection. If the ancestor of the Spanish strain also carried a spacer against this endonuclease, then its integration into the chromosome would have triggered a deletion of at least the cognate spacer, and possibly more, as has been observed in other <italic>Archaea</italic> <xref ref-type="bibr" rid="pone.0020968-Gudbergsdottir1">[53]</xref>, <xref ref-type="bibr" rid="pone.0020968-Stern1">[54]</xref>. It seems likely that nearly the complete Cas/CRISPR locus has been deleted in HBSQ001 by such an event, leaving only the observed remnant. Deletions within CRISPRs <xref ref-type="bibr" rid="pone.0020968-Deveau1">[55]</xref> are well documented, as is the loss of entire sets of Cas genes <xref ref-type="bibr" rid="pone.0020968-Touchon1">[56]</xref>. It is not clear why the inter-species spacer targets are close to CRISPRs but it may relate to the common occurrence of Cas/CRISPR genes on plasmids, which would be sensed as foreign DNA when transferred to new strains. This is consistent with phylogenetic studies in Bacteria showing that Cas/CRISPR systems appear to spread by lateral gene transfer <xref ref-type="bibr" rid="pone.0020968-Horvath1">[57]</xref>, and indeed the evidence from tetra-nucleotide composition suggests that both CRISPR loci are contained on regions of foreign DNA. It is likely then that plasmid transfers between <italic>Hqr. walsbyi</italic> strains occur at an appreciable rate, although the mechanism(s) involved have yet to be experimentally determined. 
Overall, the spacer/target sequence relationships observed in the CRISPRs of the two strains, and between the Spanish saltern virome and spacers of C23<sup>T</sup>, show that the two strains are exposed to incursions by closely related organisms, including viruses and plasmids. This evidence points to a rapid, global dispersal system that links the salterns in Australia and Spain, as if they were a global pond (discussed later).</p>
      </sec>
      <sec id="s2m">
        <title>Halomucins, cell surface proteins, and bacteriorhodopsins</title>
        <p>Strain C23<sup>T</sup> carries an orthologue of halomucin, HmuI, an extremely long protein first described for HBSQ001 <xref ref-type="bibr" rid="pone.0020968-Bolhuis1">[7]</xref>. The HBSQ001 protein is predicted to be 9,159 aa (27,477 bp) while that of C23<sup>T</sup> is 7,837 aa. HmuI is secreted, and surrounds the cells as a cloud of protein as shown by a specific antibody stain (results not shown). It is a highly glycosylated (sialylated) protein, and could play a role in protecting the cell against conditions of desiccation or extremely low water activity <xref ref-type="bibr" rid="pone.0020968-Bolhuis1">[7]</xref>. Although the HmuI of C23<sup>T</sup> is considerably shorter, the two halomucins retain blocks of high sequence conservation with more than 90% sequence identity, especially in the N-terminal and C-terminal regions. They possess the same major features, an N-terminal Sec-pathway signal sequence, stretches of multiple direct repeats that span much of the protein, and a serine/glycine rich region near the C-terminus. However, the C-type lectin-like (CTLD) domains carried by the HBSQ001 protein are absent in the C23<sup>T</sup> HmuI. Comparison at the DNA sequence level indicates that the <italic>hmuI</italic> genes must have diverged <italic>in situ</italic> from a common ancestral gene rather than by lateral gene transfer events as both are embedded within a common genomic region that extends into each gene for 119 codons at the N-terminus, and for 559 codons at the C-terminus. Between the flanking core sequences the genes vary in sequence, and this region is designated as divergent region (DV1, #24). The repetitive domain structure of halomucin is likely to have promoted length changes by standard homologous recombination or repeat-mediated deletion, and two independent deletions can be discerned; one of 2.4 kb (HBSQ001 codons 283-1075) which removed the pair of CTLD domains, and one of 1.3 kb (HBSQ001 codons 5078-5538). 
There are also several other, smaller insertions and deletions. When the HmuI of HBSQ001 was first described in 2006 there were no prokaryotic homologues in the sequence databases but current BLASTP searches of the Genbank database show matches to large, secreted proteins from a number of recently sequenced <italic>Archaea</italic> (including haloarchaea) and <italic>Bacteria.</italic> These proteins, often annotated as large exoproteins, GLUG-domain proteins, or filamentous haemagglutinins, are considerably shorter in length than even the C23<sup>T</sup> HmuI, with most being between 1,000–2,000 aa. The closest sequence was that of Ig-like domain-containing protein Swol_1078 of <italic>Syntrophomonas wolfei</italic> (E = 10<sup>−44</sup>). Two groups of matching sequences were observed; those similar to the HBSQ001-specific CTLD domain containing region (aa 500 – 1,060), and the great majority that matched to the repeats (approx. aa 1,500–5,500 of C23<sup>T</sup> HmuI). The similar repeat domains of the latter group of proteins probably reflect a common structural characteristic, such as a filamentous shape, and it is possible that HmuI provides an attachment function.</p>
        <p>Two additional proteins with characteristics similar to halomucin (very large and probably secreted glycoproteins) have been reported for HBSQ001: Hmu2 (2885 residues) and Hmu3 (2079 residues). As these are encoded on strain-specific sequences (<italic>hmu2</italic> on #50, <italic>hmu3</italic> on #692), similar proteins are not present in C23<sup>T</sup>.</p>
        <p>The cell walls of most haloarchaea, excluding halococci, typically comprise a single protein layer, the surface or S-layer that lies directly outside the cell membrane. Experimental evidence from a few haloarchaeal species indicates that the S-layer is composed of only one protein that is usually heavily glycosylated, and forms an interlocking two-dimensional protein sheet attached to the cell membrane <xref ref-type="bibr" rid="pone.0020968-YuristDoutsch1">[58]</xref>, <xref ref-type="bibr" rid="pone.0020968-Sumper1">[59]</xref>. Haloarchaeal genome sequences often have multiple genes annotated as cell surface (glyco)proteins, based largely on the presence of secretion and glycosylation signals. Strain C23<sup>T</sup> has twelve such ORFs but only one, Hqrw_1237 (822 aa), is closely related to known S-layer glycoproteins. The highest similarity is to the S-layer protein of <italic>Har. japonica</italic> (Q9C4B4, 64% aa identity). A second, much smaller ORF, Hqrw_1408 (222 aa), present in both strains, shows about 40% aa identity to the C-terminal 140 residues of known S-layer proteins. The remaining annotated surface proteins share no significant similarity to S-layer proteins. Mass spectrometry analysis of membrane preparations of C23<sup>T</sup> revealed the presence of Hqrw_1237, as expected, while Hqrw_1408 was not detected. In addition, three of the other predicted surface proteins were unambiguously identified; Hqrw_1240, Hqrw_1641 and Hqrw_2184. The functions of the latter proteins are unknown.</p>
        <p>The probable S-layer glycoprotein genes (<italic>csg</italic>) of C23<sup>T</sup> (Hqrw_1237) and of HBSQ001 (HQ1207A) are located on a long deletion-coupled insertion (43.7 kb <italic>vs</italic> 8.2 kb, #50). In the latter strain, this had previously been reported to be a hypervariable region, and denoted as genomic island GI 1 <xref ref-type="bibr" rid="pone.0020968-CuadrosOrellana1">[13]</xref>. The S-layer protein of C23<sup>T</sup> aligns to homologues from other halophiles beginning from its N-terminus, while the orthologue of HBSQ001 appears to have an extended N-terminal region. Only from aa 193 onwards does it show significant sequence identity (55%) to the C23<sup>T</sup> homologue Hqrw_1237 (starting at pos. 55), and to the S-layer glycoproteins from other halophiles. However, the N-terminal 192 aa show only very distant homology (if any) to proteins from C23<sup>T</sup> or other halophiles. Nevertheless, the protein has a signal sequence and is one of the major membrane proteins in HBSQ001 (Fusetti &amp; Pohlmann, unpublished results). Thus, it is likely to be the S-layer glycoprotein of the Spanish isolate.</p>
        <p>The similarities in the C-terminal region between the probable S-layer glycoproteins from the two strains could have been caused by a genome rearrangement within the coding region, but this is not the case, as the level of nucleotide sequence conservation (55%) is drastically lower than the average similarity of other orthologue pairs (97%). The genomic breakpoint has been unambiguously positioned and is located outside of the <italic>csg</italic> gene. Cuadros-Orellana <italic>et al.</italic> <xref ref-type="bibr" rid="pone.0020968-CuadrosOrellana1">[13]</xref> suggested that some of the variability of this region (GI1) could be due to recombination with related sequences elsewhere on the chromosome. However, this seems less plausible in C23<sup>T</sup> as there is little intragenomic similarity. Thus, in contrast to the situation described for the halomucin gene, the <italic>csg</italic> gene has most likely been acquired by lateral gene transfer in the Spanish isolate. A third form of the cell surface glycoprotein was found on fosmid eHwalsbyi 559 also within the same syntenic gene context <xref ref-type="bibr" rid="pone.0020968-CuadrosOrellana1">[13]</xref>.</p>
        <p>The HBSQ001 specific 43.7 kb DV2 sequence codes not only for a distinct <italic>csg</italic> and the additional <italic>hmu2</italic> gene, but also for 5 additional proteins that are probably secreted and glycosylated. These differences are likely to be responsible for the three-layered cell wall structure observed in HBSQ001 compared to the two-layered surface of C23<sup>T</sup> <xref ref-type="bibr" rid="pone.0020968-Burns1">[8]</xref>. The combination of a long <italic>ca</italic>. 150 aa N-terminal extension on the S-layer protein of HBSQ001, and the additional cell surface glycoproteins in this strain, are probably important in altering the outer cell surface to evade viral infection, and may have helped HBSQ001 survive such attacks despite the absence of a functional CRISPR system. There is also a potentially significant difference in the glycosylation pattern of surface and secreted proteins between the two isolates. Sialic acid synthesis genes are present in HBSQ001 but absent in C23<sup>T</sup>, as they occur on a deletion-coupled insertion (#690, within DV12).</p>
        <p>Both strains carry two different bacterio-opsin-like genes, annotated as bop1 and bop2, along with a single halo-opsin gene (hop). To check for the expression of these and other membrane proteins, cell membranes of the C23<sup>T</sup> strain were purified using the protocol for isolating purple-membranes of <italic>Halobacterium salinarum</italic> <xref ref-type="bibr" rid="pone.0020968-Oesterhelt1">[60]</xref>, and a red-membrane band was observed at the same position on sucrose gradients as purple membranes from <italic>Halobacterium salinarum</italic>. SDS-PAGE revealed a single, prominent Coomassie stained band of identical apparent MW to Bop of <italic>Hbt. salinarum</italic> (data not shown), and MS analysis of the excised band confirmed it to be BopI (<xref ref-type="supplementary-material" rid="pone.0020968.s004">Table S3</xref>). In the previous study of HBSQ001, BopI was also stated to be present in the cell membrane <xref ref-type="bibr" rid="pone.0020968-Bolhuis1">[7]</xref>. Minor protein components of the purified membranes of the C23<sup>T</sup> strain were also detected, among which were BopII, Hop and Brp-like protein (carotenase) (<xref ref-type="supplementary-material" rid="pone.0020968.s004">Table S3</xref>). Recently, BopII of <italic>Hqr. walsbyi</italic> has been found not to pump protons, and its actual function in the cell is yet to be determined <xref ref-type="bibr" rid="pone.0020968-Fu1">[61]</xref>.</p>
      </sec>
      <sec id="s2n">
        <title>Prophages</title>
        <p>The genomes of both strains were examined for prophages using a number of methods including searches for insertions into tRNA genes, similarity to known halovirus (or prophage) genomes, and to highly conserved virus genes (e.g. terminase).</p>
        <p>As mentioned above, genomic island 3 (GI 3, DV11, #658) may represent a strain specific prophage in HBSQ001. Another likely prophage of C23<sup>T</sup> occurs at nt 1615793–1635538, where a 19.7 kb virus-like region has integrated into the tRNA-Ala gene. This is a strain specific insertion (#370, DV7) with an average %G+C content of 44.5, about 3 percentage points lower than that of the entire chromosome, and an altered tetra-nucleotide composition. A phage integrase is adjacent to the right end, which is marked by a 22 nt duplication of the tRNA. The ORFs between are typical of bacteriophages, including DNA polymerase, restriction/modification enzymes and a cluster of several ORFs that overlap at start/stop codons and are likely to be transcribed as a single unit. The DNA polymerase (Hqrw_2609), which is adjacent to the left end of the tRNA gene, is most similar to that of halovirus HF1, but is interrupted by a frame-shift, so this prophage is either defective or relies on the host enzyme for replication.</p>
        <p>Common to both strains is a partial duplication of the tRNA-Thr1 gene that occurs 6.7 kb further downstream (nt 103,049–103,105 in C23<sup>T</sup>). The region between contains genes for hypothetical and conserved hypothetical proteins, along with many pseudogenes, some being <italic>cdc6</italic> homologues. The partial tRNA-Thr1 copy is imperfect, with 5 mismatches over 57 nt, and is missing 15 nt at the 5′ end. There are no integrase genes nearby. A similarly truncated tRNA-Thr gene is also carried by haloviruses HF1 and HF2, but these occur near a predicted integrase gene, presumably to allow host integration <xref ref-type="bibr" rid="pone.0020968-Tang1">[52]</xref>. It is likely that this 6.7 kb region of <italic>Haloquadratum</italic> is a remnant of either a prophage or an integrative plasmid.</p>
        <p>As discussed above, plasmid PL6 may represent a prophage in C23<sup>T</sup>. Whether the larger plasmid, PL100, is a prophage or a plasmid is difficult to predict as it shares no significant similarity to known haloviruses and has a high proportion of hypothetical or conserved hypothetical ORFs. While ORF Hqrw_5003 shows almost perfect similarity to a metavirome sequence derived from the Santa Pola saltern in Spain (ADE29299), and several other ORFs show significant similarity to halovirus or bacteriophage ORFs, many of these are similar in function to genes also typically carried by plasmids, such as endonuclease, helicase, integrase, and Cdc6 homologues. The presence of a Cdc6 homologue cannot be taken as evidence that PL100 is simply a plasmid, as halovirus BJ-1 carries such a gene <xref ref-type="bibr" rid="pone.0020968-Pagaling1">[62]</xref>.</p>
      </sec>
      <sec id="s2o">
        <title>Divergent regions, including genomic islands (GI) of strain HBSQ001</title>
        <p>Four hypervariable regions (genomic islands, GI 1–4) of HBSQ001 were described by Cuadros-Orellana <italic>et al.</italic> <xref ref-type="bibr" rid="pone.0020968-CuadrosOrellana1">[13]</xref> (<xref ref-type="fig" rid="pone-0020968-g005">Fig 5</xref>). These show high variability between the HBSQ001 genome and environmental sequences recovered from the saltern from which this strain was isolated (i.e. autochthonous DNA). Since GIs 1–4 largely correspond to divergent regions (DV) observed in the current study (described above), only relevant information that has not already been described will be given here. Genomic Island 1 corresponds to DV2 (see above). Genomic Island 2 is 185 kbp in length (nt 1269154 – 1454284, #264 – #314 of <xref ref-type="supplementary-material" rid="pone.0020968.s006">Table S5</xref>) and corresponds to a similarly sized region, DV6, in C23<sup>T</sup> (203 kb; nt 1268551 – 1472326). The latter region harbors many IS elements, phage-associated, and metabolic genes. Unlike most other GIs and DVs, which display a single large indel between the two strains, GI 2 shows 21 independent indels or deletion-coupled insertions, several of which exceed 10 kb. These strain-specific sequences are interspersed among core sequences ranging from as little as 43 bp to 12 kb. Commonly, the indels are separated by not more than 5 kb (<xref ref-type="supplementary-material" rid="pone.0020968.s006">Table S5</xref>). The whole region has an increased G+C content (50.0%), an altered tetra-nucleotide frequency, is devoid of tRNAs and ribosomal protein genes, and is located mid-way between <italic>ori1</italic> and <italic>ori2</italic>/<italic>ori3.</italic> GI 2 (DV6) is particularly abundant in non-transposase related pseudogenes (<xref ref-type="fig" rid="pone-0020968-g001">Figure 1</xref>). 
The clustering of numerous distinct indels/replacements in a probable replication termination region is curious, but appears similar to observations in strains of <italic>Sulfolobus islandicus</italic> recovered from widely spaced sites (see <xref ref-type="fig" rid="pone-0020968-g003">Figure 3</xref> of <xref ref-type="bibr" rid="pone.0020968-Reno1">[33]</xref>).</p>
        <p>GI 2 (DV6) begins abruptly in HBSQ001 just after nt 1269163 (strain-specific region #264) where a large deletion has removed most of ORF HQ2048A. Comparison with C23<sup>T</sup> shows this is a deletion because the corresponding C23<sup>T</sup> ORF, Hqrw_2212, remains intact. The ends of the undeleted region in C23<sup>T</sup> have a direct repeat, <named-content content-type="gene" xlink:type="simple">ACATCATTCT</named-content>, while in HBSQ001 there is only one copy directly at the deletion border. This is one of the cases of a repeat-mediated deletion, which has eliminated just over 16 kb from the HBSQ001 genome. Fortuitously, among the autochthonous DNA sequences recovered from the Santa Pola saltern in Spain by <xref ref-type="bibr" rid="pone.0020968-CuadrosOrellana1">[13]</xref>, was a 15 kb fosmid clone sequence (EF583981) that shared a similar %G+C as <italic>Hqr. walsbyi</italic> but did not match any sequence in HBSQ001. In the current study this sequence was found to be 99% identical to C23<sup>T</sup> across its entire length, and mapped within this 16 kb deletion in GI 2 (nt 1268551–1284680). This example further supports the tight relationship between <italic>Hqr. walsbyi</italic> populations present at the two distinct global sites.</p>
        <p>A significant strain-specific difference found within GI 2 (DV6) is the CRISPR array and associated Cas genes of C23<sup>T</sup> (see above). In this case C23<sup>T</sup> has 15.5 kb relative to 5.2 kb in HBSQ001 (#294). The comparative sequence data is consistent with HBSQ001 suffering a deletion relative to C23<sup>T</sup>, as the leader and first DR are contiguous with the shared sequence between the two strains (<xref ref-type="fig" rid="pone-0020968-g007">Figure 7</xref>), but after this HBSQ001 has only a few remnant spacers left while C23<sup>T</sup> retains a full set of spacers as well as Cas genes.</p>
        <sec id="s2o1">
          <title>Divergent region DV3</title>
          <p>The genes carried by the two strains in this deletion-coupled insertion (25.4 kb <italic>vs</italic> 6.6 kb, #88) are quite different, and the ancestral strain can be identified by ORF HQ1332A, which spans the replacement border in HBSQ001 and appears to be complete, while the corresponding ORF in C23<sup>T</sup> is truncated, indicating that the former strain has the original sequence. DV3 in C23<sup>T</sup> carries not only the Cas/CRISPR system of the crispr-1 locus, but also genes typically found in mobile DNA elements, such as phages or plasmids (integrase, restriction and modification enzymes, transposons). In HBSQ001, DV3 has genes that are probably all involved in the synthesis of a cell surface structure. Type IV pilus biogenesis proteins (<italic>pilB</italic>/<italic>pilC</italic> homologues) are followed by 5 genes, of which two (HQ1332A, HQ1334A) have a predicted class III signal peptidase (preflagellin peptidase) cleavage site according to PilFind <xref ref-type="bibr" rid="pone.0020968-Szabo1">[63]</xref>. Class III signal peptides are believed to be particularly important for the biogenesis of archaeal cell surface appendages, and proteins containing class III signal peptides frequently occur in operons together with pilus/flagella assembly systems (<italic>pilBC</italic> or <italic>flaIJ</italic> homologues) <xref ref-type="bibr" rid="pone.0020968-Ellen1">[64]</xref>. Genes highly homologous to the probable cell surface components from DV3 of HBSQ001 are found in other halophilic archaea, and it is likely that this structure is used by many other halophiles.</p>
        </sec>
        <sec id="s2o2">
          <title>Genomic Island 3: a tRNA insertion (DV11)</title>
          <p>This represents a 52 kb long (nt 2602626 – 2655217) strain-specific insertion in HBSQ001 (#658). Cuadros-Orellana <italic>et al</italic>. <xref ref-type="bibr" rid="pone.0020968-CuadrosOrellana1">[13]</xref> suggested that GI 3 may represent ‘the remnant of a lysogenic phage’. By comparison with C23<sup>T</sup>, this insertion begins at a tRNA-His gene (HQt41, nt 2602504) and ends with a 43 nt repeat of the 3′ end of the same tRNA (at nt 2655218–2655260). A phage integrase gene immediately downstream of the left-end tRNA is closely related to the integrase gene of halovirus HF2 (NP_542565; 35% aa identity), which is itself adjacent to a truncated tRNA gene (HF2t004) in the HF2 genome <xref ref-type="bibr" rid="pone.0020968-Tang2">[65]</xref>. More closely related homologues of this integrase occur in the genomes of <italic>Hfx. volcanii</italic> (HVO_2815A), <italic>Hgm. borinquense</italic> (ZP_03999761) and <italic>Nab. magadii</italic> (Nmag_0465). In all cases these are located next to a complete tRNA gene (all are tRNA-Glu), while a truncated sequence of the same tRNA is found a further 5–48 kb downstream. This arrangement is typical of integrative prophages <xref ref-type="bibr" rid="pone.0020968-Casjens1">[66]</xref> and the insertion carries several possible phage-associated genes (an additional integrase, helicases and restriction-systems). It also includes many hypothetical ORFs, Cdc6 and Cdc48 homologues (HQ3268A, HQ3269A, HQ3297A), as well as type IV secretory pathway related genes (HQ3289A, HQ3291A). The comparative data indicate it is able to target the tRNA-His gene via a partial copy of the same sequence, and probably has the capacity to excise (via the adjacent integrase), recircularize, and replicate via the Cdc6 genes. 
There is no recognisable phage terminase gene <xref ref-type="bibr" rid="pone.0020968-Casjens1">[66]</xref> and without more information it is uncertain whether it is an integrative plasmid or a prophage (defective or functional).</p>
        </sec>
        <sec id="s2o3">
          <title>Genomic Island 4 (DV12)</title>
          <p>At 209 kb (nt 2798883-3008134, #682-#704) this was the largest GI detected by Cuadros-Orellana <italic>et al</italic>. <xref ref-type="bibr" rid="pone.0020968-CuadrosOrellana1">[13]</xref>. HBSQ001 shows two major strain-specific insertions over this region, one of which is over 100 kb in length. Looking more closely, these were found to be coupled to deletions, the first (#692) being 0.4 kb (C23<sup>T</sup>) <italic>vs</italic> 105 kb (HBSQ001; nt 2841233–2945868), and the second (#698) being 5.4 kb (C23<sup>T</sup>) <italic>vs</italic> 33.6 kb (HBSQ001; 2949791–2983415) (<xref ref-type="fig" rid="pone-0020968-g001">Figure 1</xref>, <xref ref-type="fig" rid="pone-0020968-g005">Figure 5</xref>, and <xref ref-type="supplementary-material" rid="pone.0020968.s006">Table S5</xref>). The region of C23<sup>T</sup> from nt 2993444 to 3008735 (ORFs Hqrw_3995 to Hqrw_4025), which spans both of the latter deletion-coupled insertions, shows a prominent deviation in tetramer frequency and higher than average %G+C, indicative of foreign DNA. The origin of the latter element is unclear as most ORFs are either hypothetical or conserved hypothetical, and many have degenerated to pseudogenes, including a transposase at the left end (Hqrw_3995) that may have originally facilitated its integration. The general characteristics of this element are similar to another region of C23<sup>T</sup>, Hqrw_2042 to Hqrw_2066, which also shows an increased %G+C, divergent tetramer composition, begins with a phage integrase pseudogene, and contains largely hypothetical/conserved hypothetical ORFs, many of which have degenerated to pseudogenes. However, this region is conserved between the two strains.</p>
        </sec>
      </sec>
      <sec id="s2p">
        <title>Mobile genetic elements (MGEs)</title>
        <p>Mobile, integrative genetic elements, because of their repetitive and propagative nature, are significant drivers of cellular evolution, inactivating genes and generating multiple copies that can lead to deletions and other genomic rearrangements <xref ref-type="bibr" rid="pone.0020968-Hickman1">[67]</xref>. Knowledge of these elements in a genome can help explain its structure, evolution and propensity for change, but identifying them can be difficult, particularly if they are novel, short, or show some variability in sequence. The unexpectedly close similarity of the two <italic>Haloquadratum</italic> strains provided an extraordinary advantage in detecting such elements. A surprising finding from these comprehensive searches is that despite both strains carrying numerous mobile genetic elements, their shared sequences have remained syntenic. Rather, there is a strong tendency for MGEs to target other MGEs.</p>
        <p>Both strains carry large numbers of MGEs: a total of 528 for C23<sup>T</sup> and 536 for HBSQ001. These are of various types, including canonical transposons (13 families), IS605-type transposons (29 families) and three categories of transposase-free mobile repeats: MITEs (miniature inverted-repeat transposable elements, 5 families), PATEs (<underline>p</underline>alindrome-<underline>a</underline>ssociated <underline>t</underline>ransposable <underline>e</underline>lements, 6 families), and SMRs (<underline>s</underline>mall <underline>m</underline>obile <underline>r</underline>epeats, 5 families). Since MITEs lack a transposase, their mobilisation relies on an appropriate transposase provided <italic>in trans</italic>, which also determines the type of target duplication (if any). A MITE cannot be considered a degenerate version of the transposon supplying the transposase, as their sequences are unrelated except for the few terminal bases of the inverted terminal repeat. SMRs also contain ITRs but do not cause target duplications and, unlike MITEs, their ITRs show no sequence similarity to any of the transposons. Currently, their mode of mobilization is enigmatic. We observed a strong targeting preference for several of the SMR families (see below). PATEs differ from MITEs and SMRs in being devoid of ITRs, but they contain near-terminal palindromic sequences. During the analyses described below, these were found to be related to those of IS605-type transposons. Indeed, some PATEs appear to have originated from IS605-type transposons by repeat-mediated deletion of the core sequence, and so represent degenerate versions with only the fused terminal sequences.</p>
        <p>Transposase-free MGEs (MITEs, PATEs, and SMRs) are far more numerous than transposons (<xref ref-type="table" rid="pone-0020968-t004">Table 4</xref>) and are responsible for the majority of the strain differences caused by MGEs (<xref ref-type="fig" rid="pone-0020968-g006">Figure 6</xref>). The distribution of these elements between the two strains of <italic>Haloquadratum</italic> is similar except for two families of SMRs that are only present in C23<sup>T</sup> (see below). MGEs appear to be relatively evenly distributed around the C23<sup>T</sup> chromosome (data not shown) but the large plasmid, PL100, is completely free of SMRs, possibly reflecting a recent acquisition.</p>
        <table-wrap id="pone-0020968-t004" position="float"><object-id pub-id-type="doi">10.1371/journal.pone.0020968.t004</object-id><label>Table 4</label><caption>
            <title>General properties of the six categories of <italic>Haloquadratum</italic> MGEs.</title>
          </caption><!--===== Grouping alternate versions of objects =====--><alternatives><graphic id="pone-0020968-t004-4" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0020968.t004" xlink:type="simple"/><table>
            <colgroup span="1">
              <col align="left" span="1"/>
              <col align="center" span="1"/>
              <col align="center" span="1"/>
              <col align="center" span="1"/>
              <col align="center" span="1"/>
              <col align="center" span="1"/>
            </colgroup>
            <thead>
              <tr>
                <td align="left" colspan="1" rowspan="1">Category<xref ref-type="table-fn" rid="nt110">a</xref></td>
                <td align="left" colspan="1" rowspan="1">Families</td>
                <td align="left" colspan="1" rowspan="1">Types</td>
                <td align="left" colspan="1" rowspan="1">Total/SSEI (C23<sup>T</sup>)</td>
                <td align="left" colspan="1" rowspan="1">Total/SSEI (HBSQ001)</td>
                <td align="left" colspan="1" rowspan="1">Characteristics</td>
              </tr>
            </thead>
            <tbody>
              <tr>
                <td align="left" colspan="1" rowspan="1">IS605-type Transposons (TP-A)</td>
                <td align="left" colspan="1" rowspan="1">29</td>
                <td align="left" colspan="1" rowspan="1">44</td>
                <td align="left" colspan="1" rowspan="1">99/4(+3)</td>
                <td align="left" colspan="1" rowspan="1">118/10(+7)</td>
                <td align="left" colspan="1" rowspan="1">Do not have terminal inverted repeats. Have palindromes close to their termini. Do not cause TDs. Contain an entire transposase gene.</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">Canonical transposons (TP-B)</td>
                <td align="left" colspan="1" rowspan="1">13</td>
                <td align="left" colspan="1" rowspan="1">47</td>
                <td align="left" colspan="1" rowspan="1">73/28</td>
                <td align="left" colspan="1" rowspan="1">41/1</td>
                <td align="left" colspan="1" rowspan="1">Have terminal inverted repeats. Often cause target duplications. Contain an entire transposase gene.</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">Miniature inverted-repeat transposable elements (MITEs)</td>
                <td align="left" colspan="1" rowspan="1">5</td>
                <td align="left" colspan="1" rowspan="1">8</td>
                <td align="left" colspan="1" rowspan="1">55/14</td>
                <td align="left" colspan="1" rowspan="1">55/12</td>
                <td align="left" colspan="1" rowspan="1">Have terminal inverted repeats that are related to known transposons. Often cause target duplications. Too short to code for a transposase.</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">Palindrome-associated transposable elements (PATEs)</td>
                <td align="left" colspan="1" rowspan="1">6</td>
                <td align="left" colspan="1" rowspan="1">39</td>
                <td align="left" colspan="1" rowspan="1">177/10(+4)</td>
                <td align="left" colspan="1" rowspan="1">194/22(+4)</td>
                <td align="left" colspan="1" rowspan="1">Do not have terminal inverted repeats. Have palindromes close to their termini. Do not cause TDs. Too short to code for a transposase.</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">A-type Small Mobile Repeats (SMR-A)</td>
                <td align="left" colspan="1" rowspan="1">3</td>
                <td align="left" colspan="1" rowspan="1">3</td>
                <td align="left" colspan="1" rowspan="1">109/14</td>
                <td align="left" colspan="1" rowspan="1">128/16</td>
                <td align="left" colspan="1" rowspan="1">Have terminal inverted repeats but these are not related to known transposons. Do not cause TDs. Too short to code for a transposase. Frequently integrated into a TP-A or PATE.</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">B-type Small Mobile Repeats (SMR-B)</td>
                <td align="left" colspan="1" rowspan="1">2</td>
                <td align="left" colspan="1" rowspan="1">2</td>
                <td align="left" colspan="1" rowspan="1">15/5</td>
                <td align="left" colspan="1" rowspan="1">0</td>
                <td align="left" colspan="1" rowspan="1">Have terminal inverted repeats but these are not related to known transposons. Do not cause TDs. Too short to code for a transposase.</td>
              </tr>
              <tr>
                <td align="left" colspan="1" rowspan="1">Total</td>
                <td align="left" colspan="1" rowspan="1">58</td>
                <td align="left" colspan="1" rowspan="1">143</td>
                <td align="left" colspan="1" rowspan="1">528</td>
                <td align="left" colspan="1" rowspan="1">536</td>
                <td align="left" colspan="1" rowspan="1"/>
              </tr>
            </tbody>
          </table></alternatives><table-wrap-foot>
            <fn id="nt110">
              <label>a</label>
              <p>Canonical transposons (TP-B), and A-/B-type Small Mobile Repeats (SMR) show a high sequence conservation between elements classified into the same type (commonly &gt;95% sequence identity). PATEs and IS605-type transposons are much more divergent, except for a few types of homogeneous PATEs. MGE types classified into the same family show sequence homology of the element itself or of the encoded transposase. The total number of elements and the number of SSEIs (total/SSEI) is given for strains C23<sup>T</sup> and HBSQ001. For TP-A and PATE, a distinction is made between SSEIs of complete elements (first value) and SSEIs of terminal-only elements that have suffered core deletions (second value in parentheses, prefixed with a plus sign). Terminal-only TP-As have lost their transposase genes and so have degenerated to PATEs.</p>
            </fn>
          </table-wrap-foot></table-wrap>
        <p>In comparing MGEs between the two strains, the following categories were recognized: (a) common elements, which occur at corresponding genome positions, (b) strain-specific element insertions (SSEIs), which represent one type of strain-specific elements, and where the adjacent sequence is contiguous in one strain and interrupted by the mobile element in the other, (c) elements which occur within larger, strain-specific sequences (indels), and so are likely to have either been carried into the cell as part of a segment of foreign DNA, or have inserted into such a segment after its integration into the chromosome. In several cases, indels start and/or terminate with an MGE. This may imply that MGEs can trigger deletion events that may also remove neighbouring sequences.</p>
        <p>The mobility of MGEs was inferred from the presence of strain-specific insertions (SSEIs, case b), and although these could possibly also result from the removal of a hypothetical corresponding element in the other strain, e.g. by repeat-mediated deletions via the flanking target site duplications, we believe most SSEIs represent insertions. On this basis, the most active MGEs in C23<sup>T</sup> are the SMR HqIRS55 (10 SSEIs), the C23<sup>T</sup>-specific transposase ISHwa4 (8 SSEIs), and the PATE HqIRS56 (7 SSEIs).</p>
        <p>The key features of all <italic>Haloquadratum</italic> MGEs are summarized in <xref ref-type="supplementary-material" rid="pone.0020968.s009">Table S8</xref>, in which complete transposons are referred to by their ISFinder database numbers <xref ref-type="bibr" rid="pone.0020968-Siguier1">[68]</xref>, prefixed with ISHwa, while the other elements are assigned numbers prefixed with HqIRS (for <bold><italic><underline>H</underline></italic></bold><italic>alo</italic><bold><italic><underline>q</underline></italic></bold><italic>uadratum</italic> <bold><underline>I</underline></bold>S-<bold><underline>R</underline></bold>elated <bold><underline>S</underline></bold>equences).</p>
      </sec>
      <sec id="s2q">
        <title>Transposons and MITEs</title>
        <p>Twelve of the 13 distinct transposon families detected in the two strains have been previously described in <italic>Halobacterium</italic> <xref ref-type="bibr" rid="pone.0020968-Pfeiffer1">[21]</xref> or other haloarchaea, but the remaining one, ISHwa2, is related to Tn5, a transposon widely distributed in Bacteria. Although ISHwa2 occurs multiple times in both strains it is not found in any of the other 78 completely sequenced Archaea, except for a small remnant in <italic>Natronomonas pharaonis</italic>. This distribution is indicative of a lateral gene transfer event from a bacterial representative, and the transposase of ISHwa2 most closely matches homologues in <italic>Microcoleus chthonoplastes</italic>, a cyanobacterial species known to inhabit hypersaline environments.</p>
        <p>Transposon mobility is likely in at least 10 of the families as examples of SSEIs were observed but they were not equally active: seven of the 10 transposon families show SSEIs exclusively in strain C23<sup>T</sup>, while the remaining 3 transposon families show SSEIs in both strains.</p>
        <p>In some transposon families, the pattern of SSEIs and their distribution within and between strains indicates that the transposon was carried into the cell via an integration event of foreign DNA providing a source of infection for the rest of the genome. As detailed in <xref ref-type="supplementary-material" rid="pone.0020968.s011">Text S1</xref>, this is exemplified by a 43 kb C23<sup>T</sup>-specific sequence (DV10, #578), which carries a total of 10 transposons (or fragments thereof), several glycosyltransferase genes and one phage integrase homolog. It is likely that DV10 is responsible for the ‘outbreak’ of four transposons (ISHwa4, ISHwa8, ISHwa12, and ISHwa13). Although the DV10-specific copies of ISHwa8 and ISHwa12 are broken, this could have occurred after their spread to other genome locations (see <xref ref-type="supplementary-material" rid="pone.0020968.s011">Text S1</xref>).</p>
        <p>MITEs depend on a suitable transposase supplied <italic>in trans</italic> by a regular transposon, with which they share a similar inverted terminal repeat sequence as well as the length of an eventual target duplication <xref ref-type="bibr" rid="pone.0020968-Chen1">[69]</xref>. Four of the five distinct families of MITEs detected in <italic>Hqr. walsbyi</italic> are non-coding and shorter than 500 bp, and the most frequent of these have been previously described <xref ref-type="bibr" rid="pone.0020968-Chen1">[69]</xref>. The ISH2-type element HqIRS32 is the longest MITE (ca. 520 bp) and is the only example containing an ORF likely to code for a protein (but not a transposase). There is evidence from <italic>Halobacterium</italic> that this ORF in ISH2 is expressed <xref ref-type="bibr" rid="pone.0020968-Klein1">[70]</xref>. Homologous short ORFs were detected in two other elements (HqIRS33 on DV10, HqIRS34).</p>
        <p>The mobility of some MITEs, as reflected by SSEIs, was curious, as no functional copy of the appropriate transposase was found elsewhere in the genome. For example, MITE HqIRS39 has an ITR similar to that of transposon HqIRS12 but all copies of the HqIRS12 transposase are disrupted by frame-shifts or in-frame stop codons (see also <xref ref-type="supplementary-material" rid="pone.0020968.s011">Text S1</xref>).</p>
      </sec>
      <sec id="s2r">
        <title>Small mobile repeats (SMRs)</title>
        <p>Three families of SMRs (HqIRS42, HqIRS43, and HqIRS55, grouped as SMR-A) share a number of common features despite a lack of sequence similarity: they are about 110 bp long, have prominent inverted terminal repeats, do not generate target duplications, are relatively frequent (up to 54 copies in one genome), and seem to have a strong and identical target site preference, namely to target PATEs and IS605-type transposons, inserting within their near-terminal palindromes (see also below). As an example, the relatively frequent PATE HqIRS56 is regularly targeted by SMR-A elements (9 of 19 in C23<sup>T</sup> and 12 of 23 in HBSQ001).</p>
        <p>The other two families of SMRs (HqIRS40 and HqIRS41, grouped as SMR-B) occur only in C23<sup>T</sup>. SMR-Bs are similar to SMR-As in that they target other MGEs but show little targeting preference. HqIRS41 is exceedingly short, being only 80 bp long.</p>
      </sec>
      <sec id="s2s">
        <title>IS605-type transposons and PATEs</title>
        <sec id="s2s1">
          <title>IS1341-type transposases are highly expanded and diverse in halophilic archaea</title>
          <p>The transposition mechanism of IS605-type transposons has recently been elucidated, and is unusual in using a ssDNA intermediate <xref ref-type="bibr" rid="pone.0020968-TonHoang1">[71]</xref>. IS605-type transposons are characterized by palindromic sequences near both termini, which in some cases have an identical sequence. IS605-type transposon integration depends on a short oligonucleotide (4–6 bp) that functions as a targeting sequence <xref ref-type="bibr" rid="pone.0020968-Kersulyte1">[72]</xref> as already shown for ISHp608 from <italic>Helicobacter pylori</italic>. The terminal sequence prior to the near-terminal palindrome is highly variable. In IS<italic>Hp608</italic>, it has been shown that the IS200-type transposase is responsible for mobilization while the IS1341-type “transposase” is an accessory protein that does not function as transposase <xref ref-type="bibr" rid="pone.0020968-Kersulyte1">[72]</xref>.</p>
          <p>ISH1-8 was first detected on halophage phiH1 and corresponds to ISH12 from <italic>Halobacterium</italic>. It is a member of the IS605-type transposons, and codes for two proteins, TnpA (annotated as IS200-type transposase) and TnpB (annotated as IS1341-type transposase). The genes are serially encoded and much closer than initially annotated. Besides ISH12, <italic>Halobacterium</italic> contains a moderate number of homologs of the TnpA (6) and TnpB (16) transposases. All copies of <italic>tnpA</italic> are adjacent to <italic>tnpB</italic> (4 serial configurations, 2 divergent configurations). In the comparison of <italic>Halobacterium salinarum</italic> strains R1 and NRC-1, the mobility of this element was not obvious due to the lack of SSEIs <xref ref-type="bibr" rid="pone.0020968-Pfeiffer1">[21]</xref>.</p>
          <p>The family of IS605-type transposons is highly expanded in <italic>Natronomonas</italic>, which carries 38 copies of IS1341-type transposase (<italic>tnpB</italic>) and 3 copies of IS200-type transposase (<italic>tnpA</italic>), each of the latter being encoded directly adjacent to a <italic>tnpB</italic> gene <xref ref-type="bibr" rid="pone.0020968-Falb2">[73]</xref>. However, the other 35 <italic>tnpB</italic> are not linked to a <italic>tnpA,</italic> which seems unusual if they were only accessory proteins. Among the peculiar features of IS605-related transposons are – besides the lack of ITRs – the lack of target duplications and the lack of close sequence similarity between different copies of the transposon. Commonly, transposases from mobilized transposons form large families, members of which share more than 90% sequence identity. However, if the 38 IS1341-type transposases of <italic>Natronomonas</italic> are clustered at the 90% sequence identity level, the largest cluster has only 3 members. There are only 5 additional clusters with two members; all the other IS1341-type transposases remain singlets at a 90% sequence identity level. This may indicate that a large number of distinct transposons have invaded the genome, but each of these is under a stringent copy number control. As IS605-type transposons lack all the characteristic features that facilitate identification of the element boundaries, annotation of the complete transposons is severely hampered.</p>
          <p>The family of IS605-type transposons is even further expanded in <italic>Haloquadratum,</italic> which codes for ca. 90 IS1341-type transposons (<italic>tnpB</italic>: C23<sup>T</sup>: 82; HBSQ001: 100) and ca. 16 IS200-type transposons (<italic>tnpA</italic>: C23<sup>T</sup>: 15; HBSQ001: 17). As in <italic>Natronomonas</italic>, IS1341-type transposons are extremely diverse with only very small clusters being formed at a 90% sequence identity level. The largest cluster of HBSQ001 has 3 members and each of these represents an SSEI. However, while the transposons in <italic>Natronomonas</italic> are commonly functional, in <italic>Haloquadratum</italic> they are mostly disrupted by deletions, frame-shifts, or in-frame stop codons (disrupted <italic>tnpA</italic>: <italic>Nmn</italic>., 0 of 3; C23<sup>T</sup>, 15 of 15; HBSQ001, 16 of 17; disrupted <italic>tnpB</italic>: <italic>Nmn</italic>., 6 of 38; C23<sup>T</sup>, 77 of 82; HBSQ001, 83 of 100). The finding that mobilization of IS605-type transposons depends exclusively on <italic>tnpA</italic> <xref ref-type="bibr" rid="pone.0020968-Kersulyte1">[72]</xref> is difficult to reconcile with our data on <italic>Haloquadratum,</italic> where SSEIs are observed although all of the IS200-type transposons are pseudogenes in C23<sup>T</sup>. Only a single IS200-type transposase is not disrupted in HBSQ001 and this is encoded adjacent to a disrupted IS1341-type transposase.</p>
        </sec>
        <sec id="s2s2">
          <title>Homogeneous and heterogeneous PATEs</title>
          <p>In the course of the analysis it became evident that the near-terminal palindromic sequences found on PATEs are similar to those of IS605-type transposons. In several cases, a single PATE has expanded in a way that is typical for other transposons, showing full-length homology with more than 90% sequence identity. These were designated homogeneous PATEs, and five types were detected (<xref ref-type="supplementary-material" rid="pone.0020968.s009">Table S8</xref>). Each of these shows SSEIs that, in combination with the high sequence similarity, facilitated definition of the element boundaries. Homogeneous PATEs show prominent near-terminal palindromes (<xref ref-type="supplementary-material" rid="pone.0020968.s010">Table S9</xref>) that, for each PATE, are similar in sequence, especially for HqIRS46, which has an exact duplication of 22 bp.</p>
          <p>BLASTN analysis of homogeneous PATE sequences resulted in a number of partial hits with either reduced sequence similarity or with hits that were short but highly conserved. These proved to belong to additional but heterogeneous PATEs. Even more hits were obtained upon blast analysis with the newly identified elements. Finally, this iterative approach resulted in the collection of PATE elements to saturation.</p>
          <p>Heterogeneity of PATE sequences is especially prominent in the terminal region beyond the near-terminal palindrome (20–50 bp). However, most PATE subclasses are linked to a well-conserved short “terminal oligonucleotide”, and when this could be linked to a near-terminal palindrome in a manual alignment, it was considered to represent the element boundary.</p>
          <p>Several individual copies of PATEs seemed to have incomplete near-palindrome sequences and to lack the terminal sequence, but upon further inspection, unrelated repeats (mainly SMR-A) were found at the ends of such incomplete PATEs. Due to the high sequence conservation of homogeneous PATEs, it was possible to detect that the missing terminal sequence occurs on the other side of the SMR-A repeat, consistent with these PATEs being targeted by SMR-A elements.</p>
        </sec>
        <sec id="s2s3">
          <title>PATEs and IS605-type transposons have similar boundaries</title>
          <p>Most of the haloarchaeal IS605-type transposons had been incompletely characterized, with annotations being restricted to the transposase-coding region. Complete element annotations including boundaries were available for only a few examples, mainly based on SSEIs. However, once it became evident that some PATEs have near-terminal palindromes that are similar to those of completely annotated IS605-type transposons, a systematic comparison was performed. Frequently, a segment of 200 bp upstream or downstream of the coding region matched near-terminal palindromes from PATEs. For this analysis, it was advantageous that heterogeneous PATEs had been collected to saturation, which increased the occupancy of the sequence space and allowed the detection of a sufficiently close homolog using BLASTN analysis. In many cases, the well-conserved “terminal oligonucleotide” was also encountered (directly by blast or upon a manual alignment) and facilitated boundary detection. For the IS605-type transposon ISHp608, it was reported that the highly conserved “terminal oligonucleotide” functions as a targeting sequence which is not part of the transposon <xref ref-type="bibr" rid="pone.0020968-TonHoang1">[71]</xref>, <xref ref-type="bibr" rid="pone.0020968-Kersulyte1">[72]</xref>. For practical reasons, we used the convention of annotating the targeting sequence as part of the element as this greatly facilitated homology-based boundary setting. However, data from PATE SSEIs support the assignment of the well-conserved “terminal oligonucleotide” as a targeting sequence rather than an integral part of the element. Most of the SSEIs are associated with a few “extra bases” in the element-free strain when SSEI boundaries are annotated according to the extended element including the targeting sequence (<xref ref-type="supplementary-material" rid="pone.0020968.s009">Table S8</xref>).</p>
          <p>As a result of the effort to define transposon boundaries, the number of completely annotated IS605-type transposons could be largely expanded. As the majority of IS605-type transposons in <italic>Haloquadratum</italic> code only for pseudogenes, the analysis was first performed for <italic>Natronomonas pharaonis</italic>. For this haloalkaliphilic archaeon, transposon boundaries could be defined for 22 of the 38 IS605-related transposons (submitted to the ISFinder database as ISNph5-ISNph22). The high sequence conservation between IS605-related transposons from <italic>Natronomonas</italic> and of homogeneous PATEs from <italic>Haloquadratum</italic> is remarkable (<xref ref-type="supplementary-material" rid="pone.0020968.s010">Table S9</xref>, section B). <italic>Haloquadratum</italic> IS605-type transposons with non-disrupted transposases, and for which complete boundaries could be assigned, were also submitted to ISFinder (ISHwa17-ISHwa19, ISHwa21-ISHwa29).</p>
        </sec>
      </sec>
      <sec id="s2t">
        <title>Core deletions in PATEs and IS605-type transposons occur by repeat-mediated deletion</title>
        <p>HqIRS46 is a PATE with a direct repeat of 22 bp, representing the two near-terminal palindromes (<xref ref-type="supplementary-material" rid="pone.0020968.s010">Table S9</xref>). In four cases, the element is complete in one strain while the core and one copy of the 22-mer have been deleted in the other strain. Core deletions, which lead to a terminal-only version of the element with only 52 bp, have all the characteristics of repeat-mediated deletions (<xref ref-type="supplementary-material" rid="pone.0020968.s007">Table S6</xref>). However, there are not only examples where the core has been deleted in one of the strains, there are also cases where terminal-only copies of HqIRS46 occur at the same genome position in both strains. In these cases it cannot be decided if the core has been deleted independently in the two strains or if the terminal-only version of the element has been mobilized.</p>
        <p>An equivalent core deletion occurs for IS605-type transposon ISHwa23, which codes for an IS1341-type transposase (TnpB). The deleted region is flanked by a 19 bp direct repeat which, again, resembles the near-terminal palindrome (<xref ref-type="supplementary-material" rid="pone.0020968.s007">Table S6</xref>). In this case, the element is shortened from a 1500 bp transposon to a 52 bp PATE. As described in <xref ref-type="supplementary-material" rid="pone.0020968.s011">Text S1</xref>, this core deletion removes a transposon that must have been mobilized before. There are 6 SSEIs of ISHwa23 in HBSQ001, but in all six cases the terminal-only 52 bp version is found. This may indicate that this extremely short transposon-derived PATE can be mobilized.</p>
        <p>Several other of the IS605-type elements were found to exist as terminal-only PATEs. In addition, several elements contain other types of internal deletions that remove most of the transposase genes. In several transposon remnants that carry IS200-type and IS1341-type transposase genes in divergent orientation, large internal regions are deleted so that only short C-terminal remnants of the two transposons persist. One PATE, HqIRS71, shows high sequence similarity to an extended terminal segment of certain IS605-type transposons. This includes part of the C-terminal coding region, but all ORFs on HqIRS71 are disrupted and so HqIRS71 is considered to be non-coding.</p>
        <p>In summary, IS605-type transposons frequently suffer deletions in <italic>Haloquadratum</italic>. This is one of the reasons why most of the IS1341-type transposons are pseudogenes in this organism. On several occasions, direct repeats are found adjacent to the deleted region and thus we propose that the mechanism of repeat-mediated deletion provides a defense mechanism against IS605-type transposons and related PATEs in <italic>Haloquadratum</italic>.</p>
        <sec id="s2t1">
          <title>PATEs and IS605-type transposons are attacked by SMR-A type elements</title>
          <p>It has been mentioned that PATEs are targeted by SMR-A elements (HqIRS42, HqIRS43, and HqIRS55). MGE conglomerates, where one MGE is targeted by another MGE, have been frequently described for halophilic archaea, and we were initially not surprised to find SMR-A elements within PATEs. However, it became evident that being targeted by SMR-A elements is another characteristic that is common between PATEs and IS605-type transposons. Most of these targeting events occur outside the transposase ORF, in the 5′ or 3′ untranslated regions, and this only became evident upon precise annotation of complete transposons up to their boundaries.</p>
          <p>We systematically counted all cases where an MGE occurs within another MGE and found a total of 192 such targeting events (<xref ref-type="table" rid="pone-0020968-t005">Table 5</xref>). The majority of these (59%, 114 events) are caused by SMR-A elements and more than four-fifths (89%, 170 events) have been suffered by PATEs and IS605-type transposons. Together, more than half (54%, 104 events) of these events represent targeted insertions of an SMR-A element into a PATE or an IS605-type transposon. We consider it likely that this represents another cellular defence mechanism against PATEs and IS605-type transposons. It seems that the biological function of SMR-A elements is to inactivate IS605-type transposons and related PATEs, but their mechanism of mobilization remains enigmatic. While nearly all MITEs (30 of 31) are integrated into PATEs or IS605-type transposons, they differ from A-type SMRs in not showing a strong preference for PATEs.</p>
          <table-wrap id="pone-0020968-t005" position="float"><object-id pub-id-type="doi">10.1371/journal.pone.0020968.t005</object-id><label>Table 5</label><caption>
              <title>Targeting of <italic>Haloquadratum</italic> MGEs. This lists the 192 targeting events where one MGE was found inserted into another MGE. Rows indicate how frequently elements of the given category have targeted another element, while columns indicate how frequently an element of the given category has been affected by insertion of another element. More than half of the targeting events are caused by A-type SMRs (114, 59%), while the majority of the affected elements are from PATEs (106, 55%) or from IS605-type transposons (TP-A, 64, 33%). Thus, these related categories of elements suffered 88% of the foreign element insertions.</title>
            </caption><!--===== Grouping alternate versions of objects =====--><alternatives><graphic id="pone-0020968-t005-5" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0020968.t005" xlink:type="simple"/><table>
              <colgroup span="1">
                <col align="left" span="1"/>
                <col align="center" span="1"/>
                <col align="center" span="1"/>
                <col align="center" span="1"/>
                <col align="center" span="1"/>
                <col align="center" span="1"/>
                <col align="center" span="1"/>
                <col align="center" span="1"/>
              </colgroup>
              <thead>
                <tr>
                  <td align="left" colspan="1" rowspan="1"/>
                  <td align="left" colspan="1" rowspan="1">PATE</td>
                  <td align="left" colspan="1" rowspan="1">TP-A</td>
                  <td align="left" colspan="1" rowspan="1">TP-B</td>
                  <td align="left" colspan="1" rowspan="1">MITE</td>
                  <td align="left" colspan="1" rowspan="1">SMR-A</td>
                  <td align="left" colspan="1" rowspan="1">SMR-B</td>
                  <td align="left" colspan="1" rowspan="1">Total</td>
                </tr>
              </thead>
              <tbody>
                <tr>
                  <td align="left" colspan="1" rowspan="1">
                    <bold>PATE</bold>
                  </td>
                  <td align="left" colspan="1" rowspan="1">6</td>
                  <td align="left" colspan="1" rowspan="1">12</td>
                  <td align="left" colspan="1" rowspan="1">-</td>
                  <td align="left" colspan="1" rowspan="1">-</td>
                  <td align="left" colspan="1" rowspan="1">-</td>
                  <td align="left" colspan="1" rowspan="1">-</td>
                  <td align="left" colspan="1" rowspan="1">18</td>
                </tr>
                <tr>
                  <td align="left" colspan="1" rowspan="1">
                    <bold>TP-A</bold>
                  </td>
                  <td align="left" colspan="1" rowspan="1">-</td>
                  <td align="left" colspan="1" rowspan="1">5</td>
                  <td align="left" colspan="1" rowspan="1">-</td>
                  <td align="left" colspan="1" rowspan="1">-</td>
                  <td align="left" colspan="1" rowspan="1">-</td>
                  <td align="left" colspan="1" rowspan="1">-</td>
                  <td align="left" colspan="1" rowspan="1">5</td>
                </tr>
                <tr>
                  <td align="left" colspan="1" rowspan="1">
                    <bold>TP-B</bold>
                  </td>
                  <td align="left" colspan="1" rowspan="1">7</td>
                  <td align="left" colspan="1" rowspan="1">3</td>
                  <td align="left" colspan="1" rowspan="1">5</td>
                  <td align="left" colspan="1" rowspan="1">-</td>
                  <td align="left" colspan="1" rowspan="1">-</td>
                  <td align="left" colspan="1" rowspan="1">-</td>
                  <td align="left" colspan="1" rowspan="1">15</td>
                </tr>
                <tr>
                  <td align="left" colspan="1" rowspan="1">
                    <bold>MITE</bold>
                  </td>
                  <td align="left" colspan="1" rowspan="1">13</td>
                  <td align="left" colspan="1" rowspan="1">17</td>
                  <td align="left" colspan="1" rowspan="1">-</td>
                  <td align="left" colspan="1" rowspan="1">-</td>
                  <td align="left" colspan="1" rowspan="1">1</td>
                  <td align="left" colspan="1" rowspan="1">-</td>
                  <td align="left" colspan="1" rowspan="1">31</td>
                </tr>
                <tr>
                  <td align="left" colspan="1" rowspan="1">
                    <bold>SMR-A</bold>
                  </td>
                  <td align="left" colspan="1" rowspan="1">79</td>
                  <td align="left" colspan="1" rowspan="1">25</td>
                  <td align="left" colspan="1" rowspan="1">9</td>
                  <td align="left" colspan="1" rowspan="1">-</td>
                  <td align="left" colspan="1" rowspan="1">1</td>
                  <td align="left" colspan="1" rowspan="1">-</td>
                  <td align="left" colspan="1" rowspan="1">114</td>
                </tr>
                <tr>
                  <td align="left" colspan="1" rowspan="1">
                    <bold>SMR-B</bold>
                  </td>
                  <td align="left" colspan="1" rowspan="1">1</td>
                  <td align="left" colspan="1" rowspan="1">2</td>
                  <td align="left" colspan="1" rowspan="1">5</td>
                  <td align="left" colspan="1" rowspan="1">-</td>
                  <td align="left" colspan="1" rowspan="1">1</td>
                  <td align="left" colspan="1" rowspan="1">-</td>
                  <td align="left" colspan="1" rowspan="1">9</td>
                </tr>
                <tr>
                  <td align="left" colspan="1" rowspan="1">Total</td>
                  <td align="left" colspan="1" rowspan="1">106</td>
                  <td align="left" colspan="1" rowspan="1">64</td>
                  <td align="left" colspan="1" rowspan="1">19</td>
                  <td align="left" colspan="1" rowspan="1">-</td>
                  <td align="left" colspan="1" rowspan="1">3</td>
                  <td align="left" colspan="1" rowspan="1">-</td>
                  <td align="left" colspan="1" rowspan="1">192</td>
                </tr>
              </tbody>
            </table></alternatives></table-wrap>
        </sec>
      </sec>
      <sec id="s2u">
        <title>Conclusions</title>
        <p>The extraordinary genome conservation and shared CRISPR spacer relationships observed between geographically distant isolates of <italic>Hqr. walsbyi</italic> speak of a rapid global dispersal, possibly by airborne salt particles or migratory birds. The strong, global coherence of this species also argues for a strong selection against change and/or a high competitive fitness in hypersaline waters, perhaps related to protection from desiccation by halomucin. It seems that hypersaline waters all over the world represent a type of global pond for this organism. The genomes differ mainly by indels, deletion-coupled insertions, and movements of mobile genetic elements, with deletions occurring frequently via a potentially novel mechanism involving small direct repeats. The high number of pseudogenes, particularly of transposases, in both strains probably results from this, and may reflect a host protective mechanism against mobile genetic elements. Deletion-coupled insertions indicate the uptake and integration of foreign DNA but the mechanisms underlying the generation of these identically placed but unrelated sequences remain to be understood. Strain C23<sup>T</sup> was probably infected by four distinct transposons upon integration of a foreign sequence of 42.9 kb. At least two distinct mechanisms are used to attack transposons. Repeat-mediated deletions permit trace-free removal of canonical transposons, taking advantage of the associated target duplications. The same mechanism can also inactivate IS605-type transposons (and associated PATEs) by causing element core deletions. In addition, the SMR-A category of small mobile repeats preferentially targets IS605-type transposons and PATEs. The link between PL6-like plasmids and conserved genomic loci related to haloviruses (ViPREs), points to a close relationship between plasmids and viruses in haloarchaea, the details of which remain to be understood.</p>
      </sec>
    </sec>
    <sec id="s3" sec-type="materials|methods">
      <title>Materials and Methods</title>
      <sec id="s3a">
        <title>Sequence determination and assembly</title>
        <p><italic>Haloquadratum walsbyi</italic> C23<sup>T</sup> ( = JCM 12705<sup>T</sup> = DSM 16854<sup>T</sup>) was cultivated in DBCM2 medium as described previously <xref ref-type="bibr" rid="pone.0020968-Burns1">[8]</xref>. Total DNA was extracted from cell pellets using the Qiagen Genomic-tip DNA extraction kit (Qiagen, GmbH). Initial sequencing was performed using the Roche GS FLX sequencing platform with LR70 reagents. A total of 69.6 Mb data was collected and assembled using the Newbler software (provided with the sequencing instrument). A combined total of 3.07 Mb resulted from the initial round of assembly with 99.78% of the bases being Q40 or higher, at an average coverage of 22.5 fold. The resulting contigs were ordered according to the genome of HBSQ001, which gave an early indication of the complete synteny between these two strains. PCR on genomic DNA provided templates to primer walk across the contig gaps. Assembly of the 454 contigs and PCR sequences utilized the Phred–Phrap–Consed package <xref ref-type="bibr" rid="pone.0020968-Gordon1">[74]</xref>.</p>
        <p>The reliability of the 454 sequencing results was analyzed. Bases with low quality values (&lt; 25 from the Newbler software) were checked. Of these 186 could be compared to data from a preliminary sequencing effort. Only 2 differences were observed. Low quality bases within polynucleotide runs, which potentially cause frame-shifts, were compared by BLASTX to proteins from haloarchaea with or without the low quality base. This also indicated that most of the low quality bases were correct. Additional potential errors in the 454 sequenced contigs were identified by comparison with the sequence of the closely related HBSQ001 strain, particularly at positions where the C23<sup>T</sup> base was of low quality and differed from the corresponding base in the other strain. PCR amplification and re-sequencing of these regions in the C23<sup>T</sup> genome detected a total of 42 errors. Seven additional errors were detected when proteomic data resulted in the identification of pseudogenes. The total number of errors thus computes to 15/Mb. The sequences of primers used for PCR and primer walking are available on request from the corresponding author. The sequences of the main chromosome, PL100, PL6A and PL6B have the following EMBL accession numbers: FR746099, FR746100, FR746101 and FR746102.</p>
      </sec>
      <sec id="s3b">
        <title>PCR for gap closure and error correction</title>
        <p>The TaKaRa LA Taq kit (Takara Bio Inc., Japan) was used for all PCRs. Template was genomic DNA from <italic>Haloquadratum walsbyi</italic> C23<sup>T</sup> (described above). Reaction cycles and conditions were varied depending upon the expected product length and template difficulty (ISH elements), but a typical scheme was as follows: 50 µl reaction, ∼100 ng DNA: initial denaturation at 94°C, 1 min; 25 cycles of 94°C/30 sec, 58°C/30 sec, 72°C/4 min; final extension 72°C/4 min. Product size and quality was assessed by agarose gel electrophoresis of a 2 µl sample. Where reactions contained subsidiary, unspecific products, the major band was purified by preparative agarose gel electrophoresis. All products were purified to remove primers and salts using the Wizard SV gel and PCR cleanup kit (Promega, GmbH). Sequencing of PCR products was performed using the ABI PRISM BigDye terminator method at the MPI of Biochemistry core sequencing facility.</p>
      </sec>
      <sec id="s3c">
        <title>Sequencing of large plasmid PL100</title>
        <p>Several sequencing reads from an earlier, preliminary sequencing effort could not be matched to the genome sequence based on 454 sequencing. Some of these could be assembled into contigs, indicating that they were not simple contaminants. PCR primers designed from these contigs were then used to detect the presence of a plasmid in some of the subcultures of C23<sup>T</sup>. As the original sequencing reads were mate pairs, this enabled contig adjacencies to be determined, which could be used to close most gaps by a PCR approach using (as a template) plasmid DNA purified from a positive subculture detected in the previous PCR. Due to the low overall sequence coverage, non-assembled singletons in the original dataset were also utilized in phred-phrap assembly, and these enabled further extension of remaining contigs, which could be incorporated by further PCR-based gap closure. Final closure of PL100 was achieved by an all-against-all PCR approach on the few remaining contigs.</p>
      </sec>
      <sec id="s3d">
        <title>Gene prediction and annotation</title>
        <p>The genome annotation and core analyses were performed using the HaloLex system (<ext-link ext-link-type="uri" xlink:href="http://www.halolex.mpg.de" xlink:type="simple">http://www.halolex.mpg.de</ext-link>, <xref ref-type="bibr" rid="pone.0020968-Pfeiffer2">[75]</xref>). Further analyses were done using the MIGenAS server (<ext-link ext-link-type="uri" xlink:href="http://www.migenas.org" xlink:type="simple">http://www.migenas.org</ext-link>, <xref ref-type="bibr" rid="pone.0020968-Rampp1">[76]</xref>). The common core of the two strains shows 98.6% sequence identity. Accordingly, there must be a complete consistency with respect to ORF annotations in this shared sequence, including annotation of pseudogenes. An attempt has been made to keep the ORF annotations in the two strains as consistent as possible.</p>
      </sec>
      <sec id="s3e">
        <title>Bioinformatics tools</title>
        <p>A number of specific bioinformatics tools were used, including the following for: CRISPRs (<ext-link ext-link-type="uri" xlink:href="http://crispr.u-psud.fr/" xlink:type="simple">http://crispr.u-psud.fr/</ext-link>), genome alignment using MUMmer (<ext-link ext-link-type="uri" xlink:href="http://mummer.sourceforge.net/" xlink:type="simple">http://mummer.sourceforge.net/</ext-link>), 7S RNA detection (<ext-link ext-link-type="uri" xlink:href="http://bio.lundberg.gu.se/srpscan/" xlink:type="simple">http://bio.lundberg.gu.se/srpscan/</ext-link>), rRNA detection (<ext-link ext-link-type="uri" xlink:href="http://www.cbs.dtu.dk/services/RNAmmer/" xlink:type="simple">http://www.cbs.dtu.dk/services/RNAmmer/</ext-link>), tRNA detection with tRNA-scan (<ext-link ext-link-type="uri" xlink:href="http://lowelab.ucsc.edu/tRNAscan-SE/" xlink:type="simple">http://lowelab.ucsc.edu/tRNAscan-SE/</ext-link>) and Aragorn (<ext-link ext-link-type="uri" xlink:href="http://130.235.46.10/ARAGORN/" xlink:type="simple">http://130.235.46.10/ARAGORN/</ext-link>), tetramer analysis (<ext-link ext-link-type="uri" xlink:href="http://www.megx.net/tetra" xlink:type="simple">http://www.megx.net/tetra</ext-link>) <xref ref-type="bibr" rid="pone.0020968-Teeling1">[77]</xref>, genome comparison/alignment (<ext-link ext-link-type="uri" xlink:href="http://asap.ahabs.wisc.edu/mauve/" xlink:type="simple">http://asap.ahabs.wisc.edu/mauve/</ext-link>), cumulative skew plots with GenSkewApp (<ext-link ext-link-type="uri" xlink:href="http://www.helmholtz-muenchen.de/en/mips/services/analysis-tools/genskew/index.html" xlink:type="simple">http://www.helmholtz-muenchen.de/en/mips/services/analysis-tools/genskew/index.html</ext-link>), GC-Profile (<ext-link ext-link-type="uri" xlink:href="http://tubic.tju.edu.cn/GC-Profile/" xlink:type="simple">http://tubic.tju.edu.cn/GC-Profile/</ext-link>), Z-curve plots (<ext-link ext-link-type="uri" xlink:href="http://tubic.tju.edu.cn/zcurve/" 
xlink:type="simple">http://tubic.tju.edu.cn/zcurve/</ext-link>), GC% with a 2.5 SD cutoff <xref ref-type="bibr" rid="pone.0020968-Rutherford1">[78]</xref>, and elements of the EMBOSS suite of bioinformatics programs found within the eBioX program (<ext-link ext-link-type="uri" xlink:href="http://www.ebioinformatics.org" xlink:type="simple">http://www.ebioinformatics.org</ext-link>). Pilfind was computed at <ext-link ext-link-type="uri" xlink:href="http://signalfind.org/" xlink:type="simple">http://signalfind.org/</ext-link>.</p>
      </sec>
      <sec id="s3f">
        <title>Comparison of the chromosome sequences</title>
        <p>A PERL-based script was applied to compare the two chromosome sequences. This script allows a base-by-base comparison but is robust against sequence duplications and indels of any size. The script interprets the pair of sequences as an alternate set of “runs”, defined as subsequences that are completely identical, and “connectors”, which are the divergent sequences that occur between runs. Runs are identified by “word matches”, using exclusively words that are unique in each of the sequences. These words are expanded on both sides as long as the sequences remain identical, which results in a run of a given length. In an initial scaffolding phase, long words (15-mers) are used and only the longest runs (summing up to 30% of the total chromosomal sequences) are considered. The serial order of the runs is determined, which is preserved within syntenic regions of the chromosomes. The result of this phase is an ordered set of runs that build a scaffold of the chromosome sequences. In a second phase, each of the intermediate sequences between runs is subjected to the same procedure except that a short word length (3 bases) is applied and the longest identified run is immediately intercalated into the scaffold. This intercalation creates up to two new but shorter intermediate sequences, one on each side of the newly intercalated run. Intermediate sequences for which no additional run can be identified are “connectors”. Because the algorithm uses trinucleotides as the shortest word size, and thus the minimal length of a run, it performs a base-by-base comparison of the chromosomes. All sequence differences are then represented in the connectors. The procedure allows any length of the connector, from zero (in case of an indel) to more than 100 kb (a huge strain-specific region). As an example, point mutations are connectors of length one in both sequences.</p>
        <p>The algorithm can handle sequences that are full of duplications, a common problem in the genomes of halophilic archaea. Only words that are unique in each of the sequences are applied for matching, which excludes words originating from duplications. After this initial matching, sequences are extended on both sides as long as they remain identical. No restrictions apply in this step, so that co-localized copies of duplicated sequences will become part of the corresponding runs. Sequence deviations between the co-localized copies do not cause any problems as they are handled in the second phase, being intermediate sequences between runs. At this stage, additional copies located elsewhere in the genome are no longer relevant.</p>
        <p>Most connectors represent trivial sequence differences (e.g. point mutations). For the current project, connectors were selected as non-trivial if they fulfilled one of the following criteria: (a) a length difference of at least 20 bp, (b) the number of single-base differences exceeds 10 in a single connector (which is defined not to have even a single unique trinucleotide match), or (c) the sequence identity is below 50% (which is used only for connectors of at least 7 bp length). These selection criteria are highly relaxed, allowing a detailed comparison of the two chromosomes to be performed. The selected set of 512 non-trivial connectors was inspected manually. The region of difference, and 100–200 bases on each side, were compared to the genome sequences using BLASTN. Cases representing just an enhanced sequence variability (as obvious from a contiguous blast alignment which is free of long gaps) were skipped. The remaining set contained 360 strain-specific sequences. The remainder of the chromosomes, excluding the strain-specific sequences, represents the shared sequence.</p>
      </sec>
    </sec>
    <sec id="s4">
      <title>Supporting Information</title>
      <supplementary-material id="pone.0020968.s001" mimetype="image/tiff" position="float" xlink:href="info:doi/10.1371/journal.pone.0020968.s001" xlink:type="simple">
        <label>Figure S1</label>
        <caption>
          <p><bold>Sequence motifs upstream of cdc6 genes in </bold><bold><italic>Hqr. walsbyi</italic></bold><bold> C23<sup>T</sup>.</bold> At the left of each group of sequences is the Ori name and the locus tag of the nearby <italic>cdc6</italic> gene, containing C for chromosomal or P for plasmid PL100. The direction of the inverted repeats is indicated by &gt; or &lt; at the right of each sequence. Identical bases in the repeats are indicated by green boxes. For comparison, the <italic>Haloferax volcanii ori</italic>C1 ORB sequence is shown above the predicted <italic>ori</italic>C1 of <italic>Hqr. walsbyi.</italic> For details of the predicted DUE, Mini-ORB, G-string and Halo-G-string motifs, see <xref ref-type="bibr" rid="pone.0020968-Norais1">[29]</xref>.</p>
          <p>(TIF)</p>
        </caption>
      </supplementary-material>
      <supplementary-material id="pone.0020968.s002" mimetype="application/msword" position="float" xlink:href="info:doi/10.1371/journal.pone.0020968.s002" xlink:type="simple">
        <label>Table S1</label>
        <caption>
          <p>
            <bold>Strongly under-represented tetramer sequences in </bold>
            <bold>
              <italic>Hqr. walsbyi</italic>
            </bold>
            <bold> C23<sup>T</sup> replicons.</bold>
          </p>
          <p>(DOC)</p>
        </caption>
      </supplementary-material>
      <supplementary-material id="pone.0020968.s003" mimetype="application/msword" position="float" xlink:href="info:doi/10.1371/journal.pone.0020968.s003" xlink:type="simple">
        <label>Table S2</label>
        <caption>
          <p><bold>Codon frequencies in </bold><bold><italic>Haloquadratum walsbyi.</italic></bold> The table provides codon frequencies for the 20 amino acids, based on 850405 codons. Start and stop codons were excluded from the computation. The relative amino acid frequencies are given in the “%(total)” column. Frequencies are attributed to their corresponding codons. The first two codon bases are in the column ‘1<sup>st</sup>/2<sup>nd</sup> BASES’, and the third base is in the column ‘3<sup>rd</sup> BASE’. The most frequent codon is specified only if the codon set includes an NNT codon. An NNT codon is most frequent for 10 amino acids while an NNA codon is most frequent for 5 amino acids that also use NNT codons.</p>
          <p>(DOC)</p>
        </caption>
      </supplementary-material>
      <supplementary-material id="pone.0020968.s004" mimetype="application/msword" position="float" xlink:href="info:doi/10.1371/journal.pone.0020968.s004" xlink:type="simple">
        <label>Table S3</label>
        <caption>
          <p>
            <bold>Mass spectrometry of proteins in a cell membrane preparation of strain C23<sup>T</sup>.</bold>
          </p>
          <p>(DOC)</p>
        </caption>
      </supplementary-material>
      <supplementary-material id="pone.0020968.s005" mimetype="application/msword" position="float" xlink:href="info:doi/10.1371/journal.pone.0020968.s005" xlink:type="simple">
        <label>Table S4</label>
        <caption>
          <p><bold>Categories of strain-specific regions.</bold> Strain-specific sequences were classified into various categories. For mobile genetic elements (MGEs), a distinction is made between transposons and transposase-free MGEs (MITEs, PATEs and other short mobile repeats; Insert_MITE_PATE_SMR). Indels and deletion-coupled insertions (DCI) are categorized as long (&gt;1.5 kb), short (&lt;150 bp) and medium (150 bp–1.5 kb). Indel_PolyRepeat indicates that the copy number for short tandem repeats differs between strains. SwitchRepeats are cases where, at an identical position, are either two distinct repeats or two copies of the same repeat in opposite orientation. Delete_Repeatcore are cases where a transposon or repeat is complete in one strain while the central part has been deleted in the other strain, leaving only the fused terminal sequences. Divergent_Gene refers to the in-situ indels that occur within the repeat regions of the halomucin gene. Finally, there are a few miscellaneous strain-specific regions (Indel_Misc). Some of the categories have been combined in the pie chart of <xref ref-type="fig" rid="pone-0020968-g006">Figure 6</xref>.</p>
          <p>(DOC)</p>
        </caption>
      </supplementary-material>
      <supplementary-material id="pone.0020968.s006" mimetype="application/msword" position="float" xlink:href="info:doi/10.1371/journal.pone.0020968.s006" xlink:type="simple">
        <label>Table S5</label>
        <caption>
          <p><bold>Listing of all common and strain-specific regions.</bold> As not even a single genome rearrangement has occurred, the alignment of the two chromosomes can be represented by a series of alternating common and strain-specific regions. Common regions, which collectively define the shared sequence, are labeled by a dash in the category column. Categories of strain-specific sequences are those described in <xref ref-type="supplementary-material" rid="pone.0020968.s005">Table S4</xref>. Position and length are provided for each strain as applicable. For indels that are bounded by direct repeats (of length specified in the overlap column), the core (length) and one copy of the repeat (overlap) are deleted. The total number of deleted bases (sum) is given in the corresponding column. Strain-specific regions can be located in intergenic regions, within genes, or in transposons and repeats (location). The description column provides additional data: (a) Relevant strain-specific regions (DV, GI as given in <xref ref-type="table" rid="pone-0020968-t003">Table 3</xref>). (b) Names of transposons, MITEs, and other short mobile repeats; Extensions in parentheses indicate that the repeat has been targeted by another repeat or has suffered a core-deletion. In the category SwitchRepeats, two elements are specified if they are different or the element is marked (fwd/rev) in cases where the same element occurs in opposite orientation. (c) In several cases, it was possible to determine which of the strains contains the ancestral sequence, e.g. by analysis of gene truncations across indel boundaries. (d) Indels within genes may disrupt the coding region. In cases of “sequence not affected”, the insertion has occurred such that at maximum a few C-terminal residues are affected. In cases of “reading frame conserved”, the indel does not disrupt the reading frame.</p>
          <p>(DOC)</p>
        </caption>
      </supplementary-material>
      <supplementary-material id="pone.0020968.s007" mimetype="application/msword" position="float" xlink:href="info:doi/10.1371/journal.pone.0020968.s007" xlink:type="simple">
        <label>Table S6</label>
        <caption>
          <p><bold>Listing of indels which are bounded by direct repeats.</bold> This table is a subset of <xref ref-type="supplementary-material" rid="pone.0020968.s005">Table S4</xref>, and contains only those indels bounded by direct repeats. The same region numbers are used. All other columns are as described for <xref ref-type="supplementary-material" rid="pone.0020968.s005">Table S4</xref>. The indels are sorted according to the length in the direct repeat (overlap column). Transposons and MITEs, which cause target duplications, have been excluded from this table.</p>
          <p>(DOC)</p>
        </caption>
      </supplementary-material>
      <supplementary-material id="pone.0020968.s008" mimetype="application/msword" position="float" xlink:href="info:doi/10.1371/journal.pone.0020968.s008" xlink:type="simple">
        <label>Table S7</label>
        <caption>
          <p>
            <bold>Sequence matches to CRISPR spacers in strains HBSQ001 and C23<sup>T</sup>.</bold>
          </p>
          <p>(DOC)</p>
        </caption>
      </supplementary-material>
      <supplementary-material id="pone.0020968.s009" mimetype="application/msword" position="float" xlink:href="info:doi/10.1371/journal.pone.0020968.s009" xlink:type="simple">
        <label>Table S8</label>
        <caption>
          <p><bold>Listing of all </bold><bold><italic>Haloquadratum</italic></bold><bold> MGEs.</bold> MGE codes start either with ISHwa or with HqIRS. MGEs with ISHwa codes are present in (or submitted to) ISFinder. A prerequisite for an ISFinder submission is that the MGE is complete, its boundaries are defined, and the MGE codes for a complete transposase. Element groups are a clustering of MGEs based on sequence similarity and common characteristics. The categories are: TP-A (IS605-type transposon), TP-B (common transposon), MITE (miniature inverted-repeat transposable elements), PATE (palindrome-associated transposable element), or SMR-A, SMR-B (short mobile repeat of types A, B). Length indicates the length range of complete copies of the element. A hyphen indicates that none of the copies is complete. The ITR and TD fields contain data only if at least one of the elements has complete boundaries on both sides. ITR provides the inverted terminal repeat of a typical member in ISFinder style where the second number corresponds to the ITR length and the first number to the number of matching bases within the ITR. The term “none” indicates that the MGE does not cause target duplications. TD provides the length range of target duplications. The term “none” indicates that the MGE does not cause target duplications. The #(C23<sup>T</sup>) and #(HBSQ001) columns provide the total number of MGEs, followed in parentheses by the number of elements with complete sequences and the number of elements with complete transposases (devoid of deletions, frame-shifts or in-frame stop codons). The SSEI column provides the number of element copies that occur as SSEIs (strain-specific element insertions). Some of these data are replaced by hyphens if the element is not present in the strain (total number is zero). The remarks column specifies the relationship between MITEs and their parent transposons.</p>
          <p>(DOC)</p>
        </caption>
      </supplementary-material>
      <supplementary-material id="pone.0020968.s010" mimetype="application/msword" position="float" xlink:href="info:doi/10.1371/journal.pone.0020968.s010" xlink:type="simple">
        <label>Table S9</label>
        <caption>
          <p><bold>Near terminal palindromes in homogeneous PATEs and relation to IS605 type transposons.</bold> The terminal sequences of homogeneous PATEs are compared in several ways. In section A, terminal sequences are compared within each element. The 5′ and 3′ “palindromic end of element” highlights palindromes by comparison of the terminal sequence to its reverse-complement. “Comparison of 5′ and 3′ palindromic ends” highlights similarities between the terminal sequences. For HqIRS46, the 52 bp terminal_only version of the element, generated by repeat-mediated deletion within a perfect 22 bp repeat, is shown (core del). In all cases, internal sequence ends are indicated by three dots. For the 3′ end, both directions show the terminal sequence fragment. It should be noted that the terminal sequences of distinct PATEs show similarities at the very 3′ end. For the 5′ end, the targeting sequence of 4–5 bp is included. The distance between the element start and the near-terminal palindrome is longer at the 5′ end compared to the 3′ end and this extended sequence is not displayed for the reverse strand. In section B, the terminal sequence of the PATE is compared to that of other elements. There is a remarkably high level of sequence similarity between homogeneous PATEs from <italic>Haloquadratum</italic> and IS605-type transposases from <italic>Natronomonas pharaonis</italic>.</p>
          <p>(DOC)</p>
        </caption>
      </supplementary-material>
      <supplementary-material id="pone.0020968.s011" mimetype="application/msword" position="float" xlink:href="info:doi/10.1371/journal.pone.0020968.s011" xlink:type="simple">
        <label>Text S1</label>
        <caption>
          <p>
            <bold>Descriptions of genomic changes related to MGEs.</bold>
          </p>
          <p>(DOC)</p>
        </caption>
      </supplementary-material>
    </sec>
  </body>
  <back>
    <ack>
      <p>We thank Bettina Brustmann for technical assistance; Snezan Marinkovic, Cyril Boulegue and Michel Mayr for analyses performed at the MPI of Biochemistry core facility; and Michaela Falb for critical reading of the manuscript. We thank Jose Mejia for supporting the extension of the HaloLex system.</p>
    </ack>
    <ref-list>
      <title>References</title>
      <ref id="pone.0020968-Walsby1">
        <label>1</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Walsby</surname><given-names>AE</given-names></name></person-group>             <year>1980</year>             <article-title>A square bacterium.</article-title>             <source>Nature (London)</source>             <volume>283</volume>             <fpage>69</fpage>             <lpage>71</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Oren1">
        <label>2</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Oren</surname><given-names>A</given-names></name></person-group>             <year>2002</year>             <article-title>Molecular ecology of extremely halophilic Archaea and Bacteria.</article-title>             <source>FEMS Microbiol Ecol</source>             <volume>39</volume>             <fpage>1</fpage>             <lpage>7</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Antn1">
        <label>3</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Antón</surname><given-names>J</given-names></name><name name-style="western"><surname>Llobet-Brossa</surname><given-names>E</given-names></name><name name-style="western"><surname>Rodríguez-Valera</surname><given-names>F</given-names></name><name name-style="western"><surname>Amann</surname><given-names>R</given-names></name></person-group>             <year>1999</year>             <article-title>Fluorescence <italic>in situ</italic> hybridization analysis of the prokaryotic community inhabiting crystallizer ponds.</article-title>             <source>Environ Microbiol</source>             <volume>1</volume>             <fpage>517</fpage>             <lpage>523</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Stoeckenius1">
        <label>4</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Stoeckenius</surname><given-names>W</given-names></name></person-group>             <year>1981</year>             <article-title>Walsby's square bacterium: fine structure of an orthogonal procaryote.</article-title>             <source>J Bacteriol</source>             <volume>148</volume>             <fpage>352</fpage>             <lpage>360</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Kessel1">
        <label>5</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Kessel</surname><given-names>M</given-names></name><name name-style="western"><surname>Cohen</surname><given-names>Y</given-names></name></person-group>             <year>1982</year>             <article-title>Ultrastructure of square bacteria from a brine pool in Southern Sinai.</article-title>             <source>J Bacteriol</source>             <volume>150</volume>             <fpage>851</fpage>             <lpage>860</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Oren2">
        <label>6</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Oren</surname><given-names>A</given-names></name><name name-style="western"><surname>Duker</surname><given-names>S</given-names></name><name name-style="western"><surname>Ritter</surname><given-names>S</given-names></name></person-group>             <year>1996</year>             <article-title>The polar lipid composition of Walsby's square bacterium.</article-title>             <source>FEMS Microbiol Lett</source>             <volume>138</volume>             <fpage>135</fpage>             <lpage>140</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Bolhuis1">
        <label>7</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Bolhuis</surname><given-names>H</given-names></name><name name-style="western"><surname>Palm</surname><given-names>P</given-names></name><name name-style="western"><surname>Wende</surname><given-names>A</given-names></name><name name-style="western"><surname>Falb</surname><given-names>M</given-names></name><name name-style="western"><surname>Rampp</surname><given-names>M</given-names></name><etal/></person-group>             <year>2006</year>             <article-title>The genome of the square archaeon <italic>Haloquadratum walsbyi</italic>: life at the limits of water activity.</article-title>             <source>BMC Genomics</source>             <volume>7</volume>             <fpage>169</fpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Burns1">
        <label>8</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Burns</surname><given-names>DG</given-names></name><name name-style="western"><surname>Janssen</surname><given-names>PH</given-names></name><name name-style="western"><surname>Itoh</surname><given-names>T</given-names></name><name name-style="western"><surname>Kamekura</surname><given-names>M</given-names></name><name name-style="western"><surname>Li</surname><given-names>Z</given-names></name><etal/></person-group>             <year>2007</year>             <article-title><italic>Haloquadratum walsbyi</italic> gen. nov., sp. nov., the square haloarchaeon of Walsby, isolated from saltern crystallizers in Australia and Spain.</article-title>             <source>Int J Syst Evol Microbiol</source>             <volume>57</volume>             <fpage>387</fpage>             <lpage>392</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Oren3">
        <label>9</label>
        <element-citation publication-type="other" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Oren</surname><given-names>A</given-names></name></person-group>             <year>2007</year>             <article-title>The Order Halobacteriales.</article-title>             <person-group person-group-type="editor"><name name-style="western"><surname>Dworkin</surname><given-names>M</given-names></name><name name-style="western"><surname>Falkow</surname><given-names>S</given-names></name><name name-style="western"><surname>Rosenberg</surname><given-names>E</given-names></name><name name-style="western"><surname>Schleifer</surname><given-names>K-H</given-names></name><name name-style="western"><surname>Stackebrandt</surname><given-names>E</given-names></name></person-group>             <source>The Prokaryotes</source>             <publisher-name>Springer</publisher-name>          </element-citation>
      </ref>
      <ref id="pone.0020968-Bolhuis2">
        <label>10</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Bolhuis</surname><given-names>H</given-names></name><name name-style="western"><surname>Poele</surname><given-names>EM</given-names></name><name name-style="western"><surname>Rodríguez-Valera</surname><given-names>F</given-names></name></person-group>             <year>2004</year>             <article-title>Isolation and cultivation of Walsby's square archaeon.</article-title>             <source>Environ Microbiol</source>             <volume>6</volume>             <fpage>1287</fpage>             <lpage>1291</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Burns2">
        <label>11</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Burns</surname><given-names>DG</given-names></name><name name-style="western"><surname>Camakaris</surname><given-names>HM</given-names></name><name name-style="western"><surname>Janssen</surname><given-names>PH</given-names></name><name name-style="western"><surname>Dyall-Smith</surname><given-names>ML</given-names></name></person-group>             <year>2004</year>             <article-title>Cultivation of Walsby's square haloarchaeon.</article-title>             <source>FEMS Microbiol Lett</source>             <volume>238</volume>             <fpage>469</fpage>             <lpage>473</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Falb1">
        <label>12</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Falb</surname><given-names>M</given-names></name><name name-style="western"><surname>Müller</surname><given-names>K</given-names></name><name name-style="western"><surname>Königsmaier</surname><given-names>L</given-names></name><name name-style="western"><surname>Oberwinkler</surname><given-names>T</given-names></name><name name-style="western"><surname>Horn</surname><given-names>P</given-names></name><etal/></person-group>             <year>2008</year>             <article-title>Metabolism of halophilic archaea.</article-title>             <source>Extremophiles</source>             <volume>12</volume>             <fpage>177</fpage>             <lpage>196</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-CuadrosOrellana1">
        <label>13</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Cuadros-Orellana</surname><given-names>S</given-names></name><name name-style="western"><surname>Martin-Cuadrado</surname><given-names>AB</given-names></name><name name-style="western"><surname>Legault</surname><given-names>B</given-names></name><name name-style="western"><surname>D'Auria</surname><given-names>G</given-names></name><name name-style="western"><surname>Zhaxybayeva</surname><given-names>O</given-names></name><etal/></person-group>             <year>2007</year>             <article-title>Genomic plasticity in prokaryotes: the case of the square haloarchaeon.</article-title>             <source>ISME J</source>             <volume>1</volume>             <fpage>235</fpage>             <lpage>245</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Legault1">
        <label>14</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Legault</surname><given-names>BA</given-names></name><name name-style="western"><surname>López-López</surname><given-names>A</given-names></name><name name-style="western"><surname>Alba-Casado</surname><given-names>JC</given-names></name><name name-style="western"><surname>Doolittle</surname><given-names>WF</given-names></name><name name-style="western"><surname>Bolhuis</surname><given-names>H</given-names></name><etal/></person-group>             <year>2006</year>             <article-title>Environmental genomics of “Haloquadratum walsbyi” in a saltern crystallizer indicates a large pool of accessory genes in an otherwise coherent species.</article-title>             <source>BMC Genomics</source>             <volume>7</volume>             <fpage>171</fpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Tettelin1">
        <label>15</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Tettelin</surname><given-names>H</given-names></name><name name-style="western"><surname>Masignani</surname><given-names>V</given-names></name><name name-style="western"><surname>Cieslewicz</surname><given-names>MJ</given-names></name><name name-style="western"><surname>Donati</surname><given-names>C</given-names></name><name name-style="western"><surname>Medini</surname><given-names>D</given-names></name><etal/></person-group>             <year>2005</year>             <article-title>Genome analysis of multiple pathogenic isolates of <italic>Streptococcus agalactiae</italic>: implications for the microbial “pan-genome”.</article-title>             <source>Proc Natl Acad Sci U S A</source>             <volume>102</volume>             <fpage>13950</fpage>             <lpage>13955</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Oh1">
        <label>16</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Oh</surname><given-names>D</given-names></name><name name-style="western"><surname>Porter</surname><given-names>K</given-names></name><name name-style="western"><surname>Russ</surname><given-names>B</given-names></name><name name-style="western"><surname>Burns</surname><given-names>D</given-names></name><name name-style="western"><surname>Dyall-Smith</surname><given-names>M</given-names></name></person-group>             <year>2010</year>             <article-title>Diversity of <italic>Haloquadratum</italic> and other haloarchaea in three, geographically distant, Australian saltern crystallizer ponds.</article-title>             <source>Extremophiles</source>             <volume>14</volume>             <fpage>161</fpage>             <lpage>169</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-PedrosAlio1">
        <label>17</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Pedrós-Alió</surname><given-names>C</given-names></name><name name-style="western"><surname>Calderón-Paz</surname><given-names>JI</given-names></name><name name-style="western"><surname>MacLean</surname><given-names>MH</given-names></name><name name-style="western"><surname>Medina</surname><given-names>G</given-names></name><name name-style="western"><surname>Marrasé</surname><given-names>C</given-names></name><etal/></person-group>             <year>2000</year>             <article-title>The microbial food web along salinity gradients.</article-title>             <source>FEMS Microbiol Ecol</source>             <volume>32</volume>             <fpage>143</fpage>             <lpage>155</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-GuixaBoixareu1">
        <label>18</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Guixa-Boixareu</surname><given-names>N</given-names></name><name name-style="western"><surname>Calderón-Paz</surname><given-names>JI</given-names></name><name name-style="western"><surname>Heldal</surname><given-names>M</given-names></name><name name-style="western"><surname>Bratbak</surname><given-names>G</given-names></name><name name-style="western"><surname>Pedrós-Alió</surname><given-names>C</given-names></name></person-group>             <year>1996</year>             <article-title>Viral lysis and bacterivory as prokaryotic loss factors along a salinity gradient.</article-title>             <source>Aquat Microb Ecol</source>             <volume>11</volume>             <fpage>215</fpage>             <lpage>227</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Karginov1">
        <label>19</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Karginov</surname><given-names>FV</given-names></name><name name-style="western"><surname>Hannon</surname><given-names>GJ</given-names></name></person-group>             <year>2010</year>             <article-title>The CRISPR system: small RNA-guided defense in bacteria and archaea.</article-title>             <source>Mol Cell</source>             <volume>37</volume>             <fpage>7</fpage>             <lpage>19</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Held1">
        <label>20</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Held</surname><given-names>NL</given-names></name><name name-style="western"><surname>Whitaker</surname><given-names>RJ</given-names></name></person-group>             <year>2009</year>             <article-title>Viral biogeography revealed by signatures in <italic>Sulfolobus islandicus</italic> genomes.</article-title>             <source>Environ Microbiol</source>             <volume>11</volume>             <fpage>457</fpage>             <lpage>466</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Pfeiffer1">
        <label>21</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Pfeiffer</surname><given-names>F</given-names></name><name name-style="western"><surname>Schuster</surname><given-names>SC</given-names></name><name name-style="western"><surname>Broicher</surname><given-names>A</given-names></name><name name-style="western"><surname>Falb</surname><given-names>M</given-names></name><name name-style="western"><surname>Palm</surname><given-names>P</given-names></name><etal/></person-group>             <year>2008</year>             <article-title>Evolution in the laboratory: the genome of <italic>Halobacterium salinarum</italic> strain R1 compared to that of strain NRC-1.</article-title>             <source>Genomics</source>             <volume>91</volume>             <fpage>335</fpage>             <lpage>346</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Shimizu1">
        <label>22</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Shimizu</surname><given-names>T</given-names></name><name name-style="western"><surname>Ohtani</surname><given-names>K</given-names></name><name name-style="western"><surname>Hirakawa</surname><given-names>H</given-names></name><name name-style="western"><surname>Ohshima</surname><given-names>K</given-names></name><name name-style="western"><surname>Yamashita</surname><given-names>A</given-names></name><etal/></person-group>             <year>2002</year>             <article-title>Complete genome sequence of <italic>Clostridium perfringens</italic>, an anaerobic flesh-eater.</article-title>             <source>Proc Natl Acad Sci U S A</source>             <volume>99</volume>             <fpage>996</fpage>             <lpage>1001</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-VieiraSilva1">
        <label>23</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Vieira-Silva</surname><given-names>S</given-names></name><name name-style="western"><surname>Rocha</surname><given-names>EP</given-names></name></person-group>             <year>2010</year>             <article-title>The systemic imprint of growth and its uses in ecological (meta)genomics.</article-title>             <source>PLoS Genet</source>             <volume>6</volume>             <fpage>e1000808</fpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Lobry1">
        <label>24</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Lobry</surname><given-names>JR</given-names></name></person-group>             <year>1996</year>             <article-title>A simple vectorial representation of DNA sequences for the detection of replication origins in bacteria.</article-title>             <source>Biochimie</source>             <volume>78</volume>             <fpage>323</fpage>             <lpage>326</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Lopez1">
        <label>25</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Lopez</surname><given-names>P</given-names></name><name name-style="western"><surname>Philippe</surname><given-names>H</given-names></name><name name-style="western"><surname>Myllykallio</surname><given-names>H</given-names></name><name name-style="western"><surname>Forterre</surname><given-names>P</given-names></name></person-group>             <year>1999</year>             <article-title>Identification of putative chromosomal origins of replication in Archaea.</article-title>             <source>Mol Microbiol</source>             <volume>32</volume>             <fpage>883</fpage>             <lpage>886</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Gao1">
        <label>26</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Gao</surname><given-names>F</given-names></name><name name-style="western"><surname>Zhang</surname><given-names>CT</given-names></name></person-group>             <year>2006</year>             <article-title>GC-Profile: a web-based tool for visualizing and analyzing the variation of GC content in genomic sequences.</article-title>             <source>Nucleic Acids Res</source>             <volume>34</volume>             <fpage>W686</fpage>             <lpage>W691</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Myllykallio1">
        <label>27</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Myllykallio</surname><given-names>H</given-names></name><name name-style="western"><surname>Lopez</surname><given-names>P</given-names></name><name name-style="western"><surname>Lopez-Garcia</surname><given-names>P</given-names></name><name name-style="western"><surname>Heilig</surname><given-names>R</given-names></name><name name-style="western"><surname>Saurin</surname><given-names>W</given-names></name><etal/></person-group>             <year>2000</year>             <article-title>Bacterial mode of replication with eukaryotic-like machinery in a hyperthermophilic archaeon.</article-title>             <source>Science</source>             <volume>288</volume>             <fpage>2212</fpage>             <lpage>2215</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Coker1">
        <label>28</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Coker</surname><given-names>JA</given-names></name><name name-style="western"><surname>DasSarma</surname><given-names>P</given-names></name><name name-style="western"><surname>Capes</surname><given-names>M</given-names></name><name name-style="western"><surname>Wallace</surname><given-names>T</given-names></name><name name-style="western"><surname>McGarrity</surname><given-names>K</given-names></name><etal/></person-group>             <year>2009</year>             <article-title>Multiple replication origins of <italic>Halobacterium</italic> sp. strain NRC-1: properties of the conserved orc7-dependent oriC1.</article-title>             <source>J Bacteriol</source>             <volume>191</volume>             <fpage>5253</fpage>             <lpage>5261</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Norais1">
        <label>29</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Norais</surname><given-names>C</given-names></name><name name-style="western"><surname>Hawkins</surname><given-names>M</given-names></name><name name-style="western"><surname>Hartman</surname><given-names>AL</given-names></name><name name-style="western"><surname>Eisen</surname><given-names>JA</given-names></name><name name-style="western"><surname>Myllykallio</surname><given-names>H</given-names></name><etal/></person-group>             <year>2007</year>             <article-title>Genetic and physical mapping of DNA replication origins in <italic>Haloferax volcanii</italic>.</article-title>             <source>PLoS Genet</source>             <volume>3</volume>             <fpage>e77</fpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Barry1">
        <label>30</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Barry</surname><given-names>ER</given-names></name><name name-style="western"><surname>Bell</surname><given-names>SD</given-names></name></person-group>             <year>2006</year>             <article-title>DNA replication in the Archaea.</article-title>             <source>Microbiol Mol Biol Rev</source>             <volume>70</volume>             <fpage>876</fpage>             <lpage>887</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Berquist1">
        <label>31</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Berquist</surname><given-names>BR</given-names></name><name name-style="western"><surname>DasSarma</surname><given-names>P</given-names></name><name name-style="western"><surname>DasSarma</surname><given-names>S</given-names></name></person-group>             <year>2007</year>             <article-title>Essential and non-essential DNA replication genes in the model halophilic archaeon, <italic>Halobacterium</italic> sp. NRC-1.</article-title>             <source>BMC Genet</source>             <volume>8</volume>             <fpage>31</fpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Robinson1">
        <label>32</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Robinson</surname><given-names>NP</given-names></name><name name-style="western"><surname>Bell</surname><given-names>SD</given-names></name></person-group>             <year>2007</year>             <article-title>Extrachromosomal element capture and the evolution of multiple replication origins in archaeal chromosomes.</article-title>             <source>Proc Natl Acad Sci U S A</source>             <volume>104</volume>             <fpage>5806</fpage>             <lpage>5811</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Reno1">
        <label>33</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Reno</surname><given-names>ML</given-names></name><name name-style="western"><surname>Held</surname><given-names>NL</given-names></name><name name-style="western"><surname>Fields</surname><given-names>CJ</given-names></name><name name-style="western"><surname>Burke</surname><given-names>PV</given-names></name><name name-style="western"><surname>Whitaker</surname><given-names>RJ</given-names></name></person-group>             <year>2009</year>             <article-title>Biogeography of the <italic>Sulfolobus islandicus</italic> pan-genome.</article-title>             <source>Proc Natl Acad Sci U S A</source>             <volume>106</volume>             <fpage>8605</fpage>             <lpage>8610</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Agris1">
        <label>34</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Agris</surname><given-names>PF</given-names></name><name name-style="western"><surname>Vendeix</surname><given-names>FA</given-names></name><name name-style="western"><surname>Graham</surname><given-names>WD</given-names></name></person-group>             <year>2007</year>             <article-title>tRNA's wobble decoding of the genome: 40 years of modification.</article-title>             <source>J Mol Biol</source>             <volume>366</volume>             <fpage>1</fpage>             <lpage>13</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Grosjean1">
        <label>35</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Grosjean</surname><given-names>H</given-names></name><name name-style="western"><surname>Marck</surname><given-names>C</given-names></name><name name-style="western"><surname>Gaspin</surname><given-names>C</given-names></name><name name-style="western"><surname>Decatur</surname><given-names>WA</given-names></name><name name-style="western"><surname>de Crécy-Lagard</surname><given-names>V</given-names></name></person-group>             <year>2008</year>             <article-title>RNomics and Modomics in the halophilic archaea <italic>Haloferax volcanii</italic>: identification of RNA modification genes.</article-title>             <source>BMC Genomics</source>             <volume>9</volume>             <fpage>470</fpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Ikeuchi1">
        <label>36</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Ikeuchi</surname><given-names>Y</given-names></name><name name-style="western"><surname>Kimura</surname><given-names>S</given-names></name><name name-style="western"><surname>Numata</surname><given-names>T</given-names></name><name name-style="western"><surname>Nakamura</surname><given-names>D</given-names></name><name name-style="western"><surname>Yokogawa</surname><given-names>T</given-names></name><etal/></person-group>             <year>2010</year>             <article-title>Agmatine-conjugated cytidine in a tRNA anticodon is essential for AUA decoding in archaea.</article-title>             <source>Nat Chem Biol</source>             <volume>6</volume>             <fpage>277</fpage>             <lpage>282</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Greve1">
        <label>37</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Greve</surname><given-names>B</given-names></name><name name-style="western"><surname>Jensen</surname><given-names>S</given-names></name><name name-style="western"><surname>Phan</surname><given-names>H</given-names></name><name name-style="western"><surname>Brugger</surname><given-names>K</given-names></name><name name-style="western"><surname>Zillig</surname><given-names>W</given-names></name><etal/></person-group>             <year>2005</year>             <article-title>Novel RepA-MCM proteins encoded in plasmids pTAU4, pORA1 and pTIK4 from <italic>Sulfolobus neozealandicus</italic>.</article-title>             <source>Archaea</source>             <volume>1</volume>             <fpage>319</fpage>             <lpage>325</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Soler1">
        <label>38</label>
        <element-citation publication-type="other" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Soler</surname><given-names>N</given-names></name><name name-style="western"><surname>Marguet</surname><given-names>E</given-names></name><name name-style="western"><surname>Cortez</surname><given-names>D</given-names></name><name name-style="western"><surname>Desnoues</surname><given-names>N</given-names></name><name name-style="western"><surname>Keller</surname><given-names>J</given-names></name><etal/></person-group>             <year>2010</year>             <article-title>Two novel families of plasmids from hyperthermophilic archaea encoding new families of replication proteins.</article-title>             <source>Nucleic Acids Res</source>          </element-citation>
      </ref>
      <ref id="pone.0020968-Bath1">
        <label>39</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Bath</surname><given-names>C</given-names></name><name name-style="western"><surname>Cukalac</surname><given-names>T</given-names></name><name name-style="western"><surname>Porter</surname><given-names>K</given-names></name><name name-style="western"><surname>Dyall-Smith</surname><given-names>M</given-names></name></person-group>             <year>2006</year>             <article-title>His1 and His2 are distantly related, spindle-shaped haloviruses belonging to the novel virus group, <italic>Salterprovirus</italic>.</article-title>             <source>Virology</source>             <volume>350</volume>             <fpage>228</fpage>             <lpage>239</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Holmes1">
        <label>40</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Holmes</surname><given-names>ML</given-names></name><name name-style="western"><surname>Dyall-Smith</surname><given-names>ML</given-names></name></person-group>             <year>1990</year>             <article-title>A plasmid vector with a selectable marker for halophilic archaebacteria.</article-title>             <source>J Bacteriol</source>             <volume>172</volume>             <fpage>756</fpage>             <lpage>761</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Holmes2">
        <label>41</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Holmes</surname><given-names>ML</given-names></name><name name-style="western"><surname>Pfeifer</surname><given-names>F</given-names></name><name name-style="western"><surname>Dyall-Smith</surname><given-names>ML</given-names></name></person-group>             <year>1995</year>             <article-title>Analysis of the halobacterial plasmid pHK2 minimal replicon.</article-title>             <source>Gene</source>             <volume>153</volume>             <fpage>117</fpage>             <lpage>121</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Pietila1">
        <label>42</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Pietila</surname><given-names>MK</given-names></name><name name-style="western"><surname>Roine</surname><given-names>E</given-names></name><name name-style="western"><surname>Paulin</surname><given-names>L</given-names></name><name name-style="western"><surname>Kalkkinen</surname><given-names>N</given-names></name><name name-style="western"><surname>Bamford</surname><given-names>DH</given-names></name></person-group>             <year>2009</year>             <article-title>An ssDNA virus infecting archaea: a new lineage of viruses with a membrane envelope.</article-title>             <source>Mol Microbiol</source>             <volume>72</volume>             <fpage>307</fpage>             <lpage>319</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Roine1">
        <label>43</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Roine</surname><given-names>E</given-names></name><name name-style="western"><surname>Kukkaro</surname><given-names>P</given-names></name><name name-style="western"><surname>Paulin</surname><given-names>L</given-names></name><name name-style="western"><surname>Laurinavičius</surname><given-names>S</given-names></name><name name-style="western"><surname>Domanska</surname><given-names>A</given-names></name><etal/></person-group>             <year>2010</year>             <article-title>New, closely related haloarchaeal viral elements with different nucleic acid types.</article-title>             <source>J Virol</source>             <volume>84</volume>             <fpage>3682</fpage>             <lpage>3689</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Pritham1">
        <label>44</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Pritham</surname><given-names>EJ</given-names></name><name name-style="western"><surname>Putliwala</surname><given-names>T</given-names></name><name name-style="western"><surname>Feschotte</surname><given-names>C</given-names></name></person-group>             <year>2007</year>             <article-title>Mavericks, a novel class of giant transposable elements widespread in eukaryotes and related to DNA viruses.</article-title>             <source>Gene</source>             <volume>390</volume>             <fpage>3</fpage>             <lpage>17</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Lukjancenko1">
        <label>45</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Lukjancenko</surname><given-names>O</given-names></name><name name-style="western"><surname>Wassenaar</surname><given-names>TM</given-names></name><name name-style="western"><surname>Ussery</surname><given-names>DW</given-names></name></person-group>             <year>2010</year>             <article-title>Comparison of 61 sequenced <italic>Escherichia coli</italic> genomes.</article-title>             <source>Microb Ecol</source>             <volume>60</volume>             <fpage>708</fpage>             <lpage>720</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Kers1">
        <label>46</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Kers</surname><given-names>JA</given-names></name><name name-style="western"><surname>Cameron</surname><given-names>KD</given-names></name><name name-style="western"><surname>Joshi</surname><given-names>MV</given-names></name><name name-style="western"><surname>Bukhalid</surname><given-names>RA</given-names></name><name name-style="western"><surname>Morello</surname><given-names>JE</given-names></name><etal/></person-group>             <year>2005</year>             <article-title>A large, mobile pathogenicity island confers plant pathogenicity on <italic>Streptomyces</italic> species.</article-title>             <source>Mol Microbiol</source>             <volume>55</volume>             <fpage>1025</fpage>             <lpage>1033</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-vanSchaik1">
        <label>47</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>van Schaik</surname><given-names>W</given-names></name><name name-style="western"><surname>Top</surname><given-names>J</given-names></name><name name-style="western"><surname>Riley</surname><given-names>DR</given-names></name><name name-style="western"><surname>Boekhorst</surname><given-names>J</given-names></name><name name-style="western"><surname>Vrijenhoek</surname><given-names>JE</given-names></name><etal/></person-group>             <year>2010</year>             <article-title>Pyrosequencing-based comparative genome analysis of the nosocomial pathogen <italic>Enterococcus faecium</italic> and identification of a large transferable pathogenicity island.</article-title>             <source>BMC Genomics</source>             <volume>11</volume>             <fpage>239</fpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Delmas1">
        <label>48</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Delmas</surname><given-names>S</given-names></name><name name-style="western"><surname>Shunburne</surname><given-names>L</given-names></name><name name-style="western"><surname>Ngo</surname><given-names>HP</given-names></name><name name-style="western"><surname>Allers</surname><given-names>T</given-names></name></person-group>             <year>2009</year>             <article-title>Mre11-Rad50 promotes rapid repair of DNA damage in the polyploid archaeon <italic>Haloferax volcanii</italic> by restraining homologous recombination.</article-title>             <source>PLoS Genet</source>             <volume>5</volume>             <fpage>e1000552</fpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-McVey1">
        <label>49</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>McVey</surname><given-names>M</given-names></name><name name-style="western"><surname>Lee</surname><given-names>SE</given-names></name></person-group>             <year>2008</year>             <article-title>MMEJ repair of double-strand breaks (director's cut): deleted sequences and alternative endings.</article-title>             <source>Trends Genet</source>             <volume>24</volume>             <fpage>529</fpage>             <lpage>538</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Pfeifer1">
        <label>50</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Pfeifer</surname><given-names>F</given-names></name><name name-style="western"><surname>Weidinger</surname><given-names>G</given-names></name><name name-style="western"><surname>Goebel</surname><given-names>W</given-names></name></person-group>             <year>1981</year>             <article-title>Genetic variability in <italic>Halobacterium halobium</italic>.</article-title>             <source>J Bacteriol</source>             <volume>145</volume>             <fpage>375</fpage>             <lpage>381</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Santos1">
        <label>51</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Santos</surname><given-names>F</given-names></name><name name-style="western"><surname>Yarza</surname><given-names>P</given-names></name><name name-style="western"><surname>Parro</surname><given-names>V</given-names></name><name name-style="western"><surname>Briones</surname><given-names>C</given-names></name><name name-style="western"><surname>Anton</surname><given-names>J</given-names></name></person-group>             <year>2010</year>             <article-title>The metavirome of a hypersaline environment.</article-title>             <source>Environ Microbiol</source>          </element-citation>
      </ref>
      <ref id="pone.0020968-Tang1">
        <label>52</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Tang</surname><given-names>SL</given-names></name><name name-style="western"><surname>Nuttall</surname><given-names>S</given-names></name><name name-style="western"><surname>Dyall-Smith</surname><given-names>M</given-names></name></person-group>             <year>2004</year>             <article-title>Haloviruses HF1 and HF2: evidence for a recent and large recombination event.</article-title>             <source>J Bacteriol</source>             <volume>186</volume>             <fpage>2810</fpage>             <lpage>2817</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Gudbergsdottir1">
        <label>53</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Gudbergsdottir</surname><given-names>S</given-names></name><name name-style="western"><surname>Deng</surname><given-names>L</given-names></name><name name-style="western"><surname>Chen</surname><given-names>Z</given-names></name><name name-style="western"><surname>Jensen</surname><given-names>JV</given-names></name><name name-style="western"><surname>Jensen</surname><given-names>LR</given-names></name><etal/></person-group>             <year>2011</year>             <article-title>Dynamic properties of the <italic>Sulfolobus</italic> CRISPR/Cas and CRISPR/Cmr systems when challenged with vector-borne viral and plasmid genes and protospacers.</article-title>             <source>Mol Microbiol</source>             <volume>79</volume>             <fpage>35</fpage>             <lpage>49</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Stern1">
        <label>54</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Stern</surname><given-names>A</given-names></name><name name-style="western"><surname>Keren</surname><given-names>L</given-names></name><name name-style="western"><surname>Wurtzel</surname><given-names>O</given-names></name><name name-style="western"><surname>Amitai</surname><given-names>G</given-names></name><name name-style="western"><surname>Sorek</surname><given-names>R</given-names></name></person-group>             <year>2010</year>             <article-title>Self-targeting by CRISPR: gene regulation or autoimmunity?</article-title>             <source>Trends Genet</source>             <volume>26</volume>             <fpage>335</fpage>             <lpage>340</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Deveau1">
        <label>55</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Deveau</surname><given-names>H</given-names></name><name name-style="western"><surname>Barrangou</surname><given-names>R</given-names></name><name name-style="western"><surname>Garneau</surname><given-names>JE</given-names></name><name name-style="western"><surname>Labonte</surname><given-names>J</given-names></name><name name-style="western"><surname>Fremaux</surname><given-names>C</given-names></name><etal/></person-group>             <year>2008</year>             <article-title>Phage response to CRISPR-encoded resistance in <italic>Streptococcus thermophilus</italic>.</article-title>             <source>J Bacteriol</source>             <volume>190</volume>             <fpage>1390</fpage>             <lpage>1400</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Touchon1">
        <label>56</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Touchon</surname><given-names>M</given-names></name><name name-style="western"><surname>Rocha</surname><given-names>E</given-names></name></person-group>             <year>2010</year>             <article-title>The small, slow and specialized CRISPR and anti-CRISPR of <italic>Escherichia</italic> and <italic>Salmonella</italic>.</article-title>             <source>PLoS ONE</source>             <volume>5</volume>             <fpage>e11126</fpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Horvath1">
        <label>57</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Horvath</surname><given-names>P</given-names></name><name name-style="western"><surname>Coute-Monvoisin</surname><given-names>AC</given-names></name><name name-style="western"><surname>Romero</surname><given-names>DA</given-names></name><name name-style="western"><surname>Boyaval</surname><given-names>P</given-names></name><name name-style="western"><surname>Fremaux</surname><given-names>C</given-names></name><etal/></person-group>             <year>2008</year>             <article-title>Comparative analysis of CRISPR loci in lactic acid bacteria genomes.</article-title>             <source>Int J Food Microbiol</source>          </element-citation>
      </ref>
      <ref id="pone.0020968-YuristDoutsch1">
        <label>58</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Yurist-Doutsch</surname><given-names>S</given-names></name><name name-style="western"><surname>Magidovich</surname><given-names>H</given-names></name><name name-style="western"><surname>Ventura</surname><given-names>VV</given-names></name><name name-style="western"><surname>Hitchen</surname><given-names>PG</given-names></name><name name-style="western"><surname>Dell</surname><given-names>A</given-names></name><etal/></person-group>             <year>2010</year>             <article-title>N-glycosylation in Archaea: on the coordinated actions of <italic>Haloferax volcanii</italic> AglF and AglM.</article-title>             <source>Mol Microbiol</source>             <volume>75</volume>             <fpage>1047</fpage>             <lpage>1058</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Sumper1">
        <label>59</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Sumper</surname><given-names>M</given-names></name><name name-style="western"><surname>Berg</surname><given-names>E</given-names></name><name name-style="western"><surname>Mengele</surname><given-names>R</given-names></name><name name-style="western"><surname>Strobel</surname><given-names>I</given-names></name></person-group>             <year>1990</year>             <article-title>Primary structure and glycosylation of the S-layer protein of <italic>Haloferax volcanii</italic>.</article-title>             <source>J Bacteriol</source>             <volume>172</volume>             <fpage>7111</fpage>             <lpage>7118</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Oesterhelt1">
        <label>60</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Oesterhelt</surname><given-names>D</given-names></name><name name-style="western"><surname>Stoeckenius</surname><given-names>W</given-names></name></person-group>             <year>1974</year>             <article-title>Isolation of the cell membrane of <italic>Halobacterium halobium</italic> and its fractionation into red and purple membrane.</article-title>             <source>Methods Enzymol</source>             <volume>31</volume>             <fpage>667</fpage>             <lpage>678</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Fu1">
        <label>61</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Fu</surname><given-names>HY</given-names></name><name name-style="western"><surname>Lin</surname><given-names>YC</given-names></name><name name-style="western"><surname>Chang</surname><given-names>YN</given-names></name><name name-style="western"><surname>Tseng</surname><given-names>H</given-names></name><name name-style="western"><surname>Huang</surname><given-names>CC</given-names></name><etal/></person-group>             <year>2010</year>             <article-title>A novel six-rhodopsin system in a single archaeon.</article-title>             <source>J Bacteriol</source>          </element-citation>
      </ref>
      <ref id="pone.0020968-Pagaling1">
        <label>62</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Pagaling</surname><given-names>E</given-names></name><name name-style="western"><surname>Haigh</surname><given-names>RD</given-names></name><name name-style="western"><surname>Grant</surname><given-names>WD</given-names></name><name name-style="western"><surname>Cowan</surname><given-names>DA</given-names></name><name name-style="western"><surname>Jones</surname><given-names>BE</given-names></name><etal/></person-group>             <year>2007</year>             <article-title>Sequence analysis of an archaeal virus isolated from a hypersaline lake in Inner Mongolia, China.</article-title>             <source>BMC Genomics</source>             <volume>8</volume>             <fpage>410</fpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Szabo1">
        <label>63</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Szabo</surname><given-names>Z</given-names></name><name name-style="western"><surname>Stahl</surname><given-names>AO</given-names></name><name name-style="western"><surname>Albers</surname><given-names>SV</given-names></name><name name-style="western"><surname>Kissinger</surname><given-names>JC</given-names></name><name name-style="western"><surname>Driessen</surname><given-names>AJ</given-names></name><etal/></person-group>             <year>2007</year>             <article-title>Identification of diverse archaeal proteins with class III signal peptides cleaved by distinct archaeal prepilin peptidases.</article-title>             <source>J Bacteriol</source>             <volume>189</volume>             <fpage>772</fpage>             <lpage>778</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Ellen1">
        <label>64</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Ellen</surname><given-names>AF</given-names></name><name name-style="western"><surname>Zolghadr</surname><given-names>B</given-names></name><name name-style="western"><surname>Driessen</surname><given-names>AM</given-names></name><name name-style="western"><surname>Albers</surname><given-names>SV</given-names></name></person-group>             <year>2010</year>             <article-title>Shaping the archaeal cell envelope.</article-title>             <source>Archaea</source>             <volume>2010</volume>             <fpage>608243</fpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Tang2">
        <label>65</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Tang</surname><given-names>SL</given-names></name><name name-style="western"><surname>Nuttall</surname><given-names>S</given-names></name><name name-style="western"><surname>Ngui</surname><given-names>K</given-names></name><name name-style="western"><surname>Fisher</surname><given-names>C</given-names></name><name name-style="western"><surname>Lopez</surname><given-names>P</given-names></name><etal/></person-group>             <year>2002</year>             <article-title>HF2: a double-stranded DNA tailed haloarchaeal virus with a mosaic genome.</article-title>             <source>Mol Microbiol</source>             <volume>44</volume>             <fpage>283</fpage>             <lpage>296</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Casjens1">
        <label>66</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Casjens</surname><given-names>S</given-names></name></person-group>             <year>2003</year>             <article-title>Prophages and bacterial genomics: what have we learned so far?</article-title>             <source>Mol Microbiol</source>             <volume>49</volume>             <fpage>277</fpage>             <lpage>300</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Hickman1">
        <label>67</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Hickman</surname><given-names>AB</given-names></name><name name-style="western"><surname>Chandler</surname><given-names>M</given-names></name><name name-style="western"><surname>Dyda</surname><given-names>F</given-names></name></person-group>             <year>2010</year>             <article-title>Integrating prokaryotes and eukaryotes: DNA transposases in light of structure.</article-title>             <source>Crit Rev Biochem Mol Biol</source>             <volume>45</volume>             <fpage>50</fpage>             <lpage>69</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Siguier1">
        <label>68</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Siguier</surname><given-names>P</given-names></name><name name-style="western"><surname>Perochon</surname><given-names>J</given-names></name><name name-style="western"><surname>Lestrade</surname><given-names>L</given-names></name><name name-style="western"><surname>Mahillon</surname><given-names>J</given-names></name><name name-style="western"><surname>Chandler</surname><given-names>M</given-names></name></person-group>             <year>2006</year>             <article-title>ISfinder: the reference centre for bacterial insertion sequences.</article-title>             <source>Nucleic Acids Res</source>             <volume>34</volume>             <fpage>D32</fpage>             <lpage>36</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Chen1">
        <label>69</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Chen</surname><given-names>Y</given-names></name><name name-style="western"><surname>Zhou</surname><given-names>F</given-names></name><name name-style="western"><surname>Li</surname><given-names>G</given-names></name><name name-style="western"><surname>Xu</surname><given-names>Y</given-names></name></person-group>             <year>2009</year>             <article-title>MUST: a system for identification of miniature inverted-repeat transposable elements and applications to <italic>Anabaena variabilis</italic> and <italic>Haloquadratum walsbyi</italic>.</article-title>             <source>Gene</source>             <volume>436</volume>             <fpage>1</fpage>             <lpage>7</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Klein1">
        <label>70</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Klein</surname><given-names>C</given-names></name><name name-style="western"><surname>Aivaliotis</surname><given-names>M</given-names></name><name name-style="western"><surname>Olsen</surname><given-names>JV</given-names></name><name name-style="western"><surname>Falb</surname><given-names>M</given-names></name><name name-style="western"><surname>Besir</surname><given-names>H</given-names></name><etal/></person-group>             <year>2007</year>             <article-title>The low molecular weight proteome of <italic>Halobacterium salinarum</italic>.</article-title>             <source>J Proteome Res</source>             <volume>6</volume>             <fpage>1510</fpage>             <lpage>1518</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-TonHoang1">
        <label>71</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Ton-Hoang</surname><given-names>B</given-names></name><name name-style="western"><surname>Guynet</surname><given-names>C</given-names></name><name name-style="western"><surname>Ronning</surname><given-names>DR</given-names></name><name name-style="western"><surname>Cointin-Marty</surname><given-names>B</given-names></name><name name-style="western"><surname>Dyda</surname><given-names>F</given-names></name><etal/></person-group>             <year>2005</year>             <article-title>Transposition of ISHp608, member of an unusual family of bacterial insertion sequences.</article-title>             <source>EMBO J</source>             <volume>24</volume>             <fpage>3325</fpage>             <lpage>3338</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Kersulyte1">
        <label>72</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Kersulyte</surname><given-names>D</given-names></name><name name-style="western"><surname>Velapatino</surname><given-names>B</given-names></name><name name-style="western"><surname>Dailide</surname><given-names>G</given-names></name><name name-style="western"><surname>Mukhopadhyay</surname><given-names>AK</given-names></name><name name-style="western"><surname>Ito</surname><given-names>Y</given-names></name><etal/></person-group>             <year>2002</year>             <article-title>Transposable element ISHp608 of <italic>Helicobacter pylori</italic>: nonrandom geographic distribution, functional organization, and insertion specificity.</article-title>             <source>J Bacteriol</source>             <volume>184</volume>             <fpage>992</fpage>             <lpage>1002</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Falb2">
        <label>73</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Falb</surname><given-names>M</given-names></name><name name-style="western"><surname>Pfeiffer</surname><given-names>F</given-names></name><name name-style="western"><surname>Palm</surname><given-names>P</given-names></name><name name-style="western"><surname>Rodewald</surname><given-names>K</given-names></name><name name-style="western"><surname>Hickmann</surname><given-names>V</given-names></name><etal/></person-group>             <year>2005</year>             <article-title>Living with two extremes: conclusions from the genome sequence of <italic>Natronomonas pharaonis</italic>.</article-title>             <source>Genome Res</source>             <volume>15</volume>             <fpage>1336</fpage>             <lpage>1343</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Gordon1">
        <label>74</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Gordon</surname><given-names>D</given-names></name></person-group>             <year>2003</year>             <article-title>Viewing and editing assembled sequences using Consed.</article-title>             <source>Curr Protoc Bioinformatics</source>             <volume>Chapter 11</volume>             <fpage>Unit 11.2</fpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Pfeiffer2">
        <label>75</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Pfeiffer</surname><given-names>F</given-names></name><name name-style="western"><surname>Broicher</surname><given-names>A</given-names></name><name name-style="western"><surname>Gillich</surname><given-names>T</given-names></name><name name-style="western"><surname>Klee</surname><given-names>K</given-names></name><name name-style="western"><surname>Mejia</surname><given-names>J</given-names></name><etal/></person-group>             <year>2008</year>             <article-title>Genome information management and integrated data analysis with HaloLex.</article-title>             <source>Arch Microbiol</source>             <volume>190</volume>             <fpage>281</fpage>             <lpage>299</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Rampp1">
        <label>76</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Rampp</surname><given-names>M</given-names></name><name name-style="western"><surname>Soddemann</surname><given-names>T</given-names></name><name name-style="western"><surname>Lederer</surname><given-names>H</given-names></name></person-group>             <year>2006</year>             <article-title>The MIGenAS integrated bioinformatics toolkit for web-based sequence analysis.</article-title>             <source>Nucleic Acids Res</source>             <volume>34</volume>             <fpage>W15</fpage>             <lpage>19</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Teeling1">
        <label>77</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Teeling</surname><given-names>H</given-names></name><name name-style="western"><surname>Waldmann</surname><given-names>J</given-names></name><name name-style="western"><surname>Lombardot</surname><given-names>T</given-names></name><name name-style="western"><surname>Bauer</surname><given-names>M</given-names></name><name name-style="western"><surname>Glöckner</surname><given-names>FO</given-names></name></person-group>             <year>2004</year>             <article-title>TETRA: a web-service and a stand-alone program for the analysis and comparison of tetranucleotide usage patterns in DNA sequences.</article-title>             <source>BMC Bioinformatics</source>             <volume>5</volume>             <fpage>163</fpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Rutherford1">
        <label>78</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Rutherford</surname><given-names>K</given-names></name><name name-style="western"><surname>Parkhill</surname><given-names>J</given-names></name><name name-style="western"><surname>Crook</surname><given-names>J</given-names></name><name name-style="western"><surname>Horsnell</surname><given-names>T</given-names></name><name name-style="western"><surname>Rice</surname><given-names>P</given-names></name><etal/></person-group>             <year>2000</year>             <article-title>Artemis: sequence visualization and annotation.</article-title>             <source>Bioinformatics</source>             <volume>16</volume>             <fpage>944</fpage>             <lpage>945</lpage>          </element-citation>
      </ref>
      <ref id="pone.0020968-Dinsdale1">
        <label>79</label>
        <element-citation publication-type="journal" xlink:type="simple">             <person-group person-group-type="author"><name name-style="western"><surname>Dinsdale</surname><given-names>EA</given-names></name><name name-style="western"><surname>Edwards</surname><given-names>RA</given-names></name><name name-style="western"><surname>Hall</surname><given-names>D</given-names></name><name name-style="western"><surname>Angly</surname><given-names>F</given-names></name><name name-style="western"><surname>Breitbart</surname><given-names>M</given-names></name><etal/></person-group>             <year>2008</year>             <article-title>Functional metagenomic profiling of nine biomes.</article-title>             <source>Nature</source>             <volume>452</volume>             <fpage>629</fpage>             <lpage>632</lpage>          </element-citation>
      </ref>
    </ref-list>
    
  </back>
</article>