<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1d3 20150301//EN" "http://jats.nlm.nih.gov/publishing/1.1d3/JATS-journalpublishing1.dtd">
<article article-type="research-article" dtd-version="1.1d3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">PLoS Comput Biol</journal-id>
<journal-id journal-id-type="publisher-id">plos</journal-id>
<journal-id journal-id-type="pmc">ploscomp</journal-id>
<journal-title-group>
<journal-title>PLOS Computational Biology</journal-title>
</journal-title-group>
<issn pub-type="ppub">1553-734X</issn>
<issn pub-type="epub">1553-7358</issn>
<publisher>
<publisher-name>Public Library of Science</publisher-name>
<publisher-loc>San Francisco, CA USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.1371/journal.pcbi.1006847</article-id>
<article-id pub-id-type="publisher-id">PCOMPBIOL-D-18-01301</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Research Article</subject>
</subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Research and analysis methods</subject><subj-group><subject>Database and informatics methods</subject><subj-group><subject>Biological databases</subject><subj-group><subject>Genomic databases</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Computational biology</subject><subj-group><subject>Genome analysis</subject><subj-group><subject>Genomic databases</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Genomics</subject><subj-group><subject>Genome analysis</subject><subj-group><subject>Genomic databases</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Research and analysis methods</subject><subj-group><subject>Database and informatics methods</subject><subj-group><subject>Database searching</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Computational biology</subject><subj-group><subject>Comparative genomics</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Genomics</subject><subj-group><subject>Comparative genomics</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Research and analysis methods</subject><subj-group><subject>Database and informatics methods</subject><subj-group><subject>Bioinformatics</subject><subj-group><subject>Sequence analysis</subject><subj-group><subject>Sequence motif 
analysis</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Computational biology</subject><subj-group><subject>Genome analysis</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Genomics</subject><subj-group><subject>Genome analysis</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Computer and information sciences</subject><subj-group><subject>Computer applications</subject><subj-group><subject>Web-based applications</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Microbiology</subject><subj-group><subject>Bacteriology</subject><subj-group><subject>Bacterial physiology</subject><subj-group><subject>Secretion systems</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Microbiology</subject><subj-group><subject>Microbial physiology</subject><subj-group><subject>Bacterial physiology</subject><subj-group><subject>Secretion systems</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Medicine and health sciences</subject><subj-group><subject>Pathology and laboratory medicine</subject><subj-group><subject>Pathogens</subject><subj-group><subject>Virulence factors</subject><subj-group><subject>Secretion systems</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life 
sciences</subject><subj-group><subject>Microbiology</subject><subj-group><subject>Bacteriology</subject><subj-group><subject>Bacterial genetics</subject><subj-group><subject>Bacterial genomics</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Microbial genetics</subject><subj-group><subject>Bacterial genetics</subject><subj-group><subject>Bacterial genomics</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Genomics</subject><subj-group><subject>Microbial genomics</subject><subj-group><subject>Bacterial genomics</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Microbiology</subject><subj-group><subject>Microbial genomics</subject><subj-group><subject>Bacterial genomics</subject></subj-group></subj-group></subj-group></subj-group></article-categories>
<title-group>
<article-title>Searching algorithm for Type IV effector proteins (S4TE) 2.0: Improved tools for Type IV effector prediction, analysis and comparison in proteobacteria</article-title>
<alt-title alt-title-type="running-head">Searching algorithm for Type IV effectors prediction and comparison in proteobacteria</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Noroy</surname>
<given-names>Christophe</given-names>
</name>
<role content-type="http://credit.casrai.org/">Conceptualization</role>
<role content-type="http://credit.casrai.org/">Data curation</role>
<role content-type="http://credit.casrai.org/">Formal analysis</role>
<role content-type="http://credit.casrai.org/">Methodology</role>
<role content-type="http://credit.casrai.org/">Software</role>
<role content-type="http://credit.casrai.org/">Validation</role>
<role content-type="http://credit.casrai.org/">Visualization</role>
<role content-type="http://credit.casrai.org/">Writing – original draft</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Lefrançois</surname>
<given-names>Thierry</given-names>
</name>
<role content-type="http://credit.casrai.org/">Funding acquisition</role>
<role content-type="http://credit.casrai.org/">Project administration</role>
<role content-type="http://credit.casrai.org/">Supervision</role>
<role content-type="http://credit.casrai.org/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0003-2735-176X</contrib-id>
<name name-style="western">
<surname>Meyer</surname>
<given-names>Damien F.</given-names>
</name>
<role content-type="http://credit.casrai.org/">Conceptualization</role>
<role content-type="http://credit.casrai.org/">Formal analysis</role>
<role content-type="http://credit.casrai.org/">Funding acquisition</role>
<role content-type="http://credit.casrai.org/">Investigation</role>
<role content-type="http://credit.casrai.org/">Methodology</role>
<role content-type="http://credit.casrai.org/">Project administration</role>
<role content-type="http://credit.casrai.org/">Supervision</role>
<role content-type="http://credit.casrai.org/">Validation</role>
<role content-type="http://credit.casrai.org/">Visualization</role>
<role content-type="http://credit.casrai.org/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
<xref ref-type="corresp" rid="cor001">*</xref>
</contrib>
</contrib-group>
<aff id="aff001"><label>1</label> <addr-line>CIRAD, UMR ASTRE, Petit-Bourg, Guadeloupe, France</addr-line></aff>
<aff id="aff002"><label>2</label> <addr-line>ASTRE, Univ Montpellier, CIRAD, INRA, Montpellier, France</addr-line></aff>
<aff id="aff003"><label>3</label> <addr-line>Université des Antilles, Pointe-à-Pitre, Guadeloupe, France</addr-line></aff>
<contrib-group>
<contrib contrib-type="editor" xlink:type="simple">
<name name-style="western">
<surname>Schneidman-Duhovny</surname>
<given-names>Dina</given-names>
</name>
<role>Editor</role>
<xref ref-type="aff" rid="edit1"/>
</contrib>
</contrib-group>
<aff id="edit1"><addr-line>Hebrew University of Jerusalem, ISRAEL</addr-line></aff>
<author-notes>
<fn fn-type="conflict" id="coi001">
<p>The authors have declared that no competing interests exist.</p>
</fn>
<corresp id="cor001">* E-mail: <email xlink:type="simple">damien.meyer@cirad.fr</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>25</day>
<month>3</month>
<year>2019</year>
</pub-date>
<pub-date pub-type="collection">
<month>3</month>
<year>2019</year>
</pub-date>
<volume>15</volume>
<issue>3</issue>
<elocation-id>e1006847</elocation-id>
<history>
<date date-type="received">
<day>23</day>
<month>7</month>
<year>2018</year>
</date>
<date date-type="accepted">
<day>5</day>
<month>2</month>
<year>2019</year>
</date>
</history>
<permissions>
<copyright-year>2019</copyright-year>
<copyright-holder>Noroy et al</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="info:doi/10.1371/journal.pcbi.1006847"/>
<abstract>
<p>Bacterial pathogens have evolved numerous strategies to corrupt, hijack or mimic cellular processes in order to survive and proliferate. Among those strategies, Type IV effectors (T4Es) are proteins secreted by pathogenic bacteria to manipulate host cell processes during infection. They are delivered into eukaryotic cells in an ATP-dependent manner via the type IV secretion system, a specialized multiprotein complex. T4Es contain a wide spectrum of features including eukaryotic-like domains, localization signals or a C-terminal translocation signal. A combination of these features enables prediction of T4Es in a given bacterial genome. In this study, we developed a web-based comprehensive suite of tools with a user-friendly graphical interface. This version 2.0 of S4TE (Searching Algorithm for Type IV Effector Proteins; <ext-link ext-link-type="uri" xlink:href="http://sate.cirad.fr" xlink:type="simple">http://sate.cirad.fr</ext-link>) enables accurate prediction and comparison of T4Es. Search parameters and threshold can be customized by the user to work with any genome sequence, whether publicly available or not. Applications range from characterizing effector features and identifying potential T4Es to analyzing the effectors based on the genome G+C composition and local gene density. S4TE 2.0 allows the comparison of putative T4E repertoires of up to four bacterial strains at the same time. The software identifies T4E orthologs among strains and provides a Venn diagram and lists of genes for each intersection. New interactive features offer the best visualization of the location of candidate T4Es and hyperlinks to NCBI and Pfam databases. S4TE 2.0 is designed to evolve rapidly with the publication of new experimentally validated T4Es, which will reinforce the predictive power of the algorithm. 
The computational methodology can be used to identify a wide spectrum of candidate bacterial effectors that lack sequence conservation but have similar amino acid characteristics. This approach will provide very valuable information about bacterial host-specificity and virulence factors and help identify host targets for the development of new anti-bacterial molecules.</p>
</abstract>
<funding-group>
<award-group id="award001">
<funding-source>
<institution-wrap>
<institution-id institution-id-type="funder-id">http://dx.doi.org/10.13039/501100008530</institution-id>
<institution>European Regional Development Fund</institution>
</institution-wrap>
</funding-source>
<award-id>2015-FED-186</award-id>
</award-group>
<funding-statement>The authors acknowledge the financial support from European Regional Development Fund, n° 2015-FED-186, MALIN. The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</funding-statement>
</funding-group>
<counts>
<fig-count count="5"/>
<table-count count="2"/>
<page-count count="12"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>PLOS Publication Stage</meta-name>
<meta-value>vor-update-to-uncorrected-proof</meta-value>
</custom-meta>
<custom-meta>
<meta-name>Publication Update</meta-name>
<meta-value>2019-04-04</meta-value>
</custom-meta>
<custom-meta id="data-availability">
<meta-name>Data Availability</meta-name>
<meta-value>All relevant data are within the paper and its Supporting Information files. The software is available at <ext-link ext-link-type="uri" xlink:href="https://zenodo.org/record/1418392#.W5qloqZKiUk" xlink:type="simple">https://zenodo.org/record/1418392#.W5qloqZKiUk</ext-link>.</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<p>This is a <italic>PLOS Computational Biology</italic> Software paper.</p>
<sec id="sec001" sec-type="intro">
<title>Introduction</title>
<p>Proteobacteria have evolved specific effector proteins to manipulate host cell gene expression and processes, hijack immune responses and exploit host cell machinery during infection. These proteins are secreted by ATP-dependent protein complexes named type IV secretion systems (T4SS). Some of these type IV effectors (T4Es) have been identified and shown to be crucial for pathogenicity. To facilitate the identification of putative T4Es, we previously developed a bioinformatics tool called S4TE 1.0 (Searching Algorithm for Type IV secretion system effector proteins) [<xref ref-type="bibr" rid="pcbi.1006847.ref001">1</xref>].</p>
<p>In the present article, we present the second version of ‘S4TE’. S4TE 2.0 is a tool for <italic>in silico</italic> screening of proteobacteria genomes and T4E prediction based on the combined use of 14 distinctive features. In this updated version, modules searching for promoter motifs, homology, NLS, MLS and E-block are more efficient. A new module has been added in the workflow to locate phosphorylation (EPIYA-like) domains.</p>
<p>S4TE 2.0 consists of the S4TE 1.4 tool and a web interface available to non-commercial users at <ext-link ext-link-type="uri" xlink:href="http://sate.cirad.fr/" xlink:type="simple">http://sate.cirad.fr</ext-link>. The web interface is designed to make S4TE 2.0 easy to use for biologists and more time efficient. Most of the genomes and plasmids available in the NCBI database of pathogenic bacteria that have a type IV secretion system have been loaded into the S4TE 2.0 database so effectors can be predicted in only a few clicks.</p>
<p>S4TE 2.0 offers advanced users an expert mode (S4TE-EM) they can use to customize S4TE 2.0 search parameters (<italic>e</italic>.<italic>g</italic>. exclude modules, modify module weightings). In this mode, S4TE 2.0 can be used as 14 independent programs to search for particular features in a given bacterial genome (<italic>e</italic>.<italic>g</italic>. NLS, C-ter charges).</p>
<p>A new function for comparative genomics (S4TE-CG) has been added to compare up to four predicted effectomes in just a few seconds.</p>
<p>All S4TE 2.0 results are interactive and linked to NCBI and Pfam databases.</p>
</sec>
<sec id="sec002" sec-type="materials|methods">
<title>Design and implementation</title>
<sec id="sec003">
<title>Programming</title>
<p>The S4TE 2.0 software consists of a graphical interface (website) for using the S4TE 1.4 algorithm for genome analysis, prediction of Type IV effectors (T4Es) and comparison of effectomes. S4TE 1.4 is an update of S4TE 1.0 [<xref ref-type="bibr" rid="pcbi.1006847.ref001">1</xref>]. It is written in the Perl programming language and uses the NCBI, Pfam, EMBOSS, BioPerl and MitoFates libraries as well as its own programs and database. It was developed to improve the prediction performance of S4TE 1.0 and to provide new functionalities to search for new features, enable interactivity and comparative genomics. The 10 S4TE search modules in S4TE 1.0 were kept in S4TE 1.4. However, some modules have been modified (promoter motif search, homology, MLS, NLS, E-block and Pfam database) to improve their predictive power. A supplementary module (EPIYA search) has been added to the workflow. In this paper, only the EPIYA search module and the revised modules are described.</p>
</sec>
<sec id="sec004">
<title>Modified or novel searching modules in S4TE 2.0</title>
<sec id="sec005">
<title>Promoter motif search</title>
<p>As several T4Es in a given bacterium can be subjected to coordinated regulation by the same protein, <italic>e</italic>.<italic>g</italic>. PmrA [<xref ref-type="bibr" rid="pcbi.1006847.ref002">2</xref>], we used S4TE 2.0 to conduct a search for conserved motifs (potential regulatory motifs) in the short promoter regions of the genes. The aim was to improve S4TE 2.0 prediction of possible regulons of T4Es. Enriched DNA motifs were searched for in the intergenic region upstream of the start codon, using MEME [<xref ref-type="bibr" rid="pcbi.1006847.ref003">3</xref>]. Eight consensus motifs were identified in different bacteria (<xref ref-type="fig" rid="pcbi.1006847.g001">Fig 1</xref>). The corresponding motif search module of S4TE 2.0 extracts the 5’ flanking intergenic regions (5’ FIRs) and searches for all these motifs using a position-specific scoring matrix generated from multiple sequence alignments with the promoters of known T4Es. Only alignments with a score above the chosen threshold are selected. The threshold that yielded the highest sensitivity and specificity for each motif in the corresponding bacterium was chosen (<xref ref-type="fig" rid="pcbi.1006847.g001">Fig 1</xref>).</p>
<fig id="pcbi.1006847.g001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1006847.g001</object-id>
<label>Fig 1</label>
<caption>
<title>Enriched DNA motifs found in several bacteria in the intergenic region upstream of known type IV effectors and implemented in S4TE 2.0 searches.</title>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1006847.g001" xlink:type="simple"/>
</fig>
</sec>
<sec id="sec006">
<title>Homology</title>
<p>BLAST 2.2 was used to compare proteins to search for homologies with known T4Es [<xref ref-type="bibr" rid="pcbi.1006847.ref004">4</xref>]. The cut-off of the S4TE 1.0 homology module was changed. S4TE 2.0 compares the database containing all known T4Es with the query proteome and returns all orthologs with an expected value (E) cut-off of &lt;10<sup>−4</sup>. This E-value cut-off was selected to find accurate orthologs between phylogenetically distant bacterial species. Databases containing proven effectors have also been updated (<xref ref-type="supplementary-material" rid="pcbi.1006847.s003">S3 Dataset</xref>).</p>
</sec>
<sec id="sec007">
<title>Nuclear localization signals (NLS)</title>
<p>NLS are protein sequences that target proteins to the nucleus of eukaryotic cells [<xref ref-type="bibr" rid="pcbi.1006847.ref005">5</xref>]. We assume that the occurrence of NLS in a bacterial protein sequence would be a good indicator of secretion. There are two classes of NLS, monopartite and bipartite. In S4TE 2.0, the search for monopartite NLS has been improved according to Ruhanen <italic>et al</italic>. [<xref ref-type="bibr" rid="pcbi.1006847.ref006">6</xref>]. We rewrote this module to add more known NLS motifs in the search. Monopartite NLS consist of [KR]-[KR]-[KR]-[KR]-[KR], X-K-[KR]-[KRP]-[KR]-X, X-R-K-[KRP]-[KR]-X, X-R-K-X-[KR]-[KRP], X-K-[KR]-[KR]-X-[KRP], X-R-K-[KR]-X-[KRP], X-K-[KR]-X-[KR]-X-X, X-R-K-X-[KR]-X-X, X-K-[KR]-[KR]-X-X-X and X-R-K-[KR]-X-X-X motifs. Bipartite NLS were also searched with S4TE 1.0 motif (K-[KR]-X(6,20)-[KR]-[KR]-X-[KR]). The new module was tested with a dataset of 32 NLS and 32 no-NLS containing proteins (<xref ref-type="supplementary-material" rid="pcbi.1006847.s001">S1 Dataset</xref>). The module selected 24 true positives (TP) and only three false positives (FP). This represents a sensitivity (Se) of 75% and a specificity (Sp) of 91%.</p>
</sec>
<sec id="sec008">
<title>Mitochondrial localization signals (MLS)</title>
<p>MLS are signal sequences located in the N-terminus of proteins that are targeted to mitochondria. This sequence is cleaved after translocation of the protein inside the mitochondria [<xref ref-type="bibr" rid="pcbi.1006847.ref005">5</xref>,<xref ref-type="bibr" rid="pcbi.1006847.ref007">7</xref>]. To predict MLS in S4TE 2.0, we used the MitoFates tool [<xref ref-type="bibr" rid="pcbi.1006847.ref008">8</xref>]. MitoFates predicts mitochondrial presequences, a cleavable localization signal located in the N-terminal, and its cleaved position.</p>
</sec>
<sec id="sec009">
<title>E-block</title>
<p>The E-block domain consists of a glutamate-rich sequence within the C-terminal 30 amino acids and is associated with T4E translocation in <italic>L</italic>. <italic>pneumophila</italic>. Huang <italic>et al</italic>. showed that an E-block motif is also important for the translocation of T4SS substrates [<xref ref-type="bibr" rid="pcbi.1006847.ref009">9</xref>]. In S4TE 2.0, the E-block module was modified according to Lifshitz <italic>et al</italic>. [<xref ref-type="bibr" rid="pcbi.1006847.ref010">10</xref>]. The E-block was searched in a window of 22 amino acids between position -4 C-terminal and -26 C-terminal. The motif that is searched for is a motif of 10 amino acids containing three or more glutamate (E) residues. The module was tested on 98 E-block and 98 no-E-block containing proteins (<xref ref-type="supplementary-material" rid="pcbi.1006847.s002">S2 Dataset</xref>). This module selected 60 TP and only 6 FP (Sensitivity of 61%, Specificity of 94%).</p>
</sec>
<sec id="sec010">
<title>Pfam database</title>
<p>The local Pfam database has been updated to find more eukaryotic domains of known effectors of <italic>Legionella pneumophila</italic> [<xref ref-type="bibr" rid="pcbi.1006847.ref010">10</xref>]. Eukaryotic domains were extracted from the whole Pfam database and added to the S4TE 2.0 workflow. All eukaryotic domains used for this search are listed in <xref ref-type="supplementary-material" rid="pcbi.1006847.s004">S4 Dataset</xref>.</p>
</sec>
<sec id="sec011">
<title>EPIYA search</title>
<p>EPIYA search is a new module implemented in S4TE 2.0. Bacterial EPIYA effectors are delivered into host cells by T4SS, where they undergo tyrosine phosphorylation at the EPIYA motif and thereby manipulate host signalling by tight interaction with SH2 domain-containing proteins [<xref ref-type="bibr" rid="pcbi.1006847.ref011">11</xref>]. In <italic>H</italic>. <italic>pylori</italic>, EPIYA has been shown to contribute to the secretion of a CagA effector [<xref ref-type="bibr" rid="pcbi.1006847.ref012">12</xref>]. Moreover the functional versatility of EPIYA motif highlights the importance of this emerging family of bacterial effectors [<xref ref-type="bibr" rid="pcbi.1006847.ref011">11</xref>]. We searched for conserved EPIYA motifs (EPIYA, ENIYE, NPLYE, EHLYA, TPLYA, EPLYA, ESIYE, EDLYA, EPIYG, EPVYA, VPNYA, EHIYD) in different bacteria that have a type IV secretion system and we searched for hypothetical EPIYA motifs using the motif E-X-X-Y-X.</p>
</sec>
</sec>
</sec>
<sec id="sec012">
<title>Results</title>
<sec id="sec013">
<title>Validation</title>
<p>S4TE 2.0 is a software tool with 14 independent modules. We tested all the modules independently. The 14 modules were weighted to make S4TE 2.0 efficient. The weighting of each module was calculated according to its performance in finding effectors in <italic>L</italic>. <italic>pneumophila</italic> Philadelphia 1, which has been shown to have the most extensive repertoire of T4Es ever identified, with 286 confirmed effectors [<xref ref-type="bibr" rid="pcbi.1006847.ref010">10</xref>].</p>
<p>Each module has its own weighting in S4TE 2.0 searches. The weightings were calculated for each module based on their Positive Predictive Value (PPV [PPV = TP/(TP+FP)]) for <italic>L</italic>. <italic>pneumophila</italic> (<xref ref-type="table" rid="pcbi.1006847.t001">Table 1</xref>).</p>
<table-wrap id="pcbi.1006847.t001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1006847.t001</object-id>
<label>Table 1</label> <caption><title>Determination of S4TE 2.0 weightings according to the positive predictive value of each S4TE module in <italic>Legionella pneumophila</italic> Philadelphia 1.</title> <p>Enumeration of effectors predicted by individual features implemented in S4TE 2.0: the number of true positives, false positives and the positive predictive value (PPV, expressed in %) is indicated.</p></caption>
<alternatives>
<graphic id="pcbi.1006847.t001g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1006847.t001" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="center">S4TE 2.0 features</th>
<th align="center">Promoter motif</th>
<th align="center">Homology</th>
<th align="center">Euk-like domains</th>
<th align="center">DUF</th>
<th align="center">EPIYA</th>
<th align="center">NLS</th>
<th align="center">MLS</th>
<th align="center">Prenylation domain</th>
<th align="center">Coiled coil</th>
<th align="center">C-ter basicity</th>
<th align="center">C-ter charges</th>
<th align="center">C-ter hydrophobicity</th>
<th align="center">Global hydrophobicity</th>
<th align="center">E-block</th>
</tr>
</thead>
<tbody>
<tr>
<td align="center" style="background-color:#D9D9D9"><bold>True positives</bold></td>
<td align="center" style="background-color:#D9D9D9">108</td>
<td align="center" style="background-color:#D9D9D9">285</td>
<td align="center" style="background-color:#D9D9D9">13</td>
<td align="center" style="background-color:#D9D9D9">2</td>
<td align="center" style="background-color:#D9D9D9">30</td>
<td align="center" style="background-color:#D9D9D9">105</td>
<td align="center" style="background-color:#D9D9D9">6</td>
<td align="center" style="background-color:#D9D9D9">1</td>
<td align="center" style="background-color:#D9D9D9">100</td>
<td align="center" style="background-color:#D9D9D9">262</td>
<td align="center" style="background-color:#D9D9D9">62</td>
<td align="center" style="background-color:#D9D9D9">41</td>
<td align="center" style="background-color:#D9D9D9">117</td>
<td align="center" style="background-color:#D9D9D9">98</td>
</tr>
<tr>
<td align="center"><bold>False positives</bold></td>
<td align="center">434</td>
<td align="center">34</td>
<td align="center">27</td>
<td align="center">106</td>
<td align="center">101</td>
<td align="center">783</td>
<td align="center">79</td>
<td align="center">6</td>
<td align="center">231</td>
<td align="center">2376</td>
<td align="center">863</td>
<td align="center">339</td>
<td align="center">156</td>
<td align="center">232</td>
</tr>
<tr>
<td align="center" style="background-color:#D9D9D9"><bold>PPV (%)</bold></td>
<td align="center" style="background-color:#D9D9D9">20</td>
<td align="center" style="background-color:#D9D9D9">89</td>
<td align="center" style="background-color:#D9D9D9">32</td>
<td align="center" style="background-color:#D9D9D9">2</td>
<td align="center" style="background-color:#D9D9D9">23</td>
<td align="center" style="background-color:#D9D9D9">12</td>
<td align="center" style="background-color:#D9D9D9">7</td>
<td align="center" style="background-color:#D9D9D9">14</td>
<td align="center" style="background-color:#D9D9D9">30</td>
<td align="center" style="background-color:#D9D9D9">10</td>
<td align="center" style="background-color:#D9D9D9">7</td>
<td align="center" style="background-color:#D9D9D9">10</td>
<td align="center" style="background-color:#D9D9D9">42</td>
<td align="center" style="background-color:#D9D9D9">30</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<p>The S4TE 2.0 prediction threshold was then defined to enable the best prediction by disregarding homology with known effectors. The threshold was chosen by examining the Sensitivity (Se), Specificity (Sp), Positive Predictive Value (PPV), Negative Predictive Value (NPV) and Accuracy (Acc) for thresholds ranging from 40 to 120 on the test dataset (<xref ref-type="fig" rid="pcbi.1006847.g002">Fig 2</xref>). The threshold was set at a score of 72 to obtain the best global PPV possible with the least possible impact on sensitivity.</p>
<fig id="pcbi.1006847.g002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1006847.g002</object-id>
<label>Fig 2</label>
<caption>
<title>Distribution of S4TE 2.0 performances according to the threshold.</title>
<p>Plot of the sensitivity (Se), specificity (Sp), positive predictive value (PPV), negative predictive value (NPV) and accuracy (Acc) of S4TE 2.0 with no homology module on <italic>L. pneumophila</italic> genome as a function of the S4TE 2.0 threshold. A threshold of 72 proved to be the best combination of these characteristics.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1006847.g002" xlink:type="simple"/>
</fig>
<p>This threshold combined with weightings led to the correct prediction (true positives) of 282 of the 286 effectors of <italic>L</italic>. <italic>pneumophila</italic> (Se = 98%, PPV = 60%) and 96 incorrect predictions (false positives) (Sp = 96%, NPV = 99%).</p>
<p>With this update, S4TE 2.0 prediction is more powerful than that of S4TE 1.0 whose sensitivity was 14% lower. Without homology, sensitivity increased by 25%. Other characteristics including specificity, accuracy and negative predictive value did not change significantly (<xref ref-type="table" rid="pcbi.1006847.t002">Table 2</xref>). S4TE 2.0 allows flexible, highly sensitive and specific detection of new putative T4SS effectors.</p>
<table-wrap id="pcbi.1006847.t002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1006847.t002</object-id>
<label>Table 2</label> <caption><title>Performance comparison between S4TE 1.0 and S4TE 2.0 algorithms with or without the homology search module.</title></caption>
<alternatives>
<graphic id="pcbi.1006847.t002g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1006847.t002" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="center">Software</th>
<th align="center" colspan="2">S4TE 1.0</th>
<th align="center" colspan="2">S4TE 2.0</th>
</tr>
<tr>
<th align="center">Homology</th>
<th align="center">With</th>
<th align="center">Without</th>
<th align="center">With</th>
<th align="center">Without</th>
</tr>
</thead>
<tbody>
<tr>
<td align="center"><bold>Sensitivity</bold></td>
<td align="center">0.86</td>
<td align="center">0.16</td>
<td align="center">1</td>
<td align="center">0.41</td>
</tr>
<tr>
<td align="center"><bold>Specificity</bold></td>
<td align="center">0.97</td>
<td align="center">0.97</td>
<td align="center">0.93</td>
<td align="center">0.93</td>
</tr>
<tr>
<td align="center"><bold>Positive Predictive Value</bold></td>
<td align="center">0.74</td>
<td align="center">0.44</td>
<td align="center">0.60</td>
<td align="center">0.43</td>
</tr>
<tr>
<td align="center"><bold>Negative Predictive Value</bold></td>
<td align="center">0.98</td>
<td align="center">0.91</td>
<td align="center">1</td>
<td align="center">0.94</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
</sec>
<sec id="sec014">
<title>S4TE-CG</title>
<p>S4TE-CG is a new tool designed to compare different repertoires of putative T4Es identified by S4TE 2.0. The corresponding S4TE-CG algorithm is described in <xref ref-type="fig" rid="pcbi.1006847.g003">Fig 3</xref>. The user can compare up to four effectomes simultaneously. S4TE 2.0 results from selected genomes (effectomes) are compared with Blastp 2.2 with an expected value (E) cut-off of &lt;10<sup>−4</sup> to find homologous proteins in each effectome. S4TE-CG successively compares all effectomes in a pairwise manner; the overlaps between the effectomes of each genome are calculated and the final results are plotted on a Venn diagram and listed in an interactive table. All effectors are clickable, and the user is redirected to the S4TE 2.0 results on the concerned effector. The table can be easily copied and pasted for export.</p>
<fig id="pcbi.1006847.g003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1006847.g003</object-id>
<label>Fig 3</label>
<caption>
<title>Flow chart of the comparison of 4 effectomes using S4TE-CG.</title>
<p>Users can compare up to four genomes simultaneously. (1) S4TE 2.0 results from selected genomes (effectomes) are compared with Blastp 2.2 to find homologous proteins in each effectome. (2) S4TE-CG successively compares all effectomes in a pairwise manner and calculates any overlaps between the effectomes of each genome. (3) The final results are plotted on a Venn diagram and listed in an interactive table.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1006847.g003" xlink:type="simple"/>
</fig>
</sec>
<sec id="sec015">
<title>Availability and future directions</title>
<sec id="sec016">
<title>Command-line interface</title>
<p>S4TE 2.0 is a web interface but the S4TE 1.4 package is freely available to non-commercial users at <ext-link ext-link-type="uri" xlink:href="http://sate.cirad.fr/S4TE-Doc.php" xlink:type="simple">http://sate.cirad.fr/S4TE-Doc.php</ext-link>. All programming was done using Perl 5.18 and BioPerl 1.6.1. The software runs on Unix-like platforms (Linux Ubuntu 14.04 and Mac OS X). All required packages and the installation process are described in the user guide included in the package. The user guide also details the options for running S4TE. By default, the command line to launch S4TE is <bold><monospace>./S4TE.pl -f “Genbank_file”</monospace></bold> in the S4TE folder (<bold><monospace>cd way_to_S4TE/S4TE/</monospace></bold>). Some options are available for the user to launch S4TE: <bold><monospace>-c</monospace></bold>, suppression of a module in the pipeline; <bold><monospace>-w</monospace></bold>, modification of the weight of each module in the pipeline; <bold><monospace>-t</monospace></bold>, imposition of a threshold for effector selection. Each S4TE module creates a .txt file in the folder <bold><monospace>way_to_S4TE/S4TE/Jobs/ job&lt;Name_of_genome_folder&gt;&lt;year&gt;&lt;month&gt;&lt;day&gt;&lt;hour&gt;&lt;min&gt;</monospace></bold>.</p>
<p>All the results are compiled in the <italic>CompilationFile</italic>.<italic>txt</italic> and <italic>Results</italic>.<italic>txt</italic> in the same folder.</p>
</sec>
<sec id="sec017">
<title>Web interface</title>
<p>The S4TE 2.0 website is powered from scratch on the ‘CIRAD web server’. All the features of the web site were tested on common web browsers. S4TE 2.0 found T4Es in a large genome database (<xref ref-type="supplementary-material" rid="pcbi.1006847.s005">S5 Dataset</xref>) available to all users. A user account is available and necessary to keep your jobs for up to three months, to import your own genome into an S4TE 2.0 temporary database and to ask to add a new proven effector to the database. The addition of an effector to the database must be accompanied by a reference (scientific article) and will be checked manually before the effector is added to the database. Those who subscribed to the newsletter will be notified by email about the addition of new effectors to the database and the effector will be visible in the S4TE 2.0-tab strip. This free account allows users to search for proteins in the S4TE 2.0 database using the name, the locus tag or NCBI number of a protein in the search bar. The account also allows the user to subscribe to the S4TE newsletter that summarizes any changes made to the software and provides updates on the latest research on Type IV Effectors.</p>
</sec>
<sec id="sec018">
<title>S4TE 2.0 is a simple and user-friendly tool</title>
<p>S4TE 2.0 is a web-based user-friendly tool that gets results in only a few clicks. The user can locate a chromosome in more than 340 bacterial genomes and plasmids available in the database and the results can be viewed by clicking on run S4TE 2.0 (<xref ref-type="supplementary-material" rid="pcbi.1006847.s005">S5 Dataset</xref>).</p>
<p>If the desired genome is not available in the database, the user can import it with a GenBank file (.gbk). S4TE 2.0 will import the file to a temporary database for three months. All S4TE tools (S4TE-EM and S4TE-CG) can then be used on the uploaded genome.</p>
<p>The S4TE 2.0 web page allows users to read some of the news published in the newsletter. Five news items are visible on the S4TE 2.0 web page, but all the news can be found by clicking on the bottom right link.</p>
<p><xref ref-type="fig" rid="pcbi.1006847.g004">Fig 4</xref> presents some results obtained with S4TE 2.0. All the proteins in the selected genome are represented on the S4TE 2.0 web results page. A score was calculated for each protein based on the weighting of each module. Proteins were ranked according to the same score. All proteins whose scores are above the threshold are considered as belonging to the S4TE 2.0 effectome. An iconography was created to help read the list (<xref ref-type="fig" rid="pcbi.1006847.g004">Fig 4A</xref>). Users can find all the details concerning each characteristic of a given protein by clicking on the protein on the web results page.</p>
<fig id="pcbi.1006847.g004" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1006847.g004</object-id>
<label>Fig 4</label>
<caption>
<title>Example of S4TE 2.0 results for <italic>Anaplasma phagocytophilum</italic> HZ.</title>
<p>APH-0740. (A) Schematic representations of proteins with different characteristics present in the sequence are shown. Characteristics are easy to find by highlighting the corresponding sequence in the effector sequence. These characteristics are detailed below the sequence. (B) Distribution of S4TE 2.0 predicted type IV effectors (T4Es) according to local gene density. The predicted T4Es are plotted according to the length of their flanking intergenic regions (FIRs). All <italic>A. phagocytophilum</italic> genes were sorted into 2-dimensional bins according to the length of their 5′ (y-axis) and 3′ (x-axis) FIRs. The number of genes in the bins is represented by a color-coded density graph. Genes whose FIRs are both longer than the median FIR length were considered as gene-sparse region (GSR) genes. Genes whose FIRs are both below the median value were considered as gene-dense region (GDR) genes. In-between region (IBR) genes are genes with a long 5′FIR and short 3′FIR, or inversely. Candidate T4Es predicted using the S4TE 2.0 algorithm were plotted on this distribution according to their own 3′ and 5′ FIRs. A color is assigned to each of the three following groups: Red to GDRs, orange to IBRs, and blue to GSRs. (C) Genome-wide distribution of predicted effectome according to the G+C content. From outer track to inner track, sense and antisense genes (black), S4TE 2.0 putative T4Es (pink), proved T4Es (turquoise), S4TE 2.0 putative T4Es in genomic region with low G+C content (yellow), S4TE 2.0 putative T4Es in genomic region with high G+C content (blue), G+C ≥ average G+C (red), G+C &lt; average G+C (green).</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1006847.g004" xlink:type="simple"/>
</fig>
<p>When a user runs S4TE 2.0, in addition to the results page, two graphs are automatically drawn. The first shows the distribution of predicted effectors according to local gene density (<xref ref-type="fig" rid="pcbi.1006847.g004">Fig 4B</xref>). The second one displays the distribution of predicted T4Es according to the G+C content along the genome (<xref ref-type="fig" rid="pcbi.1006847.g004">Fig 4C</xref>).</p>
</sec>
<sec id="sec019">
<title>S4TE-EM expert mode for accurate searching</title>
<p>S4TE-EM is the expert mode of S4TE 2.0. S4TE-EM allows the user to modify the weights of each module and to deactivate one or more modules in the search (<xref ref-type="fig" rid="pcbi.1006847.g005">Fig 5</xref>). The weight of a module can be changed by moving the position of the cursor next to the name of each module. Weightings can be changed between the lowest weighting available for the module and the threshold of S4TE 2.0 (<bold><monospace>t</monospace></bold> = 72). The lowest weight is calculated independently for each module as a function of the positive predictive value and corresponds to a value equal to 0.5. Users can also cancel one or more modules in the pipeline by unchecking the box next to the name of the module (<xref ref-type="fig" rid="pcbi.1006847.g005">Fig 5</xref>).</p>
<fig id="pcbi.1006847.g005" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1006847.g005</object-id>
<label>Fig 5</label>
<caption>
<title>The new front page of the S4TE-EM tool.</title>
<p>The right side provides some information about the page. The right side matches the user account. The user account shows all the jobs previously run in S4TE 2.0 and S4TE-CG. This account makes it possible to search for a protein with the search bar and to ask to add a proven T4 effector to the database. In the central part of the workspace, the user can select a genome in the drop-down menu. In S4TE-EM, the user can change the weighting or disable one or more modules (on the left) shown in the S4TE diagram (on the right) and run S4TE-EM by clicking on the ‘Run S4TE-EM’ button.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1006847.g005" xlink:type="simple"/>
</fig>
<p>All the modules are independent, and users can use S4TE-EM to locate the same characteristic throughout the genome. For example, if the user disables all the modules except NLS, S4TE-EM will find all proteins with an NLS in the genome, meaning users can use S4TE-EM as a new genome analysis tool.</p>
</sec>
<sec id="sec020">
<title>S4TE-CG comparative genomics tool to compare effectomes</title>
<p>S4TE-CG is a new tool designed to compare different effectomes predicted by S4TE 2.0. Users can choose up to four effectomes in S4TE 2.0 databases or upload a genome present in the temporary database. S4TE-CG displays results in a Venn diagram and in an interactive table. Users can easily find different subsets of information in the appropriate table by referring to the different colors in the Venn diagram (<xref ref-type="fig" rid="pcbi.1006847.g003">Fig 3</xref>). Information about each effector can easily be found by clicking on the name of the effector in the table. Alternatively, users can simply copy and paste the table into a .csv file.</p>
</sec>
</sec>
</sec>
<sec id="sec021" sec-type="conclusions">
<title>Conclusions</title>
<p>This paper presents updated S4TE software. The computational tool is designed to predict the presence of T4SS effector proteins in bacteria. The identification of T4Es and some characteristics are improved in this update. Compared with a machine learning approach, using S4TE 2.0 to predict T4Es in <italic>Legionella</italic> and <italic>Coxiella</italic> species [<xref ref-type="bibr" rid="pcbi.1006847.ref010">10</xref>,<xref ref-type="bibr" rid="pcbi.1006847.ref013">13</xref>,<xref ref-type="bibr" rid="pcbi.1006847.ref014">14</xref>] improved sensitivity (98% for S4TE 2.0 and 89% for Wang <italic>et al</italic>.) and gave equivalent specificity (97% for Wang <italic>et al</italic>. and 93% for S4TE 2.0). S4TE 2.0 is easy to use: only an internet connection and a few clicks are needed to search for T4Es in more than 340 bacterial genomes and plasmids. The results are displayed instantaneously for easy reading. An automated pipeline is also provided to analyze and visualize effector distribution in the genome according to G+C content and local gene density. S4TE 2.0 results are linked to bioinformatics databases like NCBI and Pfam. The S4TE 2.0 database is designed to evolve and will be updated by adding new proven effectors and new bacterial genomes. S4TE 2.0 not only predicts the T4Es but also their subcellular localization (NLS, MLS, prenylation) and the function of these proteins (Coiled coils, EPIYA, Euk-like, etc.). All these features make S4TE 2.0 a powerful software tool for studies of T4Es.</p>
<p>S4TE 2.0 also offers an expert mode, which allows users to make manual adjustments to the weight of the modules. Each module that searches for a feature or a characteristic can be used independently. Thus, S4TE-EM can be viewed and used as 14 independent programs. This could facilitate the annotation of new genomes by looking for specific features such as NLS, prenylation domains, etc.</p>
<p>Finally, S4TE-CG makes it possible for users to compare effectomes to highlight core T4 effectomes and/or accessory T4 effectomes to understand how effectomes evolved and may provide clues to the specificity of different strains.</p>
</sec>
<sec id="sec022">
<title>Supporting information</title>
<supplementary-material id="pcbi.1006847.s001" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1006847.s001" xlink:type="simple">
<label>S1 Dataset</label>
<caption>
<title>List of 32 NLS-containing proteins used to test S4TE 2.0 NLS module.</title>
<p>(XLSX)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1006847.s002" mimetype="application/vnd.ms-excel" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1006847.s002" xlink:type="simple">
<label>S2 Dataset</label>
<caption>
<title>List of 98 E-block-containing proteins used to test S4TE 2.0 E-block module.</title>
<p>(XLS)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1006847.s003" mimetype="application/vnd.ms-excel" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1006847.s003" xlink:type="simple">
<label>S3 Dataset</label>
<caption>
<title>List of proteins known as type IV effectors.</title>
<p>(XLS)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1006847.s004" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1006847.s004" xlink:type="simple">
<label>S4 Dataset</label>
<caption>
<title>List of 82 eukaryotic domains searched by S4TE.</title>
<p>(XLSX)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1006847.s005" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1006847.s005" xlink:type="simple">
<label>S5 Dataset</label>
<caption>
<title>Bacterial genomes present in S4TE 2.0 database.</title>
<p>(XLSX)</p>
</caption>
</supplementary-material>
</sec>
</body>
<back>
<ack>
<p>We thank all members of the Center for Research and surveillance on Vector-borne diseases in the Caribbean (CRVC), part of ASTRE joint research unit between CIRAD and INRA, for fruitful discussions and useful suggestions.</p>
</ack>
<ref-list>
<title>References</title>
<ref id="pcbi.1006847.ref001"><label>1</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Meyer</surname> <given-names>DF</given-names></name>, <name name-style="western"><surname>Noroy</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Moumène</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Raffaele</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Albina</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Vachiéry</surname> <given-names>N</given-names></name>: <article-title>Searching algorithm for type IV secretion system effectors 1.0: a tool for predicting type IV effectors and exploring their genomic context</article-title>. <source>Nucleic acids research</source> <year>2013</year>, <volume>41</volume>:<fpage>9218</fpage>–<lpage>29</lpage> <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/nar/gkt718" xlink:type="simple">10.1093/nar/gkt718</ext-link></comment> <object-id pub-id-type="pmid">23945940</object-id></mixed-citation></ref>
<ref id="pcbi.1006847.ref002"><label>2</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Zusman</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Aloni</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Halperin</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Kotzer</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Degtyar</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Feldman</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Segal</surname> <given-names>G</given-names></name>: <article-title>The response regulator PmrA is a major regulator of the icm/dot type IV secretion system in Legionella pneumophila and Coxiella burnetii</article-title>. <source>Molecular microbiology</source> <year>2007</year>, <volume>63</volume>:<fpage>1508</fpage>–<lpage>23</lpage> <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1111/j.1365-2958.2007.05604.x" xlink:type="simple">10.1111/j.1365-2958.2007.05604.x</ext-link></comment> <object-id pub-id-type="pmid">17302824</object-id></mixed-citation></ref>
<ref id="pcbi.1006847.ref003"><label>3</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Bailey</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Boden</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Buske</surname> <given-names>F</given-names></name>, <name name-style="western"><surname>Frith</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Grant</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Clementi</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Ren</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Li</surname> <given-names>W</given-names></name>, <name name-style="western"><surname>Noble</surname> <given-names>W</given-names></name>: <article-title>MEME SUITE: tools for motif discovery and searching</article-title>. <source>Nucleic acids research</source> <year>2009</year>, <volume>37</volume>:<fpage>W202</fpage>–<lpage>8</lpage> <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/nar/gkp335" xlink:type="simple">10.1093/nar/gkp335</ext-link></comment> <object-id pub-id-type="pmid">19458158</object-id></mixed-citation></ref>
<ref id="pcbi.1006847.ref004"><label>4</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Johnson</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Zaretskaya</surname> <given-names>I</given-names></name>, <name name-style="western"><surname>Raytselis</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Merezhuk</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>McGinnis</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Madden</surname> <given-names>T</given-names></name>: <article-title>NCBI BLAST: a better web interface</article-title>. <source>Nucleic acids research</source> <year>2008</year>, <volume>36</volume>:<fpage>W5</fpage>–<lpage>9</lpage> <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/nar/gkn201" xlink:type="simple">10.1093/nar/gkn201</ext-link></comment> <object-id pub-id-type="pmid">18440982</object-id></mixed-citation></ref>
<ref id="pcbi.1006847.ref005"><label>5</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hicks</surname> <given-names>SW</given-names></name>, <name name-style="western"><surname>Galán</surname> <given-names>JE</given-names></name>: <article-title>Exploitation of eukaryotic subcellular targeting mechanisms by bacterial effectors</article-title>. <source>Nature Reviews Microbiology</source> <year>2013</year>, <volume>11</volume>:<fpage>316</fpage>–<lpage>326</lpage> <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nrmicro3009" xlink:type="simple">10.1038/nrmicro3009</ext-link></comment> <object-id pub-id-type="pmid">23588250</object-id></mixed-citation></ref>
<ref id="pcbi.1006847.ref006"><label>6</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ruhanen</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Hurley</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Ghosh</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>O’Brien</surname> <given-names>KT</given-names></name>, <name name-style="western"><surname>Johnston</surname> <given-names>CRR</given-names></name>, <name name-style="western"><surname>Shields</surname> <given-names>DC</given-names></name>: <article-title>Potential of known and short prokaryotic protein motifs as a basis for novel peptide-based antibacterial therapeutics: a computational survey</article-title>. <source>Front Microbiol</source> <year>2014</year>, <volume>5</volume>:<fpage>4</fpage> <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fmicb.2014.00004" xlink:type="simple">10.3389/fmicb.2014.00004</ext-link></comment> <object-id pub-id-type="pmid">24478765</object-id></mixed-citation></ref>
<ref id="pcbi.1006847.ref007"><label>7</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Niu</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Kozjak-Pavlovic</surname> <given-names>V</given-names></name>, <name name-style="western"><surname>Rudel</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Rikihisa</surname> <given-names>Y</given-names></name>: <article-title>Anaplasma phagocytophilum Ats-1 Is Imported into Host Cell Mitochondria and Interferes with Apoptosis Induction</article-title>. <source>PLoS Pathogens</source> <year>2010</year>, <volume>6</volume>:<fpage>e1000774</fpage> <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.ppat.1000774" xlink:type="simple">10.1371/journal.ppat.1000774</ext-link></comment> <object-id pub-id-type="pmid">20174550</object-id></mixed-citation></ref>
<ref id="pcbi.1006847.ref008"><label>8</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Fukasawa</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Tsuji</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Fu</surname> <given-names>S-CC</given-names></name>, <name name-style="western"><surname>Tomii</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Horton</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Imai</surname> <given-names>K</given-names></name>: <article-title>MitoFates: improved prediction of mitochondrial targeting sequences and their cleavage sites</article-title>. <source>Mol. Cell Proteomics</source> <year>2015</year>, <volume>14</volume>:<fpage>1113</fpage>–<lpage>26</lpage> <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1074/mcp.M114.043083" xlink:type="simple">10.1074/mcp.M114.043083</ext-link></comment> <object-id pub-id-type="pmid">25670805</object-id></mixed-citation></ref>
<ref id="pcbi.1006847.ref009"><label>9</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Huang</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Boyd</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Amyot</surname> <given-names>W</given-names></name>, <name name-style="western"><surname>Hempstead</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Luo</surname> <given-names>Z-Q</given-names></name>, <name name-style="western"><surname>O’Connor</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Chen</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Machner</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Montminy</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Isberg</surname> <given-names>R</given-names></name>: <article-title>The E Block motif is associated with Legionella pneumophila translocated substrates</article-title>. <source>Cellular Microbiology</source> <year>2011</year>, <volume>13</volume>:<fpage>227</fpage>–<lpage>245</lpage> <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1111/j.1462-5822.2010.01531.x" xlink:type="simple">10.1111/j.1462-5822.2010.01531.x</ext-link></comment> <object-id pub-id-type="pmid">20880356</object-id></mixed-citation></ref>
<ref id="pcbi.1006847.ref010"><label>10</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Lifshitz</surname> <given-names>Z</given-names></name>, <name name-style="western"><surname>Burstein</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Peeri</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Zusman</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Schwartz</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Shuman</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Pupko</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Segal</surname> <given-names>G</given-names></name>: <article-title>Computational modeling and experimental validation of the Legionella and Coxiella virulence-related type-IVB secretion signal</article-title>. <source>Proc National Acad Sci</source> <year>2013</year>, <volume>110</volume>:<fpage>E707</fpage>–<lpage>E715</lpage> <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1073/pnas.1215278110" xlink:type="simple">10.1073/pnas.1215278110</ext-link></comment> <object-id pub-id-type="pmid">23382224</object-id></mixed-citation></ref>
<ref id="pcbi.1006847.ref011"><label>11</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Safari</surname> <given-names>F</given-names></name>, <name name-style="western"><surname>Murata-Kamiya</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Saito</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Hatakeyama</surname> <given-names>M</given-names></name>: <article-title>Mammalian Pragmin regulates Src family kinases via the Glu-Pro-Ile-Tyr-Ala (EPIYA) motif that is exploited by bacterial effectors</article-title>. <source>Proc. Natl. Acad. Sci. U.S.A</source>. <year>2011</year>, <volume>108</volume>:<fpage>14938</fpage>–<lpage>43</lpage> <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1073/pnas.1107740108" xlink:type="simple">10.1073/pnas.1107740108</ext-link></comment> <object-id pub-id-type="pmid">21873224</object-id></mixed-citation></ref>
<ref id="pcbi.1006847.ref012"><label>12</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Papadakos</surname> <given-names>KS</given-names></name>, <name name-style="western"><surname>Sougleri</surname> <given-names>IS</given-names></name>, <name name-style="western"><surname>Mentis</surname> <given-names>AF</given-names></name>, <name name-style="western"><surname>Hatziloukas</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Sgouras</surname> <given-names>DN</given-names></name>: <article-title>Presence of terminal EPIYA phosphorylation motifs in Helicobacter pylori CagA contributes to IL-8 secretion, irrespective of the number of repeats</article-title>. <source>PLoS ONE</source> <year>2013</year>, <volume>8</volume>:<fpage>e56291</fpage> <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pone.0056291" xlink:type="simple">10.1371/journal.pone.0056291</ext-link></comment> <object-id pub-id-type="pmid">23409168</object-id></mixed-citation></ref>
<ref id="pcbi.1006847.ref013"><label>13</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Lifshitz</surname> <given-names>Z</given-names></name>, <name name-style="western"><surname>Burstein</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Schwartz</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Shuman</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Pupko</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Segal</surname> <given-names>G</given-names></name>: <article-title>Identification of Novel Coxiella burnetii Icm/Dot Effectors and Genetic Analysis of Their Involvement in Modulating a Mitogen-Activated Protein Kinase Pathway</article-title>. <source>Infect Immun</source> <year>2014</year>, <volume>82</volume>:<fpage>3740</fpage>–<lpage>3752</lpage> <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1128/IAI.01729-14" xlink:type="simple">10.1128/IAI.01729-14</ext-link></comment> <object-id pub-id-type="pmid">24958706</object-id></mixed-citation></ref>
<ref id="pcbi.1006847.ref014"><label>14</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Wang</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Wei</surname> <given-names>X</given-names></name>, <name name-style="western"><surname>Bao</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Liu</surname> <given-names>S-L</given-names></name>: <article-title>Prediction of bacterial type IV secreted effectors by C-terminal features</article-title>. <source>Bmc Genomics</source> <year>2014</year>, <volume>15</volume>:<fpage>1</fpage>–<lpage>14</lpage> <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/1471-2164-15-1" xlink:type="simple">10.1186/1471-2164-15-1</ext-link></comment></mixed-citation></ref>
</ref-list>
</back>
</article>