<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1d3 20150301//EN" "http://jats.nlm.nih.gov/publishing/1.1d3/JATS-journalpublishing1.dtd">
<article article-type="research-article" dtd-version="1.1d3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">PLoS Comput Biol</journal-id>
<journal-id journal-id-type="publisher-id">plos</journal-id>
<journal-id journal-id-type="pmc">ploscomp</journal-id>
<journal-title-group>
<journal-title>PLOS Computational Biology</journal-title>
</journal-title-group>
<issn pub-type="ppub">1553-734X</issn>
<issn pub-type="epub">1553-7358</issn>
<publisher>
<publisher-name>Public Library of Science</publisher-name>
<publisher-loc>San Francisco, CA USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.1371/journal.pcbi.1005867</article-id>
<article-id pub-id-type="publisher-id">PCOMPBIOL-D-17-01217</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Research Article</subject>
</subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Medicine and health sciences</subject><subj-group><subject>Pathology and laboratory medicine</subject><subj-group><subject>Pathogenesis</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Biochemistry</subject><subj-group><subject>Nucleic acids</subject><subj-group><subject>RNA</subject><subj-group><subject>Non-coding RNA</subject><subj-group><subject>Transfer RNA</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Biochemistry</subject><subj-group><subject>Bioenergetics</subject><subj-group><subject>Energy-producing organelles</subject><subj-group><subject>Mitochondria</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Cell biology</subject><subj-group><subject>Cellular structures and organelles</subject><subj-group><subject>Energy-producing organelles</subject><subj-group><subject>Mitochondria</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Research and analysis methods</subject><subj-group><subject>Database and informatics methods</subject><subj-group><subject>Biological databases</subject><subj-group><subject>Sequence databases</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Research and analysis methods</subject><subj-group><subject>Database and informatics methods</subject><subj-group><subject>Bioinformatics</subject><subj-group><subject>Sequence analysis</subject><subj-group><subject>Sequence databases</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group 
subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Heredity</subject><subj-group><subject>Heteroplasmy</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Research and analysis methods</subject><subj-group><subject>Database and informatics methods</subject><subj-group><subject>Bioinformatics</subject><subj-group><subject>Sequence analysis</subject><subj-group><subject>Sequence alignment</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Evolutionary biology</subject><subj-group><subject>Population genetics</subject><subj-group><subject>Haplogroups</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Population genetics</subject><subj-group><subject>Haplogroups</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Population biology</subject><subj-group><subject>Population genetics</subject><subj-group><subject>Haplogroups</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Research and analysis methods</subject><subj-group><subject>Mathematical and statistical techniques</subject><subj-group><subject>Statistical methods</subject><subj-group><subject>Forecasting</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Physical sciences</subject><subj-group><subject>Mathematics</subject><subj-group><subject>Statistics (mathematics)</subject><subj-group><subject>Statistical 
methods</subject><subj-group><subject>Forecasting</subject></subj-group></subj-group></subj-group></subj-group></subj-group></article-categories>
<title-group>
<article-title>Predicting the pathogenicity of novel variants in mitochondrial tRNA with MitoTIP</article-title>
<alt-title alt-title-type="running-head">Predicting the pathogenicity of novel variants in mitochondrial tRNA with MitoTIP</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Sonney</surname>
<given-names>Sanjay</given-names>
</name>
<role content-type="http://credit.casrai.org/">Data curation</role>
<role content-type="http://credit.casrai.org/">Formal analysis</role>
<role content-type="http://credit.casrai.org/">Software</role>
<role content-type="http://credit.casrai.org/">Validation</role>
<role content-type="http://credit.casrai.org/">Writing – original draft</role>
<role content-type="http://credit.casrai.org/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0001-7224-9620</contrib-id>
<name name-style="western">
<surname>Leipzig</surname>
<given-names>Jeremy</given-names>
</name>
<role content-type="http://credit.casrai.org/">Data curation</role>
<role content-type="http://credit.casrai.org/">Resources</role>
<role content-type="http://credit.casrai.org/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Lott</surname>
<given-names>Marie T.</given-names>
</name>
<role content-type="http://credit.casrai.org/">Data curation</role>
<role content-type="http://credit.casrai.org/">Software</role>
<role content-type="http://credit.casrai.org/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Zhang</surname>
<given-names>Shiping</given-names>
</name>
<role content-type="http://credit.casrai.org/">Software</role>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0002-1537-4684</contrib-id>
<name name-style="western">
<surname>Procaccio</surname>
<given-names>Vincent</given-names>
</name>
<role content-type="http://credit.casrai.org/">Data curation</role>
<role content-type="http://credit.casrai.org/">Software</role>
<role content-type="http://credit.casrai.org/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff004"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Wallace</surname>
<given-names>Douglas C.</given-names>
</name>
<role content-type="http://credit.casrai.org/">Resources</role>
<role content-type="http://credit.casrai.org/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
<xref ref-type="aff" rid="aff005"><sup>5</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0001-9982-5712</contrib-id>
<name name-style="western">
<surname>Sondheimer</surname>
<given-names>Neal</given-names>
</name>
<role content-type="http://credit.casrai.org/">Conceptualization</role>
<role content-type="http://credit.casrai.org/">Funding acquisition</role>
<role content-type="http://credit.casrai.org/">Project administration</role>
<role content-type="http://credit.casrai.org/">Software</role>
<role content-type="http://credit.casrai.org/">Supervision</role>
<role content-type="http://credit.casrai.org/">Validation</role>
<role content-type="http://credit.casrai.org/">Writing – original draft</role>
<role content-type="http://credit.casrai.org/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff006"><sup>6</sup></xref>
<xref ref-type="corresp" rid="cor001">*</xref>
</contrib>
</contrib-group>
<aff id="aff001"><label>1</label> <addr-line>Division of Clinical and Metabolic Genetics, The Hospital for Sick Children, Toronto, Ontario, Canada</addr-line></aff>
<aff id="aff002"><label>2</label> <addr-line>Department of Biomedical and Health Informatics, The Children’s Hospital of Philadelphia, Philadelphia, Pennsylvania, United States of America</addr-line></aff>
<aff id="aff003"><label>3</label> <addr-line>The Center for Mitochondrial and Epigenomic Medicine, The Children’s Hospital of Philadelphia, Philadelphia, Pennsylvania, United States of America</addr-line></aff>
<aff id="aff004"><label>4</label> <addr-line>UMR CNRS 6015-INSERM U1083, MitoVasc Institute, Angers University Hospital, Angers, France</addr-line></aff>
<aff id="aff005"><label>5</label> <addr-line>Department of Pathology, The University of Pennsylvania, Philadelphia, Pennsylvania, United States of America</addr-line></aff>
<aff id="aff006"><label>6</label> <addr-line>Department of Paediatrics, The University of Toronto, Toronto, Ontario, Canada</addr-line></aff>
<contrib-group>
<contrib contrib-type="editor" xlink:type="simple">
<name name-style="western">
<surname>Poisot</surname>
<given-names>Timothée</given-names>
</name>
<role>Editor</role>
<xref ref-type="aff" rid="edit1"/>
</contrib>
</contrib-group>
<aff id="edit1"><addr-line>Universite de Montreal, CANADA</addr-line></aff>
<author-notes>
<fn fn-type="conflict" id="coi001">
<p>The authors have declared that no competing interests exist.</p>
</fn>
<corresp id="cor001">* E-mail: <email xlink:type="simple">neal.sondheimer@sickkids.ca</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>11</day>
<month>12</month>
<year>2017</year>
</pub-date>
<pub-date pub-type="collection">
<month>12</month>
<year>2017</year>
</pub-date>
<volume>13</volume>
<issue>12</issue>
<elocation-id>e1005867</elocation-id>
<history>
<date date-type="received">
<day>21</day>
<month>7</month>
<year>2017</year>
</date>
<date date-type="accepted">
<day>2</day>
<month>11</month>
<year>2017</year>
</date>
</history>
<permissions>
<copyright-year>2017</copyright-year>
<copyright-holder>Sonney et al</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="info:doi/10.1371/journal.pcbi.1005867"/>
<abstract>
<p>Novel or rare variants in mitochondrial tRNA sequences may be observed after mitochondrial DNA analysis. Determining whether these variants are pathogenic is critical, but confirmation of the effect of a variant on mitochondrial function can be challenging. We have used available databases of benign and pathogenic variants, alignment between diverse tRNAs, structural information and comparative genomics to predict the impact of all possible single-base variants and deletions. The Mitochondrial tRNA Informatics Predictor (MitoTIP) is available through MITOMAP at <ext-link ext-link-type="uri" xlink:href="http://www.mitomap.org" xlink:type="simple">www.mitomap.org</ext-link>. The source code for MitoTIP is available at <ext-link ext-link-type="uri" xlink:href="http://www.github.com/sonneysa/MitoTIP" xlink:type="simple">www.github.com/sonneysa/MitoTIP</ext-link>.</p>
</abstract>
<funding-group>
<funding-statement>The authors were funded by the Centre for Genetic Medicine and The PeRCS Program at The Hospital for Sick Children. MITOMAP is funded by NIH/NINDS 5R01-NS021328-30. The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</funding-statement>
</funding-group>
<counts>
<fig-count count="2"/>
<table-count count="1"/>
<page-count count="8"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>PLOS Publication Stage</meta-name>
<meta-value>vor-update-to-uncorrected-proof</meta-value>
</custom-meta>
<custom-meta>
<meta-name>Publication Update</meta-name>
<meta-value>2017-12-21</meta-value>
</custom-meta>
<custom-meta id="data-availability">
<meta-name>Data Availability</meta-name>
<meta-value>Pathogenicity and variant incidence data are available at MITOMAP and can be accessed at <ext-link ext-link-type="uri" xlink:href="https://mitomap.org/foswiki/bin/view/MITOMAP/Resources" xlink:type="simple">https://mitomap.org/foswiki/bin/view/MITOMAP/Resources</ext-link>. Secondary structures are available at MAMIT-tRNA at <ext-link ext-link-type="uri" xlink:href="http://mamit-trna.u-strasbg.fr/human.asp" xlink:type="simple">http://mamit-trna.u-strasbg.fr/human.asp</ext-link>. tRNA conservation data are available at <ext-link ext-link-type="uri" xlink:href="http://mamit-trna.u-strasbg.fr/tables.asp?aminoacid=7" xlink:type="simple">http://mamit-trna.u-strasbg.fr/tables.asp?aminoacid=7</ext-link>. The authors confirm that all data underlying the findings are fully available without restriction. All relevant data are within the paper and its Supporting Information files.</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<disp-quote>
<p>This is a <italic>PLOS Computational Biology</italic> Software paper.</p>
</disp-quote>
<sec id="sec001" sec-type="intro">
<title>Introduction</title>
<p>Variants in mitochondrial tRNAs are an important and common cause of mitochondrial disease. Although some variants have become familiar, determining the pathogenicity of novel identified variants in tRNA-encoding sequences of patients with suspected mitochondrial disease remains problematic. The definitive confirmation of pathogenicity for a novel variant is best accomplished by transmitochondrial cybrid studies [<xref ref-type="bibr" rid="pcbi.1005867.ref001">1</xref>] or by analysis of heteroplasmy in single muscle fibers [<xref ref-type="bibr" rid="pcbi.1005867.ref002">2</xref>]. However, both of these studies require laboratory facilities with specialized equipment and specific types of patient samples. As an aid in diagnosis, bioinformatic approaches to predict the effects of variants have been considered previously. Approaches to prediction have used conservation between species [<xref ref-type="bibr" rid="pcbi.1005867.ref003">3</xref>,<xref ref-type="bibr" rid="pcbi.1005867.ref004">4</xref>] and machine learning in combination with the presence or absence of heteroplasmy [<xref ref-type="bibr" rid="pcbi.1005867.ref005">5</xref>]. Here we have predicted the potential impact of all possible variants and single-base deletions from the revised Cambridge Reference Sequence (rCRS) in mitochondrial tRNAs. Our predictive algorithm incorporates an estimation of the importance of a position across all known mitochondrial tRNAs using data from publicly available databases. Comparisons between structurally similar mitochondrial tRNAs improved the sensitivity and specificity of predictions over other available predictive systems.</p>
</sec>
<sec id="sec002" sec-type="materials|methods">
<title>Design and implementation</title>
<p>A database of reference benign and pathogenic variants (<xref ref-type="supplementary-material" rid="pcbi.1005867.s003">S1 Table</xref>) was created from a comprehensive PubMed search and from publicly available information accessible through MITOMAP [<xref ref-type="bibr" rid="pcbi.1005867.ref006">6</xref>]. The MITOMAP analysis of sequence diversity is drawn directly from GenBank full sequence data. Interspecific comparison was adapted from Mamit-tRNA and included sequence for all species from the superorder Euarchontoglires [<xref ref-type="bibr" rid="pcbi.1005867.ref007">7</xref>]. Given the large number of mitochondrial sequences now available from GenBank (n = 37,545 accessed June 2017), we inferred that a lack of observed variation represents a requirement for sequence conservation and we penalized these unobserved variants accordingly. These information sources were combined to provide a <bold>variant history and conservation score</bold> for each variant (see <xref ref-type="supplementary-material" rid="pcbi.1005867.s001">S1 Fig</xref> for complete scoring algorithm).</p>
<p>To create a profile of the likelihood of pathogenic variants occurring at positions within a generic tRNA secondary structure, the sequences of the mitochondrial tRNAs were aligned by anchoring each sequence to the predicted acceptor, D, anticodon and TψC stems, as well as to the anticodon itself. Using this alignment, we defined the potential pathogenicity caused by mutation at positions in a generic tRNA (<xref ref-type="fig" rid="pcbi.1005867.g001">Fig 1A</xref>). This sub-scoring, called <bold>position score</bold>, reflected the presence of pathogenic and benign variants in other tRNAs at analogous positions.</p>
<fig id="pcbi.1005867.g001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1005867.g001</object-id>
<label>Fig 1</label>
<caption>
<title>Position and stem penalties for pathogenicity prediction.</title>
<p>(A) The variant history and conservation scores at the analogous positions of every mitochondrial tRNA were averaged and used to score a generic tRNA structure. This identifies the regions of the tRNA that are most vulnerable to pathogenic variants. (B) Variants at base pairing regions are assessed based on the steric hindrance they induce, with the highest scores assigned at the ends of the stem regions, as shown in the scoring heat map for the phenylalanine tRNA.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1005867.g001" xlink:type="simple"/>
</fig>
<p>Finally, for nucleotides within any of the tRNA stems, we evaluated the steric impact of variants by penalizing for mispairing or bulky substitutions and by penalizing more highly for variants at the end of the stem using a quadratic function (<xref ref-type="fig" rid="pcbi.1005867.g001">Fig 1B</xref>). This sub-scoring was called <bold>secondary structure score</bold>. The total <bold>pathogenicity score</bold> for each possible variant was calculated by summation of the three sub-scores.</p>
</sec>
<sec id="sec003">
<title>Results and discussion</title>
<p>In order to optimize the system and confirm the validity of pathogenicity prediction we tested it by re-evaluating reference pathogenic (n = 38) and benign (n = 651) variants. To provide an effective test of its ability to discriminate pathogenic and benign variants, we removed available data on each variant iteratively, and examined the ability of the system to predict the effect of each variant naively. The algorithm was modified by altering scaling factors using a differential evolution optimization program (SciPy) that maximized the sensitivity and specificity of the detection system (<xref ref-type="supplementary-material" rid="pcbi.1005867.s004">S2 Table</xref>) [<xref ref-type="bibr" rid="pcbi.1005867.ref008">8</xref>].</p>
<p>The final scoring system provided good discrimination of known pathogenic and benign variants (<xref ref-type="fig" rid="pcbi.1005867.g002">Fig 2A</xref>). Using a single point pathogenicity score cutoff, the system had a sensitivity and specificity of 74% (<xref ref-type="fig" rid="pcbi.1005867.g002">Fig 2B</xref>). We compared our system to a more limited set of pathogenic and benign positions that were used to test both the machine learning predictive system and the interspecific homology system proposed by Kondrashov and our system provided superior sensitivity and specificity (<xref ref-type="table" rid="pcbi.1005867.t001">Table 1</xref>) [<xref ref-type="bibr" rid="pcbi.1005867.ref003">3</xref>,<xref ref-type="bibr" rid="pcbi.1005867.ref005">5</xref>].</p>
<fig id="pcbi.1005867.g002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1005867.g002</object-id>
<label>Fig 2</label>
<caption>
<title>Separation of benign and pathogenic variants by MitoTIP.</title>
<p>(A) Pathogenicity scores from naïve evaluations of known pathogenic (n = 38) and described benign (n = 651) variants plotted using box and whiskers at 5–95% (<italic>p</italic>&lt;0.0001 by Mann Whitney test). Negative scoring is possible when polymorphisms improve Watson-Crick pairing in stems. (B) Sensitivity and specificity plot of these data at a range of pathogenicity scores. The crossover pathogenicity score was 12.8.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1005867.g002" xlink:type="simple"/>
</fig>
<table-wrap id="pcbi.1005867.t001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1005867.t001</object-id>
<label>Table 1</label> <caption><title>Comparison between predictive systems.</title></caption>
<alternatives>
<graphic id="pcbi.1005867.t001g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1005867.t001" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="center">System</th>
<th align="center">Sensitivity</th>
<th align="center">Specificity</th>
</tr>
</thead>
<tbody>
<tr>
<td align="center">PON-mt-tRNA</td>
<td align="center">69%</td>
<td align="center">70%</td>
</tr>
<tr>
<td align="center">Kondrashov</td>
<td align="center">87%</td>
<td align="center">47%</td>
</tr>
<tr>
<td align="center">MitoTIP</td>
<td align="center">74%</td>
<td align="center">75%</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<p>As another demonstration of the specificity of this method, we evaluated scoring at positions associated with haplogroups, as haplogroup-associated variants would generally be presumed to be non-pathogenic. We identified all haplogroup-associated polymorphisms that have been sequenced a minimum of 10 times in GenBank (n = 619). Only three of these exceeded our combined threshold value for pathogenicity, and all of these had been linked to diseases in published studies (see <xref ref-type="supplementary-material" rid="pcbi.1005867.s005">S3 Table</xref>). For example, m.5628T&gt;C, which is found in individuals from multiple haplogroups, has been associated with chronic progressive external ophthalmoplegia (CPEO) and hearing loss [<xref ref-type="bibr" rid="pcbi.1005867.ref009">9</xref>,<xref ref-type="bibr" rid="pcbi.1005867.ref010">10</xref>]. The reports of pathogenicity in this case are sufficient to inflate the MitoTIP score into the pathogenic range, whereas the same variant analyzed without these reports returns a score of possibly benign.</p>
<p>This highlights a disadvantage of MitoTIP’s use of databases. The m.5628T&gt;C variant was initially reported as a heteroplasmic variant with a 40% mutation load causing late-onset CPEO [<xref ref-type="bibr" rid="pcbi.1005867.ref009">9</xref>]. A second study reports the same variant as being a phenotypic modifier for hearing loss in a family that was homoplasmic for the m.5628T&gt;C variant, but there is no mention of CPEO [<xref ref-type="bibr" rid="pcbi.1005867.ref010">10</xref>]. This casts doubt on the first study reporting disease association and suggests that this variant may be wrongly classified as pathogenic in both studies.</p>
<p>MitoTIP is designed for the analysis of novel variants, where previous data confirming pathogenicity is unavailable. Several known pathogenic variants such as m.8344A&gt;G score poorly in MitoTIP because the position is neither well conserved nor in a secondary structure location commonly associated with disease. A complete list of the pathogenic mutations scoring in the bottom two quartiles for pathogenicity (n = 5) is provided in <xref ref-type="supplementary-material" rid="pcbi.1005867.s006">S4 Table</xref>.</p>
</sec>
<sec id="sec004">
<title>Availability and future directions</title>
<p>For end users, we have created an interface called the Mitochondrial tRNA Informatics Predictor (MitoTIP—screenshots of interface in <xref ref-type="supplementary-material" rid="pcbi.1005867.s002">S2 Fig</xref>). MitoTIP is accessed within the pre-existing structure of MITOMAP (<ext-link ext-link-type="uri" xlink:href="http://www.mitomap.org" xlink:type="simple">www.mitomap.org</ext-link>). Users can input any tRNA-encoding position into MITOMAP’s point variant search or into MITOMASTER’s SNV Query and retrieve the predicted pathogenicity score of any possible change at that position.</p>
<p>MitoTIP was designed to evaluate novel or infrequently observed single nucleotide variants in tRNA sequence. By design, the display of MitoTIP scoring is suppressed for known pathogenic variants and common variants that are associated with haplogroup. Variants that are confirmed as pathogenic within MITOMAP are listed as “known pathogenic” to avoid confusion. Similarly, high-frequency variants (&gt;1% of all GenBank sequences or &gt;10% in any single major haplogroup division) are listed as “frequent polymorphism.” The use of the MITOMAP platform simultaneously directs users to underlying literature supporting the assignment of variants.</p>
<p>For the target novel mutations, which could all be considered variants of uncertain significance, the pathogenicity prediction is provided by percentile (ranging from 1–99%). Conveniently, the optimal point of the sensitivity/specificity curves is at the 51st centile for pathogenicity scoring. We have chosen to provide an interpretation with four categories (likely pathogenic/possibly pathogenic/possibly benign/likely benign) based upon the quartile scored. We have done this to generally conform with ACMG recommendations for the description of sequence variants [<xref ref-type="bibr" rid="pcbi.1005867.ref011">11</xref>]. The underlying subpart scoring is also available to interested users.</p>
<p>We have not incorporated the heteroplasmy of a variant into our scoring. It is widely accepted that heteroplasmic variants are more likely to be pathogenic and low-penetrance variants that are homoplasmic are less common. The pathogenicity scoring from MitoTIP for newly observed variants can and should be evaluated by the end user in the context of the actual patient heteroplasmy and the heteroplasmy seen in affected and unaffected family members.</p>
<p>MitoTIP places considerable reliance on databases, which provides important advantages and disadvantages. Full sequence entries used to infer normal human variation might have been obtained from patients with mitochondrial disorders. The underlying calls of pathogenicity represent a best effort at identifying all legitimate reports of mitochondrial variants but may have missed some reports. In addition, pathogenic variant databases may fall out of date or contain errors, as described above for m.5628T&gt;C. The possibility exists that the associations made between haplogroup-defining variants and disease states are incorrect and are due to the coincidence of maternally inherited mitochondrial variants and unmeasured nuclear variants that are actually responsible for the heightened risk of common phenotypes. Providing MitoTIP data in the context of access to these studies will allow users to integrate multiple sources of information when assessing unfamiliar variants.</p>
<p>The use of databases is advantageous because it allows MitoTIP scoring to be easily updated when new information is incorporated into MITOMAP. The system will improve in sensitivity and specificity over time as more sequences are available in MITOMAP and more reports of pathogenic mutations are made.</p>
</sec>
<sec id="sec005">
<title>Supporting information</title>
<supplementary-material id="pcbi.1005867.s001" mimetype="image/png" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1005867.s001" xlink:type="simple">
<label>S1 Fig</label>
<caption>
<title>Pathogenicity scoring algorithm.</title>
<p>The MitoTIP score has three main components: <bold>the variant history and conservation score</bold>, <bold>the position score</bold>, and the <bold>secondary structure score</bold>. The variant history and conservation score is derived from the history of previously reported pathogenic and benign variants, and interspecies sequence conservation. The variant history and conservation data are imported from MITOMAP and Mamit-tRNA, respectively. In benign variants, the GenBank population frequency is calculated and the variants are categorized by percentile rank to generate the <bold>pop score</bold>. Pathogenic variants from the database are stratified by heteroplasmy and whether pathogenicity is confirmed to generate the <bold>path score</bold>. The conservation data for species in the superorder Euarchontoglires was evaluated using a logarithmic function that quantifies each position’s deviation from complete conservation to generate the <bold>cons score</bold>. The <bold>pop score</bold>, <bold>path score</bold>, and <bold>cons score</bold> were evaluated based on the decision tree and scaling factors shown in the figure to generate the <bold>variant hx and conservation score</bold>. The <bold>position score</bold> is calculated by aligning the tRNAs by secondary structure and averaging the <bold>variant history and conservation scores</bold> at the aligned analogous positions. This highlights the positions of the tRNA that are most vulnerable to disease causing variants. Finally the <bold>secondary structure score</bold> is calculated based on the location of the variant within the stem and the steric hindrance induced by the base pair change. Changes at the ends of the stem, and those causing the greatest steric hindrance are considered to be most disruptive to secondary structure and thus assigned the highest scores. 
Finally the <bold>variant history and conservation score</bold>, <bold>position score</bold>, and <bold>secondary structure score</bold> are scaled by their respective scaling factors and summed to generate the <bold>pathogenicity score</bold>.</p>
<p>(PNG)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1005867.s002" mimetype="image/png" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1005867.s002" xlink:type="simple">
<label>S2 Fig</label>
<caption>
<title>MitoTIP interface.</title>
<p>(PNG)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1005867.s003" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1005867.s003" xlink:type="simple">
<label>S1 Table</label>
<caption>
<title>Variants used in optimization.</title>
<p>Pathogenic variants included all variants from MITOMAP with confirmed disease-association plus literature-identified variants meeting the criteria of association with disease and either single-fiber or cybrid confirmation. Benign variants were obtained from the list of “mtDNA Variants” on MITOMAP after filtering out any positions with reports of disease-association.</p>
<p>(DOCX)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1005867.s004" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1005867.s004" xlink:type="simple">
<label>S2 Table</label>
<caption>
<title>Optimization of pathogenicity scoring.</title>
<p>The MitoTIP algorithm has six scaling factors to adjust the weight of the various sources of information (<xref ref-type="supplementary-material" rid="pcbi.1005867.s001">S1 Fig</xref>). The relative weight of variant history (pop and path score) and interspecies conservation (cons score) is represented by the var_hx_scal and cons_scal variables. The weight of both factors together is scaled by var_hx_cons_scal. The secondary structure score is scaled by the SS_scal variable and the position score is scaled by the Pos_scal variable. Finally, a base_scal variable controls the base score that is applied to novel variants with no previous variant history. In order to optimize these variables, we sought to maximize the sensitivity and specificity of MitoTIP at classifying known pathogenic and benign variants (<xref ref-type="supplementary-material" rid="pcbi.1005867.s003">S1 Table</xref>) using a take-one-out approach. The SciPy package for Python was used to perform differential evolution optimization, which seeks to find the minimum for a multivariate function. The MitoTIP algorithm was modified to take the six variables as input and output a single value that captures the performance of the algorithm. This value was calculated as 2-((sensitivity + specificity)-Abs(sensitivity-specificity)), and is at a minimum when both sensitivity and specificity are maximized. The solution provided by the differential evolution algorithm varies each time that the algorithm is run. The table shows results from four sample runs, with the highlighted row showing the chosen optimized settings for MitoTIP.</p>
<p>(DOCX)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1005867.s005" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1005867.s005" xlink:type="simple">
<label>S3 Table</label>
<caption>
<title>Haplogroup defining variants with high pathogenicity scores.</title>
<p>(DOCX)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1005867.s006" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1005867.s006" xlink:type="simple">
<label>S4 Table</label>
<caption>
<title>Pathogenic variants with low pathogenicity scores.</title>
<p>(DOCX)</p>
</caption>
</supplementary-material>
</sec>
</body>
<back>
<ref-list>
<title>References</title>
<ref id="pcbi.1005867.ref001"><label>1</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>González-Vioque</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Bornstein</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Gallardo</surname> <given-names>ME</given-names></name>, <name name-style="western"><surname>Fernández-Moreno</surname> <given-names>MÁ</given-names></name>, <name name-style="western"><surname>Garesse</surname> <given-names>R</given-names></name>. <article-title>The pathogenicity scoring system for mitochondrial tRNA mutations revisited</article-title>. <source>Mol Genet Genomic Med</source>. <year>2014</year>;<volume>2</volume>: <fpage>107</fpage>–<lpage>14</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1002/mgg3.47" xlink:type="simple">10.1002/mgg3.47</ext-link></comment> <object-id pub-id-type="pmid">24689073</object-id></mixed-citation></ref>
<ref id="pcbi.1005867.ref002"><label>2</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hardy</surname> <given-names>SA</given-names></name>, <name name-style="western"><surname>Blakely</surname> <given-names>EL</given-names></name>, <name name-style="western"><surname>Purvis</surname> <given-names>AI</given-names></name>, <name name-style="western"><surname>Rocha</surname> <given-names>MC</given-names></name>, <name name-style="western"><surname>Ahmed</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Falkous</surname> <given-names>G</given-names></name>, <etal>et al</etal>. <article-title>Pathogenic mtDNA mutations causing mitochondrial myopathy: The need for muscle biopsy</article-title>. <source>Neurol Genet</source>. <year>2016</year>;<volume>2</volume>: <fpage>e82</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1212/NXG.0000000000000082" xlink:type="simple">10.1212/NXG.0000000000000082</ext-link></comment> <object-id pub-id-type="pmid">27536729</object-id></mixed-citation></ref>
<ref id="pcbi.1005867.ref003"><label>3</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Kondrashov</surname> <given-names>FA</given-names></name>. <article-title>Prediction of pathogenic mutations in mitochondrially encoded human tRNAs</article-title>. <source>Hum Mol Genet</source>. <year>2005</year>;<volume>14</volume>: <fpage>2415</fpage>–<lpage>2419</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/hmg/ddi243" xlink:type="simple">10.1093/hmg/ddi243</ext-link></comment> <object-id pub-id-type="pmid">16014637</object-id></mixed-citation></ref>
<ref id="pcbi.1005867.ref004"><label>4</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ruiz-Pesini</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Wallace</surname> <given-names>DC</given-names></name>. <article-title>Evidence for adaptive selection acting on the tRNA and rRNA genes of human mitochondrial DNA</article-title>. <source>Hum Mutat</source>. <year>2006</year>;<volume>27</volume>: <fpage>1072</fpage>–<lpage>81</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1002/humu.20378" xlink:type="simple">10.1002/humu.20378</ext-link></comment> <object-id pub-id-type="pmid">16947981</object-id></mixed-citation></ref>
<ref id="pcbi.1005867.ref005"><label>5</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Niroula</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Vihinen</surname> <given-names>M</given-names></name>. <article-title>PON-mt-tRNA: a multifactorial probability-based method for classification of mitochondrial tRNA variations</article-title>. <source>Nucleic Acids Res</source>. <year>2016</year>; <fpage>gkw046</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/nar/gkw046" xlink:type="simple">10.1093/nar/gkw046</ext-link></comment> <object-id pub-id-type="pmid">26843426</object-id></mixed-citation></ref>
<ref id="pcbi.1005867.ref006"><label>6</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Lott</surname> <given-names>MT</given-names></name>, <name name-style="western"><surname>Leipzig</surname> <given-names>JN</given-names></name>, <name name-style="western"><surname>Derbeneva</surname> <given-names>O</given-names></name>, <name name-style="western"><surname>Xie</surname> <given-names>HM</given-names></name>, <name name-style="western"><surname>Chalkia</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Sarmady</surname> <given-names>M</given-names></name>, <etal>et al</etal>. <article-title>mtDNA Variation and Analysis Using Mitomap and Mitomaster</article-title>. <source>Curr Protoc Bioinforma</source>. <year>2013</year>;<volume>44</volume>: <fpage>1.23.1</fpage>–<lpage>26</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1002/0471250953.bi0123s44" xlink:type="simple">10.1002/0471250953.bi0123s44</ext-link></comment> <object-id pub-id-type="pmid">25489354</object-id></mixed-citation></ref>
<ref id="pcbi.1005867.ref007"><label>7</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Pütz</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Dupuis</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Sissler</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Florentz</surname> <given-names>C</given-names></name>. <article-title>Mamit-tRNA, a database of mammalian mitochondrial tRNA primary and secondary structures</article-title>. <source>RNA</source>. <year>2007</year>;<volume>13</volume>: <fpage>1184</fpage>–<lpage>90</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1261/rna.588407" xlink:type="simple">10.1261/rna.588407</ext-link></comment> <object-id pub-id-type="pmid">17585048</object-id></mixed-citation></ref>
<ref id="pcbi.1005867.ref008"><label>8</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Oliphant</surname> <given-names>TE</given-names></name>. <article-title>SciPy: Open source scientific tools for Python</article-title>. <source>Comput Sci Eng</source>. <year>2007</year>;<volume>9</volume>: <fpage>10</fpage>–<lpage>20</lpage>.</mixed-citation></ref>
<ref id="pcbi.1005867.ref009"><label>9</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Spagnolo</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Tomelleri</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Vattemi</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Filosto</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Rizzuto</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Tonin</surname> <given-names>P</given-names></name>. <article-title>A new mutation in the mitochondrial tRNA(Ala) gene in a patient with ophthalmoplegia and dysphagia</article-title>. <source>Neuromuscul Disord</source>. <year>2001</year>;<volume>11</volume>: <fpage>481</fpage>–<lpage>4</lpage>. Available: <ext-link ext-link-type="uri" xlink:href="http://www.ncbi.nlm.nih.gov/pubmed/11404121" xlink:type="simple">http://www.ncbi.nlm.nih.gov/pubmed/11404121</ext-link> <object-id pub-id-type="pmid">11404121</object-id></mixed-citation></ref>
<ref id="pcbi.1005867.ref010"><label>10</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Han</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Dai</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Zhu</surname> <given-names>Q</given-names></name>, <name name-style="western"><surname>Liu</surname> <given-names>X</given-names></name>, <name name-style="western"><surname>Huang</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Yuan</surname> <given-names>Y</given-names></name>, <etal>et al</etal>. <article-title>The mitochondrial tRNA(Ala) T5628C variant may have a modifying role in the phenotypic manifestation of the 12S rRNA C1494T mutation in a large Chinese family with hearing loss</article-title>. <source>Biochem Biophys Res Commun</source>. <year>2007</year>;<volume>357</volume>: <fpage>554</fpage>–<lpage>60</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.bbrc.2007.03.199" xlink:type="simple">10.1016/j.bbrc.2007.03.199</ext-link></comment> <object-id pub-id-type="pmid">17434445</object-id></mixed-citation></ref>
<ref id="pcbi.1005867.ref011"><label>11</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Richards</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Aziz</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Bale</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Bick</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Das</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Gastier-Foster</surname> <given-names>J</given-names></name>, <etal>et al</etal>. <article-title>Standards and guidelines for the interpretation of sequence variants: a joint consensus recommendation of the American College of Medical Genetics and Genomics and the Association for Molecular Pathology</article-title>. <source>Genet Med</source>. <year>2015</year>;<volume>17</volume>: <fpage>405</fpage>–<lpage>423</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/gim.2015.30" xlink:type="simple">10.1038/gim.2015.30</ext-link></comment> <object-id pub-id-type="pmid">25741868</object-id></mixed-citation></ref>
</ref-list>
</back>
</article>