<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1d3 20150301//EN" "http://jats.nlm.nih.gov/publishing/1.1d3/JATS-journalpublishing1.dtd">
<article article-type="research-article" dtd-version="1.1d3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">PLoS Comput Biol</journal-id>
<journal-id journal-id-type="publisher-id">plos</journal-id>
<journal-id journal-id-type="pmc">ploscomp</journal-id>
<journal-title-group>
<journal-title>PLOS Computational Biology</journal-title>
</journal-title-group>
<issn pub-type="ppub">1553-734X</issn>
<issn pub-type="epub">1553-7358</issn>
<publisher>
<publisher-name>Public Library of Science</publisher-name>
<publisher-loc>San Francisco, CA USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.1371/journal.pcbi.1007453</article-id>
<article-id pub-id-type="publisher-id">PCOMPBIOL-D-19-01091</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Research Article</subject>
</subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Medicine and health sciences</subject><subj-group><subject>Pathology and laboratory medicine</subject><subj-group><subject>Pathogenesis</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Computer and information sciences</subject><subj-group><subject>Software engineering</subject><subj-group><subject>Software tools</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Engineering and technology</subject><subj-group><subject>Software engineering</subject><subj-group><subject>Software tools</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Molecular biology</subject><subj-group><subject>Molecular biology techniques</subject><subj-group><subject>Sequencing techniques</subject><subj-group><subject>DNA sequencing</subject><subj-group><subject>Next-generation sequencing</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Research and analysis methods</subject><subj-group><subject>Molecular biology techniques</subject><subj-group><subject>Sequencing techniques</subject><subj-group><subject>DNA sequencing</subject><subj-group><subject>Next-generation sequencing</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Computational biology</subject><subj-group><subject>Genome analysis</subject><subj-group><subject>Transcriptome analysis</subject><subj-group><subject>Next-generation sequencing</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life 
sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Genomics</subject><subj-group><subject>Genome analysis</subject><subj-group><subject>Transcriptome analysis</subject><subj-group><subject>Next-generation sequencing</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Genomics</subject><subj-group><subject>Genomic medicine</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Genetics of disease</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Computer and information sciences</subject><subj-group><subject>Computing methods</subject><subj-group><subject>Control sequences</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Research and analysis methods</subject><subj-group><subject>Research design</subject><subj-group><subject>Cohort studies</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Genetic dominance</subject><subj-group><subject>Dominant traits</subject><subj-group><subject>Autosomal dominant traits</subject></subj-group></subj-group></subj-group></subj-group></subj-group></article-categories>
<title-group>
<article-title>TAPES: A tool for assessment and prioritisation in exome studies</article-title>
<alt-title alt-title-type="running-head">TAPES: Prioritisation in exome studies</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0002-6397-051X</contrib-id>
<name name-style="western">
<surname>Xavier</surname>
<given-names>Alexandre</given-names>
</name>
<role content-type="http://credit.casrai.org/">Conceptualization</role>
<role content-type="http://credit.casrai.org/">Formal analysis</role>
<role content-type="http://credit.casrai.org/">Methodology</role>
<role content-type="http://credit.casrai.org/">Software</role>
<role content-type="http://credit.casrai.org/">Writing – original draft</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="corresp" rid="cor001">*</xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0001-7724-3404</contrib-id>
<name name-style="western">
<surname>Scott</surname>
<given-names>Rodney J.</given-names>
</name>
<role content-type="http://credit.casrai.org/">Funding acquisition</role>
<role content-type="http://credit.casrai.org/">Resources</role>
<role content-type="http://credit.casrai.org/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Talseth-Palmer</surname>
<given-names>Bente A.</given-names>
</name>
<role content-type="http://credit.casrai.org/">Funding acquisition</role>
<role content-type="http://credit.casrai.org/">Supervision</role>
<role content-type="http://credit.casrai.org/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
</contrib-group>
<aff id="aff001"><label>1</label> <addr-line>School of Biomedical Sciences and Pharmacy, Faculty of Health and Medicine, University of Newcastle and Hunter Medical Research Institute, Newcastle, Australia</addr-line></aff>
<aff id="aff002"><label>2</label> <addr-line>NSW Health Pathology North, John Hunter Hospital, Newcastle, Australia</addr-line></aff>
<aff id="aff003"><label>3</label> <addr-line>Clinic for Research, Innovation, Education and Development, Møre and Romsdal Hospital Trust, Molde, Norway</addr-line></aff>
<contrib-group>
<contrib contrib-type="editor" xlink:type="simple">
<name name-style="western">
<surname>Pertea</surname>
<given-names>Mihaela</given-names>
</name>
<role>Editor</role>
<xref ref-type="aff" rid="edit1"/>
</contrib>
</contrib-group>
<aff id="edit1"><addr-line>Johns Hopkins University, UNITED STATES</addr-line></aff>
<author-notes>
<fn fn-type="conflict" id="coi001">
<p>The authors have declared that no competing interests exist.</p>
</fn>
<corresp id="cor001">* E-mail: <email xlink:type="simple">alexandre.xavier@live.fr</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>15</day>
<month>10</month>
<year>2019</year>
</pub-date>
<pub-date pub-type="collection">
<month>10</month>
<year>2019</year>
</pub-date>
<volume>15</volume>
<issue>10</issue>
<elocation-id>e1007453</elocation-id>
<history>
<date date-type="received">
<day>30</day>
<month>6</month>
<year>2019</year>
</date>
<date date-type="accepted">
<day>1</day>
<month>10</month>
<year>2019</year>
</date>
</history>
<permissions>
<copyright-year>2019</copyright-year>
<copyright-holder>Xavier et al</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="info:doi/10.1371/journal.pcbi.1007453"/>
<abstract>
<p>Next-generation sequencing continues to grow in importance for researchers. Exome sequencing has become a widespread tool to further study the genomic basis of Mendelian diseases. In an effort to identify pathogenic variants, reject benign variants and better predict variant effects in downstream analysis, the American College of Medical Genetics (ACMG) published a set of criteria in 2015. While there are multiple publicly available software tools to assign the ACMG criteria, most of them do not take into account multi-sample variant calling formats. Here we present a tool for assessment and prioritisation in exome studies (TAPES, <ext-link ext-link-type="uri" xlink:href="https://github.com/a-xavier/tapes" xlink:type="simple">https://github.com/a-xavier/tapes</ext-link>), an open-source tool designed for small-scale exome studies. TAPES can quickly assign ACMG criteria using ANNOVAR or VEP annotated files and implements a model to transform the categorical ACMG criteria into a continuous probability, allowing for a more accurate classification of pathogenicity or benignity of variants. In addition, TAPES can work with cohorts sharing a common phenotype by utilising a simple enrichment analysis, requiring no controls as an input as well as providing powerful filtering and reporting options. Finally, benchmarks showed that TAPES outperforms available tools to detect both pathogenic and benign variants, while also integrating the identification of enriched variants in study cohorts compared to the general population, making it an ideal tool to evaluate a smaller cohort before using bigger scale studies.</p>
</abstract>
<abstract abstract-type="summary">
<title>Author summary</title>
<p>New sequencing techniques allow researchers to study the genetic basis of diseases. Predicting the effect of genetic variants is critical to understand the mechanisms underlying disease. Available software can predict how pathogenic a variant is, but does not take into account the abundance of a variant in a cohort. TAPES is a simple open-source tool that can both more accurately predict pathogenicity (using probability over categories) and provide insight on variant enrichment in a cohort sharing the same disease.</p>
</abstract>
<funding-group>
<award-group id="award001">
<funding-source>
<institution>the Hunter Cancer Research Alliance</institution>
</funding-source>
<principal-award-recipient>
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0002-6397-051X</contrib-id>
<name name-style="western">
<surname>Xavier</surname>
<given-names>Alexandre</given-names>
</name>
</principal-award-recipient>
</award-group>
<award-group id="award002">
<funding-source>
<institution-wrap>
<institution-id institution-id-type="funder-id">http://dx.doi.org/10.13039/501100001102</institution-id>
<institution>Cancer Council NSW</institution>
</institution-wrap>
</funding-source>
<principal-award-recipient>
<name name-style="western">
<surname>Talseth-Palmer</surname>
<given-names>Bente A.</given-names>
</name>
</principal-award-recipient>
</award-group>
<award-group id="award003">
<funding-source>
<institution>The University Of Newcastle</institution>
</funding-source>
<principal-award-recipient>
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0002-6397-051X</contrib-id>
<name name-style="western">
<surname>Xavier</surname>
<given-names>Alexandre</given-names>
</name>
</principal-award-recipient>
</award-group>
<funding-statement>The Hunter Cancer Research Alliance (<ext-link ext-link-type="uri" xlink:href="https://www.hcra.com.au/" xlink:type="simple">https://www.hcra.com.au/</ext-link>) funded Bente Talseth-Palmer and Alexandre Xavier. The University of Newcastle (<ext-link ext-link-type="uri" xlink:href="https://www.newcastle.edu.au/" xlink:type="simple">https://www.newcastle.edu.au/</ext-link>) funded Alexandre Xavier. The Cancer Council NSW (<ext-link ext-link-type="uri" xlink:href="https://www.cancercouncil.com.au/" xlink:type="simple">https://www.cancercouncil.com.au/</ext-link>) funded Bente Talseth-Palmer. The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</funding-statement>
</funding-group>
<counts>
<fig-count count="3"/>
<table-count count="0"/>
<page-count count="9"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>PLOS Publication Stage</meta-name>
<meta-value>vor-update-to-uncorrected-proof</meta-value>
</custom-meta>
<custom-meta>
<meta-name>Publication Update</meta-name>
<meta-value>2019-10-25</meta-value>
</custom-meta>
<custom-meta id="data-availability">
<meta-name>Data Availability</meta-name>
<meta-value>All source code can be found at: <ext-link ext-link-type="uri" xlink:href="https://github.com/a-xavier/tapes" xlink:type="simple">https://github.com/a-xavier/tapes</ext-link>. Documentation is also available through this repository and at: <ext-link ext-link-type="uri" xlink:href="https://github.com/a-xavier/tapes/wiki" xlink:type="simple">https://github.com/a-xavier/tapes/wiki</ext-link>.</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<disp-quote><p>This is a <italic>PLOS Computational Biology</italic> Software paper.</p></disp-quote>
<sec id="sec001" sec-type="intro">
<title>Introduction</title>
<p>With the advances in Next-Generation Sequencing (NGS) technologies and the decline in price over the last few years, exome sequencing has become a standard tool to explore the genetic basis of inherited diseases [<xref ref-type="bibr" rid="pcbi.1007453.ref001">1</xref>]. It has become easy to annotate the ever-increasing number of variants identified by such methods, using tools such as VEP [<xref ref-type="bibr" rid="pcbi.1007453.ref002">2</xref>], snpEff [<xref ref-type="bibr" rid="pcbi.1007453.ref003">3</xref>] or ANNOVAR [<xref ref-type="bibr" rid="pcbi.1007453.ref004">4</xref>]. These tools help researchers to better predict the downstream effect of a variant and give insight, for example, on the frequency of the mutation in the general population, the impact on proteins or in-silico predictions of pathogenicity.</p>
<p>In 2015, the American College of Medical Genetics (ACMG) published a set of criteria to assess the probability of a variant’s pathogenicity, classifying variants into five categories [<xref ref-type="bibr" rid="pcbi.1007453.ref005">5</xref>], from benign to pathogenic, facilitating downstream analysis.</p>
<p>Since then, tools have been developed to assess individual variant pathogenicity using the ACMG criteria (such as CharGer [<xref ref-type="bibr" rid="pcbi.1007453.ref006">6</xref>] and InterVar [<xref ref-type="bibr" rid="pcbi.1007453.ref007">7</xref>]) but they do not have the ability to take into account the frequency of variants in a cohort. The categorical nature of the ACMG criteria also leaves a lot of variants classified as “a variant of unknown significance”.</p>
<p>Here, we present TAPES, an open-source tool to both assess and prioritise variants by pathogenicity. TAPES can assign the ACMG criteria and, by using one of the first implementations of the model described in Tavtigian <italic>et al</italic>. [<xref ref-type="bibr" rid="pcbi.1007453.ref008">8</xref>], provide a more nuanced and easy to understand estimated probability for a variant to be either pathogenic or benign, thus transforming categorical classification into a more linear prediction. Our goal during development was first to create a simple tool that can better predict pathogenicity and reject benign variants, and then to assess a cohort sharing a phenotype by detecting enriched variants compared to the general population without the need of control samples. In addition, we focused on providing simple yet powerful reporting and filtering systems, while allowing pathway analysis of pathogenic mutations, gene-burden calculations and per-sample reporting.</p>
</sec>
<sec id="sec002" sec-type="materials|methods">
<title>Design and implementation</title>
<sec id="sec003">
<title>ANNOVAR interface and annotated variant file</title>
<p>The TAPES sorting option can be used with both ANNOVAR and VEP annotated variant calling files (VCF). However, we also provide users with simple wrapping tools for a local installation of ANNOVAR to simplify the workflow (this requires users to download ANNOVAR). Users can annotate VCF, gzipped VCF and binary VCF (BCF) using two simple commands without having to specify the databases and annotations to use.</p>
<p>While there is a set of annotations needed to assign all ACMG criteria (see <ext-link ext-link-type="uri" xlink:href="https://github.com/a-xavier/tapes/wiki/Necessary-Annotations" xlink:type="simple">https://github.com/a-xavier/tapes/wiki/Necessary-Annotations</ext-link> for the full list), TAPES will use as many available annotations as possible to assign the relevant ACMG criteria.</p>
</sec>
<sec id="sec004">
<title>Variant classification</title>
<p><bold>TAPES</bold> requires annotated ANNOVAR (VCF or tab/comma-separated values) or VEP (VCF) files to use the sorting module.</p>
<sec id="sec005">
<title>Regular ACMG criteria assignment</title>
<p>For most of the ACMG criteria assignment (PVS1, PS1, PS3, PM1, PM2, PM4, PM5, PP2, PP3, PP5, BS1, BS2, BS3, BP1, BP3, BP4, BP6, BP7 and BA1), we tried to stay as true to the original ACMG definition as possible when implementing the criteria assignment. Please see Richards <italic>et al</italic>. [<xref ref-type="bibr" rid="pcbi.1007453.ref005">5</xref>] and <xref ref-type="supplementary-material" rid="pcbi.1007453.s001">S1 Table</xref> for more information on the ACMG Criteria definition.</p>
</sec>
<sec id="sec006">
<title>Enrichment analysis / PS4 criteria</title>
<p>One of TAPES’ unique features is the ability to calculate variant enrichment from public frequency data (ExAC or gnomAD [<xref ref-type="bibr" rid="pcbi.1007453.ref009">9</xref>]), without having to sequence control samples. In cohort studies, TAPES requires a multi-sample VCF file to extract genotyping data and get frequencies from the cohort studied. It uses a simple one-sided Fisher’s exact test to calculate both the Odds Ratio (OR) and the p-value of the enrichment. Only the variant enrichment in the cohort is tested against the general population.</p>
<p>Since OR calculation requires integer numbers and frequency in the general population is given as a 0–1 fraction, TAPES approximates the number of individuals affected using the following formula.</p>
<p>If <italic>MAF</italic><sub><italic>c</italic></sub> is the Minor Allele Frequency (MAF) in a control population, <italic>n</italic><sub><italic>c</italic></sub> is the number of individuals affected by the variant in the control population and <italic>N</italic><sub><italic>c</italic></sub> is the number of individuals without the variant then:
<disp-formula id="pcbi.1007453.e001">
<alternatives>
<graphic id="pcbi.1007453.e001g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007453.e001" xlink:type="simple"/>
<mml:math display="block" id="M1">
<mml:mrow><mml:mi>M</mml:mi><mml:mi>A</mml:mi><mml:msub><mml:mi>F</mml:mi><mml:mi>c</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>y</mml:mi><mml:mo>×</mml:mo><mml:msup><mml:mrow><mml:mn>10</mml:mn></mml:mrow><mml:mrow><mml:mo>−</mml:mo><mml:mi>x</mml:mi></mml:mrow></mml:msup><mml:mo>,</mml:mo><mml:msub><mml:mi>n</mml:mi><mml:mi>c</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mrow><mml:mo>⌈</mml:mo><mml:mi>y</mml:mi><mml:mo>⌉</mml:mo></mml:mrow><mml:mspace width="4pt"/><mml:mtext>and</mml:mtext><mml:mspace width="4pt"/><mml:msub><mml:mi>N</mml:mi><mml:mi>c</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msup><mml:mrow><mml:mn>10</mml:mn></mml:mrow><mml:mi>x</mml:mi></mml:msup></mml:mrow><mml:mn>2</mml:mn></mml:mfrac><mml:mo>−</mml:mo><mml:msub><mml:mi>n</mml:mi><mml:mi>c</mml:mi></mml:msub><mml:mo>.</mml:mo></mml:mrow>
</mml:math>
</alternatives>
</disp-formula></p>
<p>For example if:
<disp-formula id="pcbi.1007453.e002">
<alternatives>
<graphic id="pcbi.1007453.e002g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007453.e002" xlink:type="simple"/>
<mml:math display="block" id="M2">
<mml:mrow><mml:mi>M</mml:mi><mml:mi>A</mml:mi><mml:msub><mml:mi>F</mml:mi><mml:mi>c</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mn>3.23</mml:mn><mml:mo>×</mml:mo><mml:msup><mml:mrow><mml:mn>10</mml:mn></mml:mrow><mml:mrow><mml:mo>−</mml:mo><mml:mn>5</mml:mn><mml:mspace width="4pt"/></mml:mrow></mml:msup><mml:mtext>then</mml:mtext><mml:mspace width="4pt"/><mml:msub><mml:mi>n</mml:mi><mml:mi>c</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mn>4</mml:mn><mml:mspace width="4pt"/><mml:mtext>and</mml:mtext><mml:mspace width="4pt"/><mml:msub><mml:mi>N</mml:mi><mml:mi>c</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msup><mml:mrow><mml:mn>10</mml:mn></mml:mrow><mml:mn>5</mml:mn></mml:msup></mml:mrow><mml:mn>2</mml:mn></mml:mfrac><mml:mo>−</mml:mo><mml:mn>4</mml:mn><mml:mo>.</mml:mo></mml:mrow>
</mml:math>
</alternatives>
</disp-formula></p>
<p>This approximation is only valid if the following assumptions are made: the MAF in the control population is under 0.05, and very rare variants are mostly heterozygous.</p>
<p>The PS4 criteria assignment was designed to be more stringent than a normal study with controls (choosing to overestimate the frequency in the general population) and will only be assigned if OR ≥ 20, p-value ≤ 0.001 and at least 2 individuals in the cohort share the variant.</p>
</sec>
<sec id="sec007">
<title>Trio analysis / PS2 assignment</title>
<p>TAPES allows researchers to work with trio studies. In trio studies, the user provides information such as sample name, trio ID and pedigree information in a tab-delimited file. Then PS2 will be assigned if a variant is identified as <italic>de-novo</italic> and healthy parents are removed from downstream analysis. PS2 is assigned to a variant if it was found as <italic>de-novo</italic> in any trio but details from each trio will still be provided.</p>
</sec>
<sec id="sec008">
<title>Probability of pathogenicity calculation</title>
<p>TAPES includes the model developed by Tavtigian <italic>et al</italic>. [<xref ref-type="bibr" rid="pcbi.1007453.ref008">8</xref>] to transform ACMG categorical classification into linear probability of pathogenicity and the method uses the default parameters from that publication (Prior P = 0.10, O<sub>PVSt</sub> = 350 and X = 2). This allows for a finer pathogenicity prediction and adjustable thresholds to decide variant pathogenicity. It is important to keep in mind that this measure is a probability and not a measure of how pathogenic a variant is.</p>
</sec>
</sec>
<sec id="sec009">
<title>Cohort reporting</title>
<p><bold>TAPES</bold> provides an array of different useful reports.</p>
<sec id="sec010">
<title>Filtering</title>
<p>TAPES can easily perform advanced filtering. Three different options are available. First, users can provide a custom list of gene symbols (either as a text file or directly on the command line) to only output variants present in those genes. Then users can also do a reverse pathway search by providing the name of a pathway (extracted from KEGG pathways [<xref ref-type="bibr" rid="pcbi.1007453.ref010">10</xref>]) and output a report with variants in genes involved in that pathway. Finally, users can run searches based on terms contained in the description for each gene, i.e. if the user looks for ‘autosomal dominant’ genes or ‘colorectal cancer’ genes. These filtered reports keep the same format as the main report, making it possible to use them with other reporting tools.</p>
</sec>
<sec id="sec011">
<title>By-sample report</title>
<p>For each individual in the cohort, a report containing the variant predicted to be pathogenic with the highest level of confidence will be available. This allows the study of individual samples and their specificity.</p>
</sec>
<sec id="sec012">
<title>By-gene report</title>
<p>TAPES can also calculate, for each gene, a gene burden score. This score helps determine which genes harbour the most potentially pathogenic variants in a cohort. This can be useful when searching for variants in diseases caused by single genes and that cannot be discovered using pathway analysis. The gene burden score is calculated by summing the probability of pathogenicity of a specific variant multiplied by the number of individuals with that genotype in the cohort.</p>
<disp-formula id="pcbi.1007453.e003">
<alternatives>
<graphic id="pcbi.1007453.e003g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pcbi.1007453.e003" xlink:type="simple"/>
<mml:math display="block" id="M3">
<mml:mi>G</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>e</mml:mi><mml:mspace width="4pt"/><mml:mi>b</mml:mi><mml:mi>u</mml:mi><mml:mi>r</mml:mi><mml:mi>d</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mspace width="4pt"/><mml:mi>s</mml:mi><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:munderover><mml:mo stretchy="false">∑</mml:mo><mml:mrow><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>n</mml:mi></mml:mrow></mml:munderover><mml:mrow><mml:msub><mml:mrow><mml:mi>P</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>×</mml:mo><mml:msub><mml:mrow><mml:mi>N</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mrow>
</mml:math>
</alternatives>
</disp-formula>
<p>Calculated for each gene, where <italic>P</italic><sub><italic>i</italic></sub> = the probability of pathogenicity of the variant and <italic>N</italic><sub><italic>i</italic></sub> = Number of samples affected by the variant. If <italic>P</italic><sub><italic>i</italic></sub> ≤ 0.80 then the variant is excluded.</p>
<p>This measure is useful to detect which genes in the cohort are particularly enriched in pathogenic and probably pathogenic variants (it is important to remember that this measure is a sum of probabilities). However, there are a few caveats. This measure might be affected by very long genes or genes frequently mutated in exomes (FLAGS [<xref ref-type="bibr" rid="pcbi.1007453.ref011">11</xref>]). In some cases, poorly mapped reads (for example due to pseudo-autosomal regions in the X or Y chromosome), might impact the result with an excessive number of samples affected by a variant. TAPES provides an appropriate warning for all of those cases.</p>
</sec>
<sec id="sec013">
<title>Pathway analysis</title>
<p>TAPES can also perform a pathway analysis using the EnrichR [<xref ref-type="bibr" rid="pcbi.1007453.ref012">12</xref>] API. Only genes containing variants that are predicted to be pathogenic are kept as a gene list. The user can then use any library to analyse the gene list but the default is GO_Biological_Process_2018. Pathway analysis is important to understand the possibly disrupted mechanism and the commonalities between variants found in a cohort.</p>
</sec>
</sec>
</sec>
<sec id="sec014" sec-type="results">
<title>Results</title>
<sec id="sec015">
<title>Variant classification</title>
<p>TAPES variant classification was benchmarked against similar tools, CharGer [<xref ref-type="bibr" rid="pcbi.1007453.ref006">6</xref>] and InterVar [<xref ref-type="bibr" rid="pcbi.1007453.ref007">7</xref>] using the prediction on the pathogenicity of variants of the expert panel of Zhang et al., 2015 as reference [<xref ref-type="bibr" rid="pcbi.1007453.ref013">13</xref>] (see <xref ref-type="supplementary-material" rid="pcbi.1007453.s002">S2 Table</xref> for the full table). This dataset was also used to benchmark CharGer in their original publication. The ‘probably pathogenic’ and ‘pathogenic’ predictions were pooled into one ‘pathogenic’ group. Similarly, the ‘probably benign’ and ‘benign’ were pooled into one ‘benign’ group.</p>
<p>To assess the predictive power of each software, we used Receiver Operating Characteristics (ROC) curves and calculated the area under the curve (AUC) as well as the precision-recall curves and average precision (AP). We compared TAPES ACMG and probability of pathogenicity prediction with CharGer score and InterVar ACMG prediction (see <xref ref-type="fig" rid="pcbi.1007453.g001">Fig 1</xref>).</p>
<fig id="pcbi.1007453.g001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1007453.g001</object-id>
<label>Fig 1</label>
<caption>
<title>ROC curves and precision recall curves.</title>
<p>a) ROC curve of various software tools for pathogenicity prediction AUC b) ROC curve of various software tools for benignity prediction AUC c) Precision-recall curve of various software tools for pathogenicity prediction d) Precision-recall curve of various software tools for benignity prediction (Metrics used: TAPES proba: TAPES probability of pathogenicity prediction, TAPES ACMG: TAPES ACMG prediction, CharGer score: CharGer prediction of pathogenicity based on a custom score, InterVar ACMG: InterVar ACMG prediction).</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007453.g001" xlink:type="simple"/>
</fig>
<p>TAPES probability of pathogenicity, using Tavtigian <italic>et al</italic>. [<xref ref-type="bibr" rid="pcbi.1007453.ref008">8</xref>] modelling, outperformed both software tools tested using AUC and AP for prediction of both pathogenic and benign variants.</p>
<p>AUC and AP show that using TAPES ACMG criteria assignment remains less precise than using CharGer custom score (due to the additional information CharGer needs to function properly) and closer to InterVar. Using the probability of pathogenicity should be the preferred way to identify pathogenic variants and reject benign variants. Based on ROC curves, a threshold of 0.80–0.85 for probability of pathogenicity seemed to keep a high true positive rate (TPR) while maintaining a low false positive rate (FPR) for predicting pathogenic variants. Similarly, a threshold of 0.20–0.35 for probability of pathogenicity had high TPR and low FPR for predicting benignity.</p>
<p>To validate these findings and choose the best probability thresholds for pathogenicity and benignity, we used TAPES, InterVar and CharGer on a different dataset <bold>(see</bold> <xref ref-type="supplementary-material" rid="pcbi.1007453.s003">S3 Table</xref>). Using 530 hand-curated variants from the ClinGen evidence repository (<ext-link ext-link-type="uri" xlink:href="https://erepo.clinicalgenome.org/evrepo/" xlink:type="simple">https://erepo.clinicalgenome.org/evrepo/</ext-link>) as ground truth, TAPES outperformed both InterVar and CharGer (<bold>see</bold> <xref ref-type="fig" rid="pcbi.1007453.g002">Fig 2</xref>). In addition to the precision of the prediction, TAPES also outperformed other software in terms of absolute number of variants correctly identified.</p>
<fig id="pcbi.1007453.g002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1007453.g002</object-id>
<label>Fig 2</label>
<caption>
<title>Validation dataset software comparisons.</title>
<p><bold>a) Percentage of identical calls between the ClinGen expert panel decisions and software prediction</bold>. Lenient thresholds are 0.80 for pathogenicity and 0.35 for benignity. Strict thresholds are 0.85 for pathogenicity and 0.20 for benignity. <bold>b) Absolute number of variant predictions</bold>. Pathogenic and benign variants correctly and incorrectly identified between the panel of experts and various software. (Metrics used: <underline>TAPES probability lenient</underline>: TAPES probability of pathogenicity prediction 0.35–0.80, <underline>TAPES probability strict</underline>: TAPES probability of pathogenicity prediction 0.20–0.85, <underline>TAPES ACMG</underline>: TAPES ACMG prediction, <underline>CharGer</underline>: CharGer prediction of pathogenicity based on a custom score, <underline>InterVar ACMG</underline>: InterVar ACMG prediction).</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007453.g002" xlink:type="simple"/>
</fig>
<p>We recommend using TAPES probability of pathogenicity prediction with either lenient thresholds of 0.80 and 0.35 (respectively for pathogenicity and benignity) or stricter thresholds of 0.85 and 0.20.</p>
</sec>
<sec id="sec016">
<title>Variant enrichment / PS4 benchmark</title>
<p>We compared our method of calculating ORs with the normal method (see <xref ref-type="fig" rid="pcbi.1007453.g003">Fig 3</xref>).</p>
<fig id="pcbi.1007453.g003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pcbi.1007453.g003</object-id>
<label>Fig 3</label>
<caption>
<title>PS4 calculation with one-sided Fisher’s exact test.</title>
<p>Comparison of TAPES extrapolation of odds ratios with the normal method (top graph). Comparison of the p-value of both methods (bottom graph). The <bold>vertical dotted line</bold> represents the known frequency of the variant in the studied cohort (0.025). The <bold>horizontal green dotted line</bold> represents the threshold used to assign PS4 (OR = 20 or ln(OR) = 2.9957 (top) and p-value &lt; 0.01 (bottom)).</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007453.g003" xlink:type="simple"/>
</fig>
<p>The OR using TAPES extrapolation is always smaller than with the normal calculation, making it more stringent. Similarly, the p-value of Fisher’s exact test rises faster with frequency than with the normal method. This way, only the most significantly enriched variants are assigned PS4, to ensure very few false positives.</p>
<sec id="sec017">
<title>Reporting options</title>
<p>TAPES reporting options are powerful and easy to use. Using a mock input file with variants from Zhang <italic>et al</italic>. [<xref ref-type="bibr" rid="pcbi.1007453.ref013">13</xref>] as well as simulated samples to form a cohort, the pathway analysis correctly identified DNA repair as the pathway with the most probable pathogenic variants.</p>
<p>The by-gene report also identified BRCA2 as the gene with the highest gene burden.</p>
<p>See <xref ref-type="supplementary-material" rid="pcbi.1007453.s004">S1 File</xref> for all report templates.</p>
</sec>
</sec>
</sec>
<sec id="sec018">
<title>Availability and future directions</title>
<p>TAPES is available on GitHub at: <ext-link ext-link-type="uri" xlink:href="https://github.com/a-xavier/tapes" xlink:type="simple">https://github.com/a-xavier/tapes</ext-link>, under the MIT licence, which allows anyone to both freely download and modify the source code. Help can be found both in the manual (located in the main repository) and on the wiki (<ext-link ext-link-type="uri" xlink:href="https://github.com/a-xavier/tapes/wiki" xlink:type="simple">https://github.com/a-xavier/tapes/wiki</ext-link>). Examples of inputs can also be found in the main repository. Dependencies can be easily installed using PyPI repositories (pip). All builds are verified through Travis continuous integration on Linux, Windows and macOS. All benchmarks and examples shown in this manuscript were generated using TAPES release 0.1.</p>
<p>All benchmarks and examples were generated using the initial release 0.1 of TAPES (<ext-link ext-link-type="uri" xlink:href="https://github.com/a-xavier/tapes/releases" xlink:type="simple">https://github.com/a-xavier/tapes/releases</ext-link>).</p>
<p>TAPES will continue to evolve with the advances in various databases such as ExAC, dbSNP or dbNSFP. As they constantly update their data and formats, TAPES will evolve to be more precise and accurate. In addition, future directions include more statistical measures to detect significant variants in different cohort studies.</p>
<p>We aim to keep TAPES as simple and useful as possible to make it a perfect endpoint tool to analyse variants from small-scale cohorts.</p>
</sec>
<sec id="sec019">
<title>Supporting information</title>
<supplementary-material id="pcbi.1007453.s001" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007453.s001" xlink:type="simple">
<label>S1 Table</label>
<caption>
<title>ACMG criteria assignment in TAPES and definitions from the original Richards et al 2015 article.</title>
<p>(XLSX)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1007453.s002" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007453.s002" xlink:type="simple">
<label>S2 Table</label>
<caption>
<title>Comparison of Prediction between different pathogenicity assessment software and the expert panel from Zhang J et al. 2015.</title>
<p>Comparison between TAPES ACMG and pathogenicity probability prediction, CharGer Prediction Score and InterVar ACMG Prediction.</p>
<p>(XLSX)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1007453.s003" mimetype="text/plain" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007453.s003" xlink:type="simple">
<label>S3 Table</label>
<caption>
<title>Comparison of Prediction between different pathogenicity assessment software and the expert panel from ClinGen evidence repository variants.</title>
<p>Comparison between TAPES ACMG and pathogenicity probability prediction, CharGer Prediction Score and InterVar ACMG Prediction.</p>
<p>(TXT)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1007453.s004" mimetype="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007453.s004" xlink:type="simple">
<label>S1 File</label>
<caption>
<title>Example reports from TAPES sort option.</title>
<p>Generated using the data from: Zhang, J., et al. Germline Mutations in Predisposition Genes in Pediatric Cancer. N Engl J Med 2015;373(24):2336–2346. Using the command: <italic>python tapes</italic>.<italic>py sort -i</italic> .<italic>/input</italic>.<italic>csv -o</italic> .<italic>/Report/ --tab --by_gene --by_sample --enrichr --disease "autosomal dominant" --kegg "Pathways in cancer"</italic>. This file gives examples for the main report, the by-gene report, the by-sample report, the enrichr report, the disease report and the kegg report.</p>
<p>(XLSX)</p>
</caption>
</supplementary-material>
<supplementary-material id="pcbi.1007453.s005" mimetype="application/zip" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007453.s005" xlink:type="simple">
<label>S2 File</label>
<caption>
<title>Files used for TAPES benchmark and validation.</title>
<p><bold>The Initial Benchmark folder contains all files used for the original benchmark, CharGer_and_Panel_Benchmark.xlsx:</bold> CharGer pathogenicity prediction and expert panel decision from Zhang, J., et al. 2015, extracted from the CharGer original publication, <bold>Synthetic_VCF_for_Benchmark.vcf.vcf:</bold> Synthetic VCF file created from the <bold>CharGer_and_Panel_Benchmark.xlsx</bold> variants information, <bold>InterVar_Benchmark.txt:</bold> InterVar predictions of pathogenicity after analysis of the synthetic VCF, <bold>TAPES_Benchmark.xlsx</bold>: TAPES prediction of pathogenicity after analysis of the synthetic VCF. The results of all 3 software are compiled in <xref ref-type="supplementary-material" rid="pcbi.1007453.s002">S2 Table</xref>. <bold>The Validation folder contains all files used for the validation of the pathogenicity thresholds and comparison with other software. TAPES_validation_synthetic.vcf</bold>: Synthetic VCF created with data extracted from the ClinGen evidence repository (<ext-link ext-link-type="uri" xlink:href="https://erepo.clinicalgenome.org/evrepo/" xlink:type="simple">https://erepo.clinicalgenome.org/evrepo/</ext-link>), <bold>TAPES_validation.charger.txt:</bold> the CharGer predictions of pathogenicity after analysis of the Synthetic VCF, <bold>TAPES_Validation.intervar.txt:</bold> InterVar prediction of pathogenicity after analysis of the synthetic VCF, <bold>TAPES_Validation.tapes.txt:</bold> TAPES prediction of pathogenicity after analysis of the Synthetic VCF. The results of all 3 software are compiled in <xref ref-type="supplementary-material" rid="pcbi.1007453.s003">S3 Table</xref>.</p>
<p>(ZIP)</p>
</caption>
</supplementary-material>
</sec>
</body>
<back>
<ack>
<p>The authors would like to thank Mr. Sean Burnard for his helpful advice regarding this manuscript.</p>
</ack>
<ref-list>
<title>References</title>
<ref id="pcbi.1007453.ref001"><label>1</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Bamshad</surname> <given-names>MJ</given-names></name>, <name name-style="western"><surname>Ng</surname> <given-names>SB</given-names></name>, <name name-style="western"><surname>Bigham</surname> <given-names>AW</given-names></name>, <name name-style="western"><surname>Tabor</surname> <given-names>HK</given-names></name>, <name name-style="western"><surname>Emond</surname> <given-names>MJ</given-names></name>, <name name-style="western"><surname>Nickerson</surname> <given-names>DA</given-names></name>, <etal>et al</etal>. <article-title>Exome sequencing as a tool for Mendelian disease gene discovery</article-title>. <source>Nat Rev Genet</source>. <year>2011</year>;<volume>12</volume>(<issue>11</issue>):<fpage>745</fpage>–<lpage>55</lpage>. Epub 2011/09/29. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nrg3031" xlink:type="simple">10.1038/nrg3031</ext-link></comment> <object-id pub-id-type="pmid">21946919</object-id>.</mixed-citation></ref>
<ref id="pcbi.1007453.ref002"><label>2</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>McLaren</surname> <given-names>W</given-names></name>, <name name-style="western"><surname>Gil</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Hunt</surname> <given-names>SE</given-names></name>, <name name-style="western"><surname>Riat</surname> <given-names>HS</given-names></name>, <name name-style="western"><surname>Ritchie</surname> <given-names>GR</given-names></name>, <name name-style="western"><surname>Thormann</surname> <given-names>A</given-names></name>, <etal>et al</etal>. <article-title>The Ensembl Variant Effect Predictor</article-title>. <source>Genome Biol</source>. <year>2016</year>;<volume>17</volume>(<issue>1</issue>):<fpage>122</fpage>. Epub 2016/06/09. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/s13059-016-0974-4" xlink:type="simple">10.1186/s13059-016-0974-4</ext-link></comment> <object-id pub-id-type="pmid">27268795</object-id></mixed-citation></ref>
<ref id="pcbi.1007453.ref003"><label>3</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Cingolani</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Platts</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Wang le</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Coon</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Nguyen</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Wang</surname> <given-names>L</given-names></name>, <etal>et al</etal>. <article-title>A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3</article-title>. <source>Fly (Austin)</source>. <year>2012</year>;<volume>6</volume>(<issue>2</issue>):<fpage>80</fpage>–<lpage>92</lpage>. Epub 2012/06/26. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.4161/fly.19695" xlink:type="simple">10.4161/fly.19695</ext-link></comment> <object-id pub-id-type="pmid">22728672</object-id></mixed-citation></ref>
<ref id="pcbi.1007453.ref004"><label>4</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Wang</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Li</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Hakonarson</surname> <given-names>H</given-names></name>. <article-title>ANNOVAR: functional annotation of genetic variants from high-throughput sequencing data</article-title>. <source>Nucleic Acids Res</source>. <year>2010</year>;<volume>38</volume>(<issue>16</issue>):<fpage>e164</fpage>. Epub 2010/07/06. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/nar/gkq603" xlink:type="simple">10.1093/nar/gkq603</ext-link></comment> <object-id pub-id-type="pmid">20601685</object-id></mixed-citation></ref>
<ref id="pcbi.1007453.ref005"><label>5</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Richards</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Aziz</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Bale</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Bick</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Das</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Gastier-Foster</surname> <given-names>J</given-names></name>, <etal>et al</etal>. <article-title>Standards and guidelines for the interpretation of sequence variants: a joint consensus recommendation of the American College of Medical Genetics and Genomics and the Association for Molecular Pathology</article-title>. <source>Genet Med</source>. <year>2015</year>;<volume>17</volume>(<issue>5</issue>):<fpage>405</fpage>–<lpage>24</lpage>. Epub 2015/03/06. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/gim.2015.30" xlink:type="simple">10.1038/gim.2015.30</ext-link></comment> <object-id pub-id-type="pmid">25741868</object-id></mixed-citation></ref>
<ref id="pcbi.1007453.ref006"><label>6</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Scott</surname> <given-names>AD</given-names></name>, <name name-style="western"><surname>Huang</surname> <given-names>KL</given-names></name>, <name name-style="western"><surname>Weerasinghe</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Mashl</surname> <given-names>RJ</given-names></name>, <name name-style="western"><surname>Gao</surname> <given-names>Q</given-names></name>, <name name-style="western"><surname>Martins Rodrigues</surname> <given-names>F</given-names></name>, <etal>et al</etal>. <article-title>CharGer: Clinical Characterization of Germline Variants</article-title>. <source>Bioinformatics</source>. <year>2018</year>. Epub 2018/08/14. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/bioinformatics/bty649" xlink:type="simple">10.1093/bioinformatics/bty649</ext-link></comment> <object-id pub-id-type="pmid">30102335</object-id>.</mixed-citation></ref>
<ref id="pcbi.1007453.ref007"><label>7</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Li</surname> <given-names>Q</given-names></name>, <name name-style="western"><surname>Wang</surname> <given-names>K</given-names></name>. <article-title>InterVar: Clinical Interpretation of Genetic Variants by the 2015 ACMG-AMP Guidelines</article-title>. <source>Am J Hum Genet</source>. <year>2017</year>;<volume>100</volume>(<issue>2</issue>):<fpage>267</fpage>–<lpage>80</lpage>. Epub 2017/01/31. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.ajhg.2017.01.004" xlink:type="simple">10.1016/j.ajhg.2017.01.004</ext-link></comment> <object-id pub-id-type="pmid">28132688</object-id></mixed-citation></ref>
<ref id="pcbi.1007453.ref008"><label>8</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Tavtigian</surname> <given-names>SV</given-names></name>, <name name-style="western"><surname>Greenblatt</surname> <given-names>MS</given-names></name>, <name name-style="western"><surname>Harrison</surname> <given-names>SM</given-names></name>, <name name-style="western"><surname>Nussbaum</surname> <given-names>RL</given-names></name>, <name name-style="western"><surname>Prabhu</surname> <given-names>SA</given-names></name>, <name name-style="western"><surname>Boucher</surname> <given-names>KM</given-names></name>, <etal>et al</etal>. <article-title>Modeling the ACMG/AMP variant classification guidelines as a Bayesian classification framework</article-title>. <source>Genet Med</source>. <year>2018</year>;<volume>20</volume>(<issue>9</issue>):<fpage>1054</fpage>–<lpage>60</lpage>. Epub 2018/01/05. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/gim.2017.210" xlink:type="simple">10.1038/gim.2017.210</ext-link></comment> <object-id pub-id-type="pmid">29300386</object-id></mixed-citation></ref>
<ref id="pcbi.1007453.ref009"><label>9</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Lek</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Karczewski</surname> <given-names>KJ</given-names></name>, <name name-style="western"><surname>Minikel</surname> <given-names>EV</given-names></name>, <name name-style="western"><surname>Samocha</surname> <given-names>KE</given-names></name>, <name name-style="western"><surname>Banks</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Fennell</surname> <given-names>T</given-names></name>, <etal>et al</etal>. <article-title>Analysis of protein-coding genetic variation in 60,706 humans</article-title>. <source>Nature</source>. <year>2016</year>;<volume>536</volume>(<issue>7616</issue>):<fpage>285</fpage>–<lpage>91</lpage>. Epub 2016/08/19. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nature19057" xlink:type="simple">10.1038/nature19057</ext-link></comment> <object-id pub-id-type="pmid">27535533</object-id></mixed-citation></ref>
<ref id="pcbi.1007453.ref010"><label>10</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Kanehisa</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Furumichi</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Tanabe</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Sato</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Morishima</surname> <given-names>K</given-names></name>. <article-title>KEGG: new perspectives on genomes, pathways, diseases and drugs</article-title>. <source>Nucleic Acids Res</source>. <year>2017</year>;<volume>45</volume>(<issue>D1</issue>):<fpage>D353</fpage>–<lpage>D61</lpage>. Epub 2016/12/03. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/nar/gkw1092" xlink:type="simple">10.1093/nar/gkw1092</ext-link></comment> <object-id pub-id-type="pmid">27899662</object-id></mixed-citation></ref>
<ref id="pcbi.1007453.ref011"><label>11</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Shyr</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Tarailo-Graovac</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Gottlieb</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Lee</surname> <given-names>JJ</given-names></name>, <name name-style="western"><surname>van Karnebeek</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Wasserman</surname> <given-names>WW</given-names></name>. <article-title>FLAGS, frequently mutated genes in public exomes</article-title>. <source>BMC Med Genomics</source>. <year>2014</year>;<volume>7</volume>:<fpage>64</fpage>. Epub 2014/12/04. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/s12920-014-0064-y" xlink:type="simple">10.1186/s12920-014-0064-y</ext-link></comment> <object-id pub-id-type="pmid">25466818</object-id></mixed-citation></ref>
<ref id="pcbi.1007453.ref012"><label>12</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Kuleshov</surname> <given-names>MV</given-names></name>, <name name-style="western"><surname>Jones</surname> <given-names>MR</given-names></name>, <name name-style="western"><surname>Rouillard</surname> <given-names>AD</given-names></name>, <name name-style="western"><surname>Fernandez</surname> <given-names>NF</given-names></name>, <name name-style="western"><surname>Duan</surname> <given-names>Q</given-names></name>, <name name-style="western"><surname>Wang</surname> <given-names>Z</given-names></name>, <etal>et al</etal>. <article-title>Enrichr: a comprehensive gene set enrichment analysis web server 2016 update</article-title>. <source>Nucleic Acids Res</source>. <year>2016</year>;<volume>44</volume>(<issue>W1</issue>):<fpage>W90</fpage>–<lpage>7</lpage>. Epub 2016/05/05. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/nar/gkw377" xlink:type="simple">10.1093/nar/gkw377</ext-link></comment> <object-id pub-id-type="pmid">27141961</object-id></mixed-citation></ref>
<ref id="pcbi.1007453.ref013"><label>13</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Zhang</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Walsh</surname> <given-names>MF</given-names></name>, <name name-style="western"><surname>Wu</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Edmonson</surname> <given-names>MN</given-names></name>, <name name-style="western"><surname>Gruber</surname> <given-names>TA</given-names></name>, <name name-style="western"><surname>Easton</surname> <given-names>J</given-names></name>, <etal>et al</etal>. <article-title>Germline Mutations in Predisposition Genes in Pediatric Cancer</article-title>. <source>N Engl J Med</source>. <year>2015</year>;<volume>373</volume>(<issue>24</issue>):<fpage>2336</fpage>–<lpage>46</lpage>. Epub 2015/11/19. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1056/NEJMoa1508054" xlink:type="simple">10.1056/NEJMoa1508054</ext-link></comment> <object-id pub-id-type="pmid">26580448</object-id></mixed-citation></ref>
</ref-list>
</back>
<sub-article article-type="aggregated-review-documents" id="pcbi.1007453.r001" specific-use="decision-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pcbi.1007453.r001</article-id>
<title-group>
<article-title>Decision Letter 0</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Pertea</surname>
<given-names>Mihaela</given-names>
</name>
<role>Software Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2019</copyright-year>
<copyright-holder>Mihaela Pertea</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pcbi.1007453" document-id-type="doi" document-type="article" id="rel-obj001" link-type="peer-reviewed-article"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>0</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">2 Aug 2019</named-content>
</p>
<p>Dear Dr Xavier,</p>
<p>Thank you very much for submitting your manuscript 'TAPES: a tool for assessment and prioritisation in exome studies' for review by PLOS Computational Biology. Your manuscript has been fully evaluated by the PLOS Computational Biology editorial team and in this case also by independent peer reviewers. The reviewers appreciated the attention to an important problem, but raised some substantial concerns about the manuscript as it currently stands. While your manuscript cannot be accepted in its present form, we are willing to consider a revised version in which the issues raised by the reviewers have been adequately addressed. We cannot, of course, promise publication at that time.</p>
<p>Please note while forming your response, if your article is accepted, you may have the opportunity to make the peer review history publicly available. The record will include editor decision letters (with reviews) and your responses to reviewer comments. If eligible, we will contact you to opt in or out.</p>
<p>Your revisions should address the specific points made by each reviewer. Please return the revised version within the next 60 days. If you anticipate any delay in its return, we ask that you let us know the expected resubmission date by email at <email xlink:type="simple">ploscompbiol@plos.org</email>. Revised manuscripts received beyond 60 days may require evaluation and peer review similar to that applied to newly submitted manuscripts.</p>
<p>In addition, when you are ready to resubmit, please be prepared to provide the following:</p>
<p>(1) A detailed list of your responses to the review comments and the changes you have made in the manuscript. We require a file of this nature before your manuscript is passed back to the editors.</p>
<p>(2) A copy of your manuscript with the changes highlighted (encouraged). We encourage authors, if possible to show clearly where changes have been made to their manuscript e.g. by highlighting text.</p>
<p>(3) A striking still image to accompany your article (optional). If the image is judged to be suitable by the editors, it may be featured on our website and might be chosen as the issue image for that month. These square, high-quality images should be accompanied by a short caption. Please note as well that there should be no copyright restrictions on the use of the image, so that it can be published under the Open-Access license and be subject only to appropriate attribution.</p>
<p>Before you resubmit your manuscript, please consult our Submission Checklist to ensure your manuscript is formatted correctly for PLOS Computational Biology: <ext-link ext-link-type="uri" xlink:href="http://www.ploscompbiol.org/static/checklist.action" xlink:type="simple">http://www.ploscompbiol.org/static/checklist.action</ext-link>. Some key points to remember are:</p>
<p>- Figures uploaded separately as TIFF or EPS files (if you wish, your figures may remain in your main manuscript file in addition).</p>
<p>- Supporting Information uploaded as separate files, titled Dataset, Figure, Table, Text, Protocol, Audio, or Video.</p>
<p>- Funding information in the 'Financial Disclosure' box in the online system.</p>
<p>While revising your submission, please upload your figure files to the Preflight Analysis and Conversion Engine (PACE) digital diagnostic tool, <ext-link ext-link-type="uri" xlink:href="https://pacev2.apexcovantage.com" xlink:type="simple">https://pacev2.apexcovantage.com</ext-link> PACE helps ensure that figures meet PLOS requirements. To use PACE, you must first register as a user. Then, login and navigate to the UPLOAD tab, where you will find detailed instructions on how to use the tool. If you encounter any issues or have any questions when using PACE, please email us at <email xlink:type="simple">figures@plos.org</email>.</p>
<p>To enhance the reproducibility of your results, we recommend that you deposit your laboratory protocols in protocols.io, where a protocol can be assigned its own identifier (DOI) such that it can be cited independently in the future. For instructions see <ext-link ext-link-type="uri" xlink:href="http://journals.plos.org/ploscompbiol/s/submission-guidelines#loc-materials-and-methods" xlink:type="simple">here</ext-link>. </p>
<p>We are sorry that we cannot be more positive about your manuscript at this stage, but if you have any concerns or questions, please do not hesitate to contact us.</p>
<p>Sincerely,</p>
<p>Mihaela Pertea</p>
<p>Software Editor</p>
<p>PLOS Computational Biology</p>
<p>Mihaela Pertea</p>
<p>Software Editor</p>
<p>PLOS Computational Biology</p>
<p>A link appears below if there are any accompanying review attachments. If you believe any reviews to be missing, please contact <email xlink:type="simple">ploscompbiol@plos.org</email> immediately:</p>
<p>[LINK]</p>
<p>Reviewer's Responses to Questions</p>
<p><bold>Comments to the Authors:</bold></p>
<p><bold>Please note here if the review is uploaded as an attachment.</bold></p>
<p>Reviewer #1: SUMMARY:</p>
<p>'TAPES: a tool for assessment and prioritisation in exome studies' implements a novel and more precise method for assessing variant pathogenicity by introducing a novel modeling for integration of ACMG criteria. They leverage this model along with publicly available variant population frequencies to provide more accurate predictions of variant pathogenicity. Additionally, this software provides a comprehensive list of both reporting and analysis options.</p>
<p>MAJOR CODE PROBLEMS:</p>
<p>- Code doesn't seem to have any tests or automated way to run them: <ext-link ext-link-type="uri" xlink:href="https://github.com/a-xavier/tapes" xlink:type="simple">https://github.com/a-xavier/tapes</ext-link>. Please add tests (preferrably using a testing framework such as PyTest) that minimally take advantage of your toy datasets that covers most of your functionality. Integration with a free and automated continuous integration environment like Travis would also be highly recommended. Once tests are in place, potentialy using branches to provide a more stable development path may aid development</p>
<p>- Toy example provided doesn't work natively or within a virtualenv:</p>
<p>- python3 tapes.py sort -i ./Example_Output/input.csv -o ./Toy_dataset/ --tab --by_gene --by_sample --enrichr --disease "autosomal dominant" --kegg "Pathways in cancer":</p>
<p>No acmg_db path given and no db_config.json found</p>
<p>Default is: /home/ubuntu/repositories/tapes/acmg_db</p>
<p>***TAPES: SORT***</p>
<p>2019-07-15 10:37:05.....Output type: FOLDER</p>
<p>Traceback (most recent call last):</p>
<p>File "tapes.py", line 309, in &lt;module&gt;</p>
<p>main()</p>
<p>File "tapes.py", line 164, in main</p>
<p>output_prefix = args.output.split('\\\\')[-2]</p>
<p>IndexError: list index out of range</p>
<p>MINOR CODE FEEDBACK</p>
<p>- I would put code that is not top-level in a `src` or `source` directory.</p>
<p>- While the Manual is fine as a PDF, long-term maintenance might be easier if it is in markdown. It can be further extrapolated using something like Read the Docs or other services.</p>
<p>MINOR EDITS:</p>
<p>- line 25: should be: "share the same phenotype" , missing "the"</p>
<p>- line 27: I think it reads better to say "Benchmarks showed that TAPES outperforms available tools"</p>
<p>- line 34: "Available software can predict" drop "'s"</p>
<p>- line 90: "individuals affected" and "number of individuals", individuals I believe should be plural in both cases</p>
<p>- line 96: "very rare variants" I believe variants should be plural</p>
<p>- line 134: "cohort are in the class are probably" , missing "are"</p>
<p>- line 137-139: This is not a complete sentence.</p>
<p>- Figure 1: Charger should be "CharGer" in your legend</p>
<p>MINOR QUESTIONS:</p>
<p>- In this model there are no controls, which is novel. I'm mildly curious if it can be shown that providing controls offers little or no statistical benefit over the publicly available variant frequencies.</p>
<p>- A minor discussion of why the CharGer Scores were so similar to the TAPES probability model might be useful in context of Figure 1.</p>
<p>Reviewer #2: The article "TAPES: a tool for assessment and prioritisation in exome</p>
<p>studies" describes a new software tool to identify pathogenic and</p>
<p>benign variants. The aim described is very promising. However, I think</p>
<p>the clarity of both the paper and the documentation could be improved.</p>
<p>I will first comment on my experience with the software and then on</p>
<p>the paper. (This review is written in MarkDown format, so it can be</p>
<p>converted to html or other format to see code section.)</p>
<p>## Comments on the software package.</p>
<p>I cloned the repository from GitHub, and could install it. Then I ran</p>
<p>into a few problems.</p>
<p>1. I found a bug in `t_func.py` on line 3197 that made the program</p>
<p>crash</p>
<p>```</p>
<p>with gzip.open(os.path.join(acmg_db_path, 'repeat_dict.{}.gz'.format(assembly)), "r") as dj:</p>
<p>```</p>
<p>Correcting the line to the following solved the issue.</p>
<p>```</p>
<p>with gzip.open(os.path.join(acmg_db_path, 'repeat_dict.{}.gz'.format(assembly)), "rt") as dj:</p>
<p>```</p>
<p>2. It was not clear at installation that I should install annovar to</p>
<p>be able to use tapes. I have found this information later in the</p>
<p>manual.</p>
<p>3. After installing annovar I needed to run</p>
<p>`python3 tapes.py db -s -A annovar` and</p>
<p>`python3 tapes.py db -b annovar` before I could annotate vcf</p>
<p>files. These commands were only mentioned at the end of the manual.</p>
<p>4. I did not manage to find a way to start with a vcf file, annotate</p>
<p>it and finally obtain ACMG classification. I think a tutorial and an</p>
<p>example dataset (starting from vcf files) would be valuable additions.</p>
<p>5. I would suggest to add a workflow diagram both to the manual and to</p>
<p>the paper to make it clear what kind of steps are needed and what</p>
<p>are the potential input and output files.</p>
<p>6. I could not identify what was the input file used for the analysis</p>
<p>shown in the paper, so I could not check whether it is</p>
<p>reproducible.</p>
<p>7. The program does not always produce the expected file name or it</p>
<p>does create the expected file, but does not log it</p>
<p>correctly. I think the code needs to be checked more thoroughly.</p>
<p>8. Please create a release for the publication version of the package</p>
<p>so people can know which version/status of the software was used</p>
<p>for the publication (This can be done at <ext-link ext-link-type="uri" xlink:href="https://github.com/a-xavier/tapes/releases" xlink:type="simple">https://github.com/a-xavier/tapes/releases</ext-link>).</p>
<p>9. A docker image is always a nice addition, to make sure that</p>
<p>everything is specified as it should be, and there are no problems</p>
<p>due to difference in the software environment. It is also a good</p>
<p>way to test, how a software can be installed in a new environment.</p>
<p>## Comments on the paper</p>
<p>I have found several typos and grammatical mistakes. I think the</p>
<p>text should be checked more thoroughly for mistakes.</p>
<p>1. Abstract line 17: What does "downstream" variants refer to?</p>
<p>2. On line 20 multi-sample variant calling formats are mentioned in</p>
<p>the abstract, but this is never mentioned further in the article. I</p>
<p>would either remove it from the abstract or add an explanation to</p>
<p>a later section.</p>
<p>3. Lines 25-26. The Authors mention that cohort samples can be analyzed</p>
<p>even without a control sample set. My question is whether it is</p>
<p>possible to make use of a control set or is it only possible to use</p>
<p>the standard option where the databases are checked?</p>
<p>4. Lines 26-27: "Finally, it can provide powerful filtering and</p>
<p>reporting options to help researchers make sense of cohort</p>
<p>studies." I would say "it provides powerful filtering and</p>
<p>reporting options". I find "make sense" to be too informal for a</p>
<p>scientific paper.</p>
<p>5. The Author summary contains several typos also some have</p>
<p>grammatical mistakes as well.</p>
<p>6. Lines 34-35: "but does not take into account the fact that the</p>
<p>variants belongs in a cohort." I don't know what this sentence</p>
<p>refers to exactly. Also, I have the same comment for line 52: "any</p>
<p>chort characteristic". I think there should be a clear discussion</p>
<p>on what these are and how they are used or not used by the</p>
<p>different software tools.</p>
<p>7. ANNOVAR interface and annotated variant file: lines 66-69. This</p>
<p>section contains grammatical mistakes and is not clearly</p>
<p>structured. I think it would be good to have workflow chart to make</p>
<p>clear how different inputs can be used. Starting from VCF either</p>
<p>VCF --(3rd party tools)--&gt; annotated VCF or</p>
<p>VCF --(TAPES as a wrapper for ANNOVAR)--&gt; annotated VCF. And how to proceed</p>
<p>with the annotated VCF. I could not use the sort function on a VCF,</p>
<p>only on CSV.</p>
<p>8. Line 69: "without having to specify the databases and annotations</p>
<p>to use." It is true that when running one does not have to specify them,</p>
<p>but at set up the user has to specify which databases are to be</p>
<p>used, according to my experience. Also this comment gives the</p>
<p>impression that the user has no control over which databases are</p>
<p>being used.</p>
<p>9. Lines 76-80. I think it would be nice to have a description on how</p>
<p>each criterion was implemented as supplementary at least. Then the</p>
<p>Authors could say that most criteria were straight forward to</p>
<p>implement (see suppl.), but the others we solved in the following</p>
<p>way, and then explain them.</p>
<p>10. Line 95. assumptions "are" made.</p>
<p>11. Line 125. I would not use "most confidence", but "highest level of</p>
<p>confidence".</p>
<p>12. Lines 128-129. I would suggest to reformulate the first sentence</p>
<p>to make it clearer.</p>
<p>13. Line 134. What does "in the class" mean?</p>
<p>14. Line 169. "sheer number of" I find this a bit too informal.</p>
<p>15. Figure 1. and surrounding text is not well formulated. It is</p>
<p>difficult to interpret the difference between "TAPES proba" and</p>
<p>"TAPES ACMG". I have only realized what the difference was once I</p>
<p>opened the supplementary table and saw the last two columns. I</p>
<p>think this could be improved.</p>
<p>16. Lines 166-167. How did the Authors arrive at the threshold values (0.35 and 0.8) for</p>
<p>probability scores? Was it to maximize the score on the</p>
<p>example/training dataset? I would suggest to use more than one dataset</p>
<p>for benchmarking and testing. It should be avoided to optimize a</p>
<p>method on the benchmarking set.</p>
<p>17. Figure 1. According to the benchmarks TAPES falls either between the two other</p>
<p>software or performs worse than the other two software if we use the</p>
<p>ACMG results according to the ROC and precision recall analysis. While</p>
<p>this is not discussed in the text. Also would the Authors suggest to</p>
<p>use the Probability instead of ACMG then?</p>
<p>18. Figure 3. I prefer graphs with two axes. Having two y-axes makes</p>
<p>it difficult to interpret. The two graphs could be shown below each</p>
<p>other (A and B) with the same x-axis, but separate y-axes for the two plots.</p>
<p>19. Lines 188-194. I think this section could be improved by adding</p>
<p>example output, adding context on how does it compare to a</p>
<p>workflow without TAPES to fully show the benefits of the method.</p>
<p>I suggest to separate real data and mock up (made up) data</p>
<p>examples.</p>
<p>20. TAPES is able to assign variants to ACMG categories and then can</p>
<p>do further sorting and reporting. Other software tools can also</p>
<p>use ACMG categories as mentioned in the introduction. Can TAPES</p>
<p>use the output of those software and then do sorting and reporting?</p>
<p>21. On which platforms was TAPES tested?</p>
<p>22. Please add a release for TAPES that is referred to in the article.</p>
<p>Also add version numbers for the software used (or commit tags from GitHub).</p>
<p>## Summary</p>
<p>Overall, I find the tool promising. However, I do think both the</p>
<p>software package and the article require significant revision.</p>
<p>I do believe that TAPES can become a valuable tool.</p>
<p>**********</p>
<p><bold>Have all data underlying the figures and results presented in the manuscript been provided?</bold></p>
<p>Large-scale datasets should be made available via a public repository as described in the <italic>PLOS Computational Biology</italic> <ext-link ext-link-type="uri" xlink:href="http://journals.plos.org/ploscompbiol/s/data-availability" xlink:type="simple">data availability policy</ext-link>, and numerical data that underlies graphs or summary statistics should be provided in spreadsheet form as supporting information.</p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: No: I could not identify the input data used for the benchmark.</p>
<p>**********</p>
<p>PLOS authors have the option to publish the peer review history of their article (<ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/ploscompbiol/s/editorial-and-peer-review-process#loc-peer-review-history" xlink:type="simple">what does this mean?</ext-link>). If published, this will include your full peer review and any attached files.</p>
<p>If you choose “no”, your identity will remain anonymous but your review may still be made public.</p>
<p><bold>Do you want your identity to be public for this peer review?</bold> For information about this choice, including consent withdrawal, please see our <ext-link ext-link-type="uri" xlink:href="https://www.plos.org/privacy-policy" xlink:type="simple">Privacy Policy</ext-link>.</p>
<p>Reviewer #1: Yes: Nathan Dunn</p>
<p>Reviewer #2: No</p>
</body>
</sub-article>
<sub-article article-type="author-comment" id="pcbi.1007453.r002">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pcbi.1007453.r002</article-id>
<title-group>
<article-title>Author response to Decision Letter 0</article-title>
</title-group>
<related-object document-id="10.1371/journal.pcbi.1007453" document-id-type="doi" document-type="peer-reviewed-article" id="rel-obj002" link-type="rebutted-decision-letter" object-id="10.1371/journal.pcbi.1007453.r001" object-id-type="doi" object-type="decision-letter"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>1</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="author-response-date">21 Aug 2019</named-content>
</p>
<supplementary-material id="pcbi.1007453.s006" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007453.s006" xlink:type="simple">
<label>Attachment</label>
<caption>
<p>Submitted filename: <named-content content-type="submitted-filename">PlosCompBiol Response_BTP_RJS.docx</named-content></p>
</caption>
</supplementary-material>
</body>
</sub-article>
<sub-article article-type="aggregated-review-documents" id="pcbi.1007453.r003" specific-use="decision-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pcbi.1007453.r003</article-id>
<title-group>
<article-title>Decision Letter 1</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Pertea</surname>
<given-names>Mihaela</given-names>
</name>
<role>Software Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2019</copyright-year>
<copyright-holder>Mihaela Pertea</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pcbi.1007453" document-id-type="doi" document-type="article" id="rel-obj003" link-type="peer-reviewed-article"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>1</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">7 Sep 2019</named-content>
</p>
<p>Dear Dr Xavier,</p>
<p>Thank you very much for submitting your manuscript 'TAPES: a tool for assessment and prioritisation in exome studies' for review by PLOS Computational Biology. Your manuscript has been fully evaluated by the PLOS Computational Biology editorial team and in this case also by independent peer reviewers. The reviewers appreciated the attention to an important problem, but raised some substantial concerns about the manuscript as it currently stands. At this time we are not willing to consider a revised manuscript unless you can provide the following information, in addition to adequately answering the reviewers' concerns:</p>
<p><!-- <span style="caret-color: rgb(0, 0, 51); color: rgb(0, 0, 51); font-family: verdana, geneva, arial, helvetica, sans-serif; font-size: 11.199999809265137px;"> -->- the input file for the benchmark<!-- </span> --></p>
<p><!-- <span style="caret-color: rgb(0, 0, 51); color: rgb(0, 0, 51); font-family: verdana, geneva, arial, helvetica, sans-serif; font-size: 11.199999809265137px;"> -->- the reference set for the benchmark<!-- </span> --></p>
<p><!-- <span style="caret-color: rgb(0, 0, 51); color: rgb(0, 0, 51); font-family: verdana, geneva, arial, helvetica, sans-serif; font-size: 11.199999809265137px;"> -->- how the thresholds were calculated.<!-- </span> --></p>
<p>We cannot, of course, promise publication, even if you decide to send us a revised version.</p>
<p>Please note while forming your response, if your article is accepted, you may have the opportunity to make the peer review history publicly available. The record will include editor decision letters (with reviews) and your responses to reviewer comments. If eligible, we will contact you to opt in or out.</p>
<p>Your revisions should address the specific points made by each reviewer. Please return the revised version within the next 60 days. If you anticipate any delay in its return, we ask that you let us know the expected resubmission date by email at <email xlink:type="simple">ploscompbiol@plos.org</email>. Revised manuscripts received beyond 60 days may require evaluation and peer review similar to that applied to newly submitted manuscripts.</p>
<p>In addition, when you are ready to resubmit, please be prepared to provide the following:</p>
<p>(1) A detailed list of your responses to the review comments and the changes you have made in the manuscript. We require a file of this nature before your manuscript is passed back to the editors.</p>
<p>(2) A copy of your manuscript with the changes highlighted (encouraged). We encourage authors, if possible to show clearly where changes have been made to their manuscript e.g. by highlighting text.</p>
<p>(3) A striking still image to accompany your article (optional). If the image is judged to be suitable by the editors, it may be featured on our website and might be chosen as the issue image for that month. These square, high-quality images should be accompanied by a short caption. Please note as well that there should be no copyright restrictions on the use of the image, so that it can be published under the Open-Access license and be subject only to appropriate attribution.</p>
<p>Before you resubmit your manuscript, please consult our Submission Checklist to ensure your manuscript is formatted correctly for PLOS Computational Biology: <ext-link ext-link-type="uri" xlink:href="http://www.ploscompbiol.org/static/checklist.action" xlink:type="simple">http://www.ploscompbiol.org/static/checklist.action</ext-link>. Some key points to remember are:</p>
<p>- Figures uploaded separately as TIFF or EPS files (if you wish, your figures may remain in your main manuscript file in addition).</p>
<p>- Supporting Information uploaded as separate files, titled Dataset, Figure, Table, Text, Protocol, Audio, or Video.</p>
<p>- Funding information in the 'Financial Disclosure' box in the online system.</p>
<p>While revising your submission, please upload your figure files to the Preflight Analysis and Conversion Engine (PACE) digital diagnostic tool, <ext-link ext-link-type="uri" xlink:href="https://pacev2.apexcovantage.com" xlink:type="simple">https://pacev2.apexcovantage.com</ext-link>. PACE helps ensure that figures meet PLOS requirements. To use PACE, you must first register as a user. Then, login and navigate to the UPLOAD tab, where you will find detailed instructions on how to use the tool. If you encounter any issues or have any questions when using PACE, please email us at <email xlink:type="simple">figures@plos.org</email>.</p>
<p>To enhance the reproducibility of your results, we recommend that you deposit your laboratory protocols in protocols.io, where a protocol can be assigned its own identifier (DOI) such that it can be cited independently in the future. For instructions see <ext-link ext-link-type="uri" xlink:href="http://journals.plos.org/ploscompbiol/s/submission-guidelines#loc-materials-and-methods" xlink:type="simple">here</ext-link>. </p>
<p>We are sorry that we cannot be more positive about your manuscript at this stage, but if you have any concerns or questions, please do not hesitate to contact us.</p>
<p>Sincerely,</p>
<p>Mihaela Pertea</p>
<p>Software Editor</p>
<p>PLOS Computational Biology</p>
<p>Mihaela Pertea</p>
<p>Software Editor</p>
<p>PLOS Computational Biology</p>
<p>A link appears below if there are any accompanying review attachments. If you believe any reviews to be missing, please contact <email xlink:type="simple">ploscompbiol@plos.org</email> immediately:</p>
<p>[LINK]</p>
<p>Reviewer's Responses to Questions</p>
<p><bold>Comments to the Authors:</bold></p>
<p><bold>Please note here if the review is uploaded as an attachment.</bold></p>
<p>Reviewer #1: Thank you for addressing my concerns.</p>
<p>Reviewer #2: In my opinion the manuscript is clearer now thanks to the corrections. I still think that adding a flowchart on input, output and processes could be very helpful to easily understand the pipeline and the possibilities. Below I have included a suggested workflow chart.</p>
<p>Most importantly, please indicate clearly the input file and the reference set for the benchmark, and how the thresholds were calculated. Otherwise, readers cannot evaluate the validity, it would be only faith or distrust.</p>
<p>In addition, I recommend to further improve the git repo, because potential users will give up easily if it is not clear or there are too many mistakes. Adding a tutorial with example input (e.g. the benchmark would be an excellent example), commands to run and the interpretation would help users test that everything is installed correctly and help them understand how the program works and what the steps are.</p>
<p>Example flowchart, based on the manual, in mermaid (it can be drawn using the online editor: <ext-link ext-link-type="uri" xlink:href="https://mermaidjs.github.io/mermaid-live-editor/" xlink:type="simple">https://mermaidjs.github.io/mermaid-live-editor/</ext-link>):</p>
<p>```</p>
<p>graph TD</p>
<p>VCF --&gt;an{annotate}</p>
<p>VCF --&gt;an2</p>
<p>subgraph ANNOVAR</p>
<p>an2{annotate: table_annovar.pl}</p>
<p>end</p>
<p>an2 --&gt;VCF2[VCF: annotated variants]</p>
<p>an2 --&gt;TSV[TSV: annotated variants]</p>
<p>VCF2--&gt;an</p>
<p>TSV --&gt;|?|sort</p>
<p>subgraph TAPES</p>
<p>subgraph wrapped ANNOVAR</p>
<p>an</p>
<p>end</p>
<p>an --&gt; Annot[CSV: annotated variants]</p>
<p>Annot --&gt;sort{sort}</p>
<p>sort --&gt;Sorted[CSV: sorted variants]</p>
<p>Sorted --&gt;X{analyse}</p>
<p>end</p>
<p>X --&gt;|by_sample| S[By sample report]</p>
<p>X --&gt;|by_gene| G[By gene report]</p>
<p>X --&gt;|enrich| E[EnrichR report]</p>
<p>X --&gt;|list| L[Kegg, List and Disease report]</p>
<p>```</p>
<p># Questions based on the response from the Authors</p>
<p>In one of the answers the Authors mention that for the benchmarking they used the dataset from the CharGer publication. Please include this also in the manuscript, and also add to the repository. The same answer ends with the comment that a sentence has been added to line 176. I think that line numbering has changed, so I could not find the referenced sentence.</p>
<p>The Authors claim that ANNOVAR wrapping is totally optional, although I could not run any of the commands without setting up the database by first installing ANNOVAR.</p>
<p>I still don't understand how the threshold values 0.35 and 0.8 were chosen for the probability method which is the recommended method. My assumption is that based on the benchmark set the Authors identified which cutoffs would yield the maximum number of correctly identified variants. If this is true then an independent data set is needed to test how valid the calls are, because the benchmark and training set should be independent from each other. If this assumption is false, please include the method used for deciding the threshold values.</p>
<p># Textual comments</p>
<p>Line 35: "does not take into account the abundance of a variants in a cohort" should be "do not take into account the abundance of variants in a cohort"</p>
<p>Line 66-70. I could not perform the described steps without installing ANNOVAR and setting up the database. Either include clearly in the manual how this can be done or modify this paragraph.</p>
<p>Line 116 should read "TAPES provides an array of different useful reports."</p>
<p>Line 119. "on the command line" not "in"</p>
<p>Line 121. "a pathway" not plural</p>
<p>Line 122. "users do research" could be "run searches"</p>
<p>Line 145. TAPES will or does?</p>
<p>Line 158. Is the table only used as reference or also as input for the analysis? Please add the input.</p>
<p>Line 167-168. How do the ROC curves suggest the threshold values?</p>
<p>Figure 3: Why is the old TAPES curve used instead of the new one that is already in the git repo?</p>
<p># Code review</p>
<p>The code still contains bugs that cause it to crash. Although the git repo suggests using `python tapes.py`, since tapes.py is written in python3 and the default python on many linux systems is python2 the program crashes.</p>
<p>Many of the example commands contain an incorrect hyphen character that results in an error when copy-pasting them to the command line.</p>
<p>Attempting to run the "Quick Start" section</p>
<p>`python tapes.py db -s -A /path/to/annovar/` -&gt; `python3 tapes.py db -s -A ~/temp/tapes/annovar/`</p>
<p>Worked fine with absolute path, but fails with relative path with a non informative error.</p>
<p>`python3 tapes.py db -s -A ../tapes/annovar/` Gives the following output:</p>
<p>```</p>
<p>No acmg_db path given and no db_config.json found</p>
<p>Default is: /home/user/temp/tapes-0.1/acmg_db</p>
<p>***TAPES: SEE DATABASE***</p>
<p>2019-09-04 13:45:59.....Fetching ANNOVAR Alldb file</p>
<p>NOTICE: Web-based checking to see whether ANNOVAR new version is available ... Done</p>
<p>NOTICE: Downloading annotation database <ext-link ext-link-type="uri" xlink:href="http://www.openbioinformatics.org/annovar/download/hg19_avdblist.txt.gz" xlink:type="simple">http://www.openbioinformatics.org/annovar/download/hg19_avdblist.txt.gz</ext-link> ... OK</p>
<p>NOTICE: Uncompressing downloaded files</p>
<p>NOTICE: Finished downloading annotation files for hg19 build version, with files saved at the '.' directory</p>
<p>NOTICE: Web-based checking to see whether ANNOVAR new version is available ... Done</p>
<p>NOTICE: Downloading annotation database <ext-link ext-link-type="uri" xlink:href="http://www.openbioinformatics.org/annovar/download/hg38_avdblist.txt.gz" xlink:type="simple">http://www.openbioinformatics.org/annovar/download/hg38_avdblist.txt.gz</ext-link> ... OK</p>
<p>NOTICE: Uncompressing downloaded files</p>
<p>NOTICE: Finished downloading annotation files for hg38 build version, with files saved at the '.' directory</p>
<p>Traceback (most recent call last):</p>
<p>File "tapes.py", line 406, in &lt;module&gt;</p>
<p>tf.check_online_annovar_dbs(annovar_path)</p>
<p>File "/home/user/temp/tapes-0.1/src/t_func.py", line 883, in check_online_annovar_dbs</p>
<p>with open(outfile_hg19, 'r') as file:</p>
<p>FileNotFoundError: [Errno 2] No such file or directory: '../tapes/annovar/hg19_avdblist.txt'</p>
<p>```</p>
<p>`python tapes.py db -b --acmg --assembly hg19` -&gt; `python3 tapes.py db -b --acmg --assembly hg19` Fails</p>
<p>```</p>
<p>No acmg_db path given and no db_config.json found</p>
<p>Default is: /home/user/temp/tapes-0.1/acmg_db</p>
<p>***TAPES: DOWNLOAD DATABASE***</p>
<p>No annovar path given and no db_config.json found</p>
<p>Traceback (most recent call last):</p>
<p>File "tapes.py", line 358, in &lt;module&gt;</p>
<p>tf.build_annovar_db(annovar_path, args.assembly, args.acmg)</p>
<p>NameError: name 'annovar_path' is not defined</p>
<p>```</p>
<p>`python3 tapes.py annotate -i toy_dataset/toy.vcf -o test/output.vcf --acmg –a hg19` does not run and prints out the help page plus the following warning:</p>
<p>```</p>
<p>tapes: error: unrecognized arguments: –a hg19</p>
<p>```</p>
<p>After changing the hyphen to the correct character ` python3 tapes.py annotate -i toy_dataset/toy.vcf -o test/output.vcf --acmg -a hg19`</p>
<p>```</p>
<p>No acmg_db path given and no db_config.json found</p>
<p>Default is: /home/user/temp/tapes-0.1/acmg_db</p>
<p>***TAPES: ANNOTATE***</p>
<p>No annovar path given and no db_config.json found</p>
<p>Traceback (most recent call last):</p>
<p>File "tapes.py", line 384, in &lt;module&gt;</p>
<p>tf.process_annotate_vcf(args.input, args.output, annovar_path, args.assembly, args.ref_anno, args.acmg)</p>
<p>NameError: name 'annovar_path' is not defined</p>
<p>```</p>
<p>`python3 tapes.py sort -i toy_dataset/toy_annovar_multi.vcf -o test-sort/ --tab` works and creates a folder with three plots (png) and `test-sort.txt`, which is a tab separated file</p>
<p>`python3 tapes.py analyse -i test-sort/test-sort.txt -o test-report/report.txt --single_option` Fails with the following error:</p>
<p>```</p>
<p>tapes: error: unrecognized arguments: --single_option</p>
<p>```</p>
<p>`python3 tapes.py analyse -i test-sort/test-sort.txt -o test-report/report.txt` Runs without error, but creates no output</p>
<p>```</p>
<p>No acmg_db path given and no db_config.json found</p>
<p>Default is: /home/user/temp/tapes-0.1/acmg_db</p>
<p>***TAPES: RE-ANALYSE***</p>
<p>2019-09-04 14:07:10.....48 samples found</p>
<p>2019-09-04 14:07:10.....Output type: TXT/TSV + XLSX</p>
<p>2019-09-04 14:07:10.....Done</p>
<p>```</p>
<p>However, `python3 tapes.py sort -i toy_dataset/toy_annovar_multi.vcf -o test-full/ --tab --by_gene --by_sample --enrichr --list "MLH1 MSH6 MSH2" --disease "autosomal dominant" --kegg "pathways in cancer"` does work.</p>
<p>**********</p>
<p><bold>Have all data underlying the figures and results presented in the manuscript been provided?</bold></p>
<p>Large-scale datasets should be made available via a public repository as described in the <italic>PLOS Computational Biology</italic> <ext-link ext-link-type="uri" xlink:href="http://journals.plos.org/ploscompbiol/s/data-availability" xlink:type="simple">data availability policy</ext-link>, and numerical data that underlies graphs or summary statistics should be provided in spreadsheet form as supporting information.</p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: No: Input or reference set for the benchmarking or their clear description</p>
<p>**********</p>
<p>PLOS authors have the option to publish the peer review history of their article (<ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/ploscompbiol/s/editorial-and-peer-review-process#loc-peer-review-history" xlink:type="simple">what does this mean?</ext-link>). If published, this will include your full peer review and any attached files.</p>
<p>If you choose “no”, your identity will remain anonymous but your review may still be made public.</p>
<p><bold>Do you want your identity to be public for this peer review?</bold> For information about this choice, including consent withdrawal, please see our <ext-link ext-link-type="uri" xlink:href="https://www.plos.org/privacy-policy" xlink:type="simple">Privacy Policy</ext-link>.</p>
<p>Reviewer #1: Yes: Nathan Dunn</p>
<p>Reviewer #2: No</p>
</body>
</sub-article>
<sub-article article-type="author-comment" id="pcbi.1007453.r004">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pcbi.1007453.r004</article-id>
<title-group>
<article-title>Author response to Decision Letter 1</article-title>
</title-group>
<related-object document-id="10.1371/journal.pcbi.1007453" document-id-type="doi" document-type="peer-reviewed-article" id="rel-obj004" link-type="rebutted-decision-letter" object-id="10.1371/journal.pcbi.1007453.r003" object-id-type="doi" object-type="decision-letter"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>2</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="author-response-date">15 Sep 2019</named-content>
</p>
<supplementary-material id="pcbi.1007453.s007" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" position="float" xlink:href="info:doi/10.1371/journal.pcbi.1007453.s007" xlink:type="simple">
<label>Attachment</label>
<caption>
<p>Submitted filename: <named-content content-type="submitted-filename">PlosCompBiol Response_2.docx</named-content></p>
</caption>
</supplementary-material>
</body>
</sub-article>
<sub-article article-type="aggregated-review-documents" id="pcbi.1007453.r005" specific-use="decision-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pcbi.1007453.r005</article-id>
<title-group>
<article-title>Decision Letter 2</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Pertea</surname>
<given-names>Mihaela</given-names>
</name>
<role>Software Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2019</copyright-year>
<copyright-holder>Mihaela Pertea</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pcbi.1007453" document-id-type="doi" document-type="article" id="rel-obj005" link-type="peer-reviewed-article"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>2</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">1 Oct 2019</named-content>
</p>
<p>Dear Dr Xavier,</p>
<p>We are pleased to inform you that your manuscript 'TAPES: a tool for assessment and prioritisation in exome studies' has been provisionally accepted for publication in PLOS Computational Biology.</p>
<p>Before your manuscript can be formally accepted you will need to complete some formatting changes, which you will receive in a follow-up email. Please be aware that it may take several days for you to receive this email; during this time no action is required by you. Once you have received these formatting requests, please note that your manuscript will not be scheduled for publication until you have made the required changes.</p>
<p>In the meantime, please log into Editorial Manager at <ext-link ext-link-type="uri" xlink:href="https://www.editorialmanager.com/pcompbiol/" xlink:type="simple">https://www.editorialmanager.com/pcompbiol/</ext-link>, click the "Update My Information" link at the top of the page, and update your user information to ensure an efficient production and billing process.</p>
<p>One of the goals of PLOS is to make science accessible to educators and the public. PLOS staff issue occasional press releases and make early versions of PLOS Computational Biology articles available to science writers and journalists. PLOS staff also collaborate with Communication and Public Information Offices and would be happy to work with the relevant people at your institution or funding agency. If your institution or funding agency is interested in promoting your findings, please ask them to coordinate their releases with PLOS (contact <email xlink:type="simple">ploscompbiol@plos.org</email>).</p>
<p>Thank you again for supporting Open Access publishing. We look forward to publishing your paper in PLOS Computational Biology.</p>
<p>Sincerely,</p>
<p>Mihaela Pertea</p>
<p>Software Editor</p>
<p>PLOS Computational Biology</p>
<p>Mihaela Pertea</p>
<p>Software Editor</p>
<p>PLOS Computational Biology</p>
<p>Reviewer's Responses to Questions</p>
<p><bold>Comments to the Authors:</bold></p>
<p><bold>Please note here if the review is uploaded as an attachment.</bold></p>
<p>Reviewer #2: Dear Authors,</p>
<p>Thank you for addressing all my comments. I think the manuscript has improved significantly since the submission. I find the new comparison results and figures very impressive and convincing.</p>
<p>I have two minor comments:</p>
<p>Is the release number still 0.1 as stated in the manuscript or is it 0.1.1? I would suggest the improved version. Otherwise, potential users might start with 0.1 and be discouraged by the bugs and give up.</p>
<p>I would include the version of Figure 3 that best represents the version of the software that is used for the latest version of the manuscript and GitHub.</p>
<p>I leave both these comments up to the Authors' consideration when working on the final proof of the paper.</p>
<p>**********</p>
<p><bold>Have all data underlying the figures and results presented in the manuscript been provided?</bold></p>
<p>Large-scale datasets should be made available via a public repository as described in the <italic>PLOS Computational Biology</italic> <ext-link ext-link-type="uri" xlink:href="http://journals.plos.org/ploscompbiol/s/data-availability" xlink:type="simple">data availability policy</ext-link>, and numerical data that underlies graphs or summary statistics should be provided in spreadsheet form as supporting information.</p>
<p>Reviewer #2: Yes</p>
<p>**********</p>
<p>PLOS authors have the option to publish the peer review history of their article (<ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/ploscompbiol/s/editorial-and-peer-review-process#loc-peer-review-history" xlink:type="simple">what does this mean?</ext-link>). If published, this will include your full peer review and any attached files.</p>
<p>If you choose “no”, your identity will remain anonymous but your review may still be made public.</p>
<p><bold>Do you want your identity to be public for this peer review?</bold> For information about this choice, including consent withdrawal, please see our <ext-link ext-link-type="uri" xlink:href="https://www.plos.org/privacy-policy" xlink:type="simple">Privacy Policy</ext-link>.</p>
<p>Reviewer #2: No</p>
</body>
</sub-article>
<sub-article article-type="editor-report" id="pcbi.1007453.r006" specific-use="acceptance-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pcbi.1007453.r006</article-id>
<title-group>
<article-title>Acceptance letter</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Pertea</surname>
<given-names>Mihaela</given-names>
</name>
<role>Software Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2019</copyright-year>
<copyright-holder>Mihaela Pertea</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pcbi.1007453" document-id-type="doi" document-type="article" id="rel-obj006" link-type="peer-reviewed-article"/>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">9 Oct 2019</named-content>
</p>
<p>PCOMPBIOL-D-19-01091R2 </p>
<p>TAPES: a tool for assessment and prioritisation in exome studies</p>
<p>Dear Dr Xavier,</p>
<p>I am pleased to inform you that your manuscript has been formally accepted for publication in PLOS Computational Biology. Your manuscript is now with our production department and you will be notified of the publication date in due course.</p>
<p>The corresponding author will soon be receiving a typeset proof for review, to ensure errors have not been introduced during production. Please review the PDF proof of your manuscript carefully, as this is the last chance to correct any errors. Please note that major changes, or those which affect the scientific understanding of the work, will likely cause delays to the publication date of your manuscript. </p>
<p>Soon after your final files are uploaded, unless you have opted out, the early version of your manuscript will be published online. The date of the early version will be your article's publication date. The final article will be published to the same URL, and all versions of the paper will be accessible to readers.</p>
<p>Thank you again for supporting PLOS Computational Biology and open-access publishing. We are looking forward to publishing your work! </p>
<p>With kind regards,</p>
<p>Matt Lyles</p>
<p>PLOS Computational Biology | Carlyle House, Carlyle Road, Cambridge CB4 3DN | United Kingdom <email xlink:type="simple">ploscompbiol@plos.org</email> | Phone +44 (0) 1223-442824 | <ext-link ext-link-type="uri" xlink:href="http://ploscompbiol.org" xlink:type="simple">ploscompbiol.org</ext-link> | @PLOSCompBiol</p>
</body>
</sub-article>
</article>