<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1d3 20150301//EN" "http://jats.nlm.nih.gov/publishing/1.1d3/JATS-journalpublishing1.dtd">
<article article-type="research-article" dtd-version="1.1d3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">PLoS ONE</journal-id>
<journal-id journal-id-type="publisher-id">plos</journal-id>
<journal-id journal-id-type="pmc">plosone</journal-id>
<journal-title-group>
<journal-title>PLOS ONE</journal-title>
</journal-title-group>
<issn pub-type="epub">1932-6203</issn>
<publisher>
<publisher-name>Public Library of Science</publisher-name>
<publisher-loc>San Francisco, CA USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">PONE-D-19-29289</article-id>
<article-id pub-id-type="doi">10.1371/journal.pone.0233438</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Research Article</subject>
</subj-group>
<subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Genetics</subject><subj-group><subject>Mutation</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Clinical medicine</subject><subj-group><subject>Clinical trials</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Pharmacology</subject><subj-group><subject>Drug research and development</subject><subj-group><subject>Clinical trials</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Clinical trials</subject></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Oncology</subject><subj-group><subject>Cancers and neoplasms</subject><subj-group><subject>Neoplasms</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Clinical medicine</subject><subj-group><subject>Clinical trials</subject><subj-group><subject>Clinical trials (cancer treatment)</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Pharmacology</subject><subj-group><subject>Drug research and development</subject><subj-group><subject>Clinical trials</subject><subj-group><subject>Clinical trials (cancer treatment)</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Clinical trials</subject><subj-group><subject>Clinical trials (cancer treatment)</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Endocrinology</subject><subj-group><subject>Endocrine disorders</subject><subj-group><subject>Diabetes mellitus</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Metabolic disorders</subject><subj-group><subject>Diabetes mellitus</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Oncology</subject><subj-group><subject>Cancers and neoplasms</subject><subj-group><subject>Lung and intrathoracic tumors</subject><subj-group><subject>Non-small cell lung cancer</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Oncology</subject><subj-group><subject>Cancers and neoplasms</subject><subj-group><subject>Lung and intrathoracic tumors</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Oncology</subject><subj-group><subject>Cancers and neoplasms</subject><subj-group><subject>Carcinomas</subject></subj-group></subj-group></subj-group></subj-group></article-categories>
<title-group>
<article-title>Unique insights from ClinicalTrials.gov by mining protein mutations and RSids in addition to applying the Human Phenotype Ontology</article-title>
<alt-title alt-title-type="running-head">Unique insights from ClinicalTrials.gov by mining protein mutations and RSids and applying HPO</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0002-1725-2891</contrib-id>
<name name-style="western">
<surname>Alag</surname> <given-names>Shray</given-names></name>
<role content-type="http://credit.casrai.org/">Conceptualization</role>
<role content-type="http://credit.casrai.org/">Data curation</role>
<role content-type="http://credit.casrai.org/">Formal analysis</role>
<role content-type="http://credit.casrai.org/">Investigation</role>
<role content-type="http://credit.casrai.org/">Methodology</role>
<role content-type="http://credit.casrai.org/">Project administration</role>
<role content-type="http://credit.casrai.org/">Resources</role>
<role content-type="http://credit.casrai.org/">Software</role>
<role content-type="http://credit.casrai.org/">Validation</role>
<role content-type="http://credit.casrai.org/">Visualization</role>
<role content-type="http://credit.casrai.org/">Writing – original draft</role>
<role content-type="http://credit.casrai.org/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"/>
<xref ref-type="corresp" rid="cor001">*</xref>
</contrib>
</contrib-group>
<aff id="aff001">
<addr-line>The Harker School, San Jose, CA, United States of America</addr-line>
</aff>
<contrib-group>
<contrib contrib-type="editor" xlink:type="simple">
<name name-style="western">
<surname>Bandapalli</surname> <given-names>Obul Reddy</given-names></name>
<role>Editor</role>
<xref ref-type="aff" rid="edit1"/>
</contrib>
</contrib-group>
<aff id="edit1">
<addr-line>German Cancer Research Center (DKFZ), GERMANY</addr-line>
</aff>
<author-notes>
<fn fn-type="conflict" id="coi001">
<p>The author has declared that no competing interests exist.</p>
</fn>
<corresp id="cor001">* E-mail: <email xlink:type="simple">21shraya@students.harker.org</email></corresp>
</author-notes>
<pub-date pub-type="collection">
<year>2020</year>
</pub-date>
<pub-date pub-type="epub">
<day>27</day>
<month>5</month>
<year>2020</year>
</pub-date>
<volume>15</volume>
<issue>5</issue>
<elocation-id>e0233438</elocation-id>
<history>
<date date-type="received">
<day>20</day>
<month>10</month>
<year>2019</year>
</date>
<date date-type="accepted">
<day>5</day>
<month>5</month>
<year>2020</year>
</date>
</history>
<permissions>
<copyright-year>2020</copyright-year>
<copyright-holder>Shray Alag</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="info:doi/10.1371/journal.pone.0233438"/>
<abstract>
<p>Researchers and clinicians face a significant challenge in keeping up-to-date with the rapid rate of new associations between genetic mutations and diseases. To remedy this problem, this research mined the ClinicalTrials.gov corpus to extract relevant biological insights, produce unique reports to summarize findings, and make the meta-data available via APIs. An automated text-analysis pipeline performed the following features: parsing the ClinicalTrials.gov files, extracting and analyzing mutations from the corpus, mapping clinical trials to Human Phenotype Ontology (HPO), and finding associations between clinical trials and HPO nodes. Unique reports were created for each mutation (SNPs and protein mutations) mentioned in the corpus, as well as for each clinical trial that references a mutation. These reports, which have been run over multiple time points, along with APIs to access meta-data, are freely available at <ext-link ext-link-type="uri" xlink:href="http://snpminertrials.com" xlink:type="simple">http://snpminertrials.com</ext-link>. Additionally, HPO was used to normalize disease terms and associate clinical trials with relevant genes. The creation of the pipeline and reports, the association of clinical trials with HPO terms, and the insights, public repository, and APIs produced are all novel in this work. The freely-available resources present relevant biological information and novel insights between biomedical entities in a robust and accessible manner, mitigating the challenge of being informed about new associations between mutations, genes, and diseases.</p>
</abstract>
<funding-group>
<funding-statement>The author received no specific funding for this work.</funding-statement>
</funding-group>
<counts>
<fig-count count="5"/>
<table-count count="8"/>
<page-count count="20"/>
</counts>
<custom-meta-group>
<custom-meta id="data-availability">
<meta-name>Data Availability</meta-name>
<meta-value>All reports, APIs, and Google Colab Notebook are available via the project’s homepage <ext-link ext-link-type="uri" xlink:href="http://snpminertrials.com" xlink:type="simple">http://snpminertrials.com</ext-link>. Data is also available at the public OSF repository <ext-link ext-link-type="uri" xlink:href="http://osf.io/qcmk6" xlink:type="simple">http://osf.io/qcmk6</ext-link>.</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="sec001" sec-type="intro">
<title>Introduction</title>
<p>The rapid decrease in the cost of Next-Gen Sequencing (NGS) over the past decade has led to a multitude of new NGS-based studies. Frequently, these studies associate genomic mutations—such as protein mutations and Single Nucleotide Polymorphisms [<xref ref-type="bibr" rid="pone.0233438.ref001">1</xref>] (SNPs)—with genes, drugs, diseases, and other phenotypes [<xref ref-type="bibr" rid="pone.0233438.ref002">2</xref>]. Knowledge about new associations is crucial for researchers and clinicians since understanding an individual’s genetic mutations can help identify disease risk, improve prognosis, and tailor personalized treatments [<xref ref-type="bibr" rid="pone.0233438.ref003">3</xref>][<xref ref-type="bibr" rid="pone.0233438.ref004">4</xref>]. However, it is currently cumbersome to keep up with the rapid rate of discoveries, since manual efforts to curate the literature are highly time-consuming.</p>
<p>ClinicalTrials.gov, run by the United States National Library of Medicine, contains more than 330,000 text documents detailing both past and present clinical trials globally [<xref ref-type="bibr" rid="pone.0233438.ref005">5</xref>]. A proportion of these trials includes information on SNPs, protein mutations, and genes.</p>
<p>Many previous researchers have effectively mined the clinical trials corpus to gain new insights: Zhang et al. 2019 [<xref ref-type="bibr" rid="pone.0233438.ref006">6</xref>] maps Laboratory Observation Identifier Names and Codes (LOINC [<xref ref-type="bibr" rid="pone.0233438.ref007">7</xref>]) to Human Phenotype Ontology (HPO [<xref ref-type="bibr" rid="pone.0233438.ref008">8</xref>]) terms; Gandy et al. 2017 [<xref ref-type="bibr" rid="pone.0233438.ref009">9</xref>] develop CTMine, which uses regular expressions for gene names to search clinical trials; Xu et al. 2016 [<xref ref-type="bibr" rid="pone.0233438.ref010">10</xref>] curates genetic alterations in cancer clinical trials; Su and Sanger, 2017 [<xref ref-type="bibr" rid="pone.0233438.ref011">11</xref>] mine ClinicalTrials.gov to develop a novel method of drug repositioning; Pradhan et al. 2018 [<xref ref-type="bibr" rid="pone.0233438.ref012">12</xref>] conduct a meta-analysis by automatically extracting data from ClinicalTrials.gov; and Sfakianaki et al. 2015 [<xref ref-type="bibr" rid="pone.0233438.ref013">13</xref>] use a Natural Language Processing (NLP) framework to mine ClinicalTrials.gov.</p>
<p>However, despite these important advances, mapping clinical trials to HPO terms, extracting protein mutations and SNPs [<xref ref-type="bibr" rid="pone.0233438.ref014">14</xref>] across the ClinicalTrials.gov corpus, and creating mutation-specific and clinical-trials-specific reports remain feats not yet accomplished.</p>
<p>This study analyzes ClinicalTrials.gov with six specific goals:</p>
<list list-type="order">
<list-item>
<p>Develop a Natural Language Processing based pipeline that extracts <bold>SNPs</bold> and <bold>protein mutations</bold> instances from free text, maps their clinical trial annotations to standardized biological terms using HPO and MeSH [<xref ref-type="bibr" rid="pone.0233438.ref015">15</xref>] ontologies, and analyzes the complete ClinicalTrials.gov corpus to extract new insights between mutations and diseases in the clinical trials literature.</p>
</list-item>
<list-item>
<p>Generate unique reports, made freely available online, for each of the extracted <bold>mutations</bold>. These reports should contain the context in which the mutation is mentioned across all clinical trials, along with the associated HPO disease terms. Further, HPO annotations [<xref ref-type="bibr" rid="pone.0233438.ref016">16</xref>] should be used to reference other genes associated with that disease. Reports should additionally be hyper-linked to key resources for easy access to relevant content. These reports enable the presentation of new biological information in a robust and accessible manner.</p>
</list-item>
<list-item>
<p>Generate reports for each <bold>clinical trial</bold> that mentions a mutation. Statistics on the frequency and clinical trial categories in which mutations occur should also be provided.</p>
</list-item>
<list-item>
<p>Create a freely-available <bold>public repository</bold> with data associating mutations, clinical trials, disease, HPO terms, and MeSH terms. Develop APIs to access the data programmatically.</p>
</list-item>
<list-item>
<p>Repeat the analysis over <bold>multiple time frames</bold>, enabling future meta-analyses that may provide additional insights into mutation-disease associations over a period of time.</p>
</list-item>
<list-item>
<p>Demonstrate, via an example, how the meta-data extracted from this work can be used for <bold>machine learning.</bold></p>
</list-item>
</list>
<p>It is hypothesized that creating a public repository of associations between clinical trials, disease terms, SNPs, and protein mutations—and making such a repository freely-available via HTML reports, processed data, and APIs—will enable researchers and clinicians to stay up-to-date.</p>
</sec>
<sec id="sec002" sec-type="materials|methods">
<title>Materials and methods</title>
<p>Two publicly-available datasets were used in this study: ClinicalTrials.gov and HPO. The methods described here are also publicly-available at protocols.io (dx.doi.org/10.17504/protocols.io.bfacjiaw).</p>
<sec id="sec003">
<title>Datasets</title>
<sec id="sec004">
<title>ClinicalTrials.gov [<xref ref-type="bibr" rid="pone.0233438.ref005">5</xref>]</title>
<p>The complete repository of clinical trials displayed at ClinicalTrials.gov is available in XML format with a well-defined schema. However, analyzing clinical trial text to derive valuable insights is still a challenge as it involves parsing free-text [<xref ref-type="bibr" rid="pone.0233438.ref017">17</xref>].</p>
</sec>
<sec id="sec005">
<title>HPO [<xref ref-type="bibr" rid="pone.0233438.ref008">8</xref>]</title>
<p>HPO is a standardized vocabulary of phenotype abnormalities that are seen in humans [<xref ref-type="bibr" rid="pone.0233438.ref008">8</xref>]. HPO is a product of the Monarch Initiative and one of the thirteen driver projects in the Global Alliance for Genomics and Health (GA4GH [<xref ref-type="bibr" rid="pone.0233438.ref018">18</xref>]) strategic roadmap. The HPO ontology files are available in the OBO [<xref ref-type="bibr" rid="pone.0233438.ref019">19</xref>] flat-file format and are easy to read and parse. HPO annotations provide a correlation between HPO terms and genes. There are three annotation files that contain associations between genes and phenotypes. The HPO files used in this project consisted of 14,961 HPO nodes, with 18,547 parent-child relationships between the nodes. Furthermore, 820,297 gene-phenotype annotations mapped across 4,312 unique genes and 8,947 individual HPO terms.</p>
<p>For each node, when applicable, the HPO ontology files contain a reference to MeSH, UMLS, and SnomedCT ontologies. For example, the HPO node “id: HP:0000003” with name “Multicystic kidney dysplasia” maps to the following four cross-ontology terms.</p>
<list list-type="order">
<list-item>
<p>“xref: MSH:D021782”, which implies MeSH id D021782 and name “Multicystic Dysplastic Kidney.”</p>
</list-item>
<list-item>
<p>“xref: SNOMEDCT_US:204962002”, which implies SNOMEDCT id 204962002 and name: “Multicystic kidney”</p>
</list-item>
<list-item>
<p>“xref: SNOMEDCT_US:82525005”, which implies SNOMEDCT id 82525005 and name: “Multiple congenital cysts of kidney”</p>
</list-item>
<list-item>
<p>“xref: UMLS:C3714581”, which implies UMLS id C3714581 and name: “Multicystic dysplastic kidney”</p>
</list-item>
</list>
</sec>
<sec id="sec006">
<title>MeSH [<xref ref-type="bibr" rid="pone.0233438.ref015">15</xref>]</title>
<p>Although the ClinicalTrials.gov XML does not contain MeSH ids, information about MeSH terms is present. The MeSH online tool [<xref ref-type="bibr" rid="pone.0233438.ref020">20</xref>] was used to retrieve MeSH ids from MeSH terms. MeSH ids are directly linked to HPO ids, in essence enabling the association of MeSH terms with HPO nodes, as is discussed later in the Methods section.</p>
</sec>
</sec>
<sec id="sec007">
<title>Approaches for finding mutations</title>
<sec id="sec008">
<title>Mutation format</title>
<p>The Human Genome Variation Society (HGVS) defines a format [<xref ref-type="bibr" rid="pone.0233438.ref021">21</xref>][<xref ref-type="bibr" rid="pone.0233438.ref022">22</xref>] for referencing variants. As per the specifications, all variants should be described at the DNA level, noting relations to an accepted reference sequence. Descriptions can be at the DNA-level (e.g., <monospace>123456A&gt;T</monospace>), RNA-level (e.g., <monospace>76a&gt;u</monospace>), and protein level (e.g., Lys76Asn). Ogino et al. 2009 [<xref ref-type="bibr" rid="pone.0233438.ref023">23</xref>] provides a good overview of mutation nomenclature used for molecular diagnostics.</p>
</sec>
<sec id="sec009">
<title>RSids and SNPs</title>
<p>The Single Nucleotide Polymorphism database (dbSNP) repository [<xref ref-type="bibr" rid="pone.0233438.ref024">24</xref>] assigns a unique id to variations including SNPs, short nucleotide insertions and deletions, and short tandem repeats. These ids are called RSids and appear in the format <monospace>rs##</monospace>. For example, the RSid <monospace>rs35652124</monospace> maps to the following mutations in HGVS format <monospace>NC_000002.12:g.177265345T&gt;C, NC_000002.11:g.178130073T&gt;C</monospace> [<xref ref-type="bibr" rid="pone.0233438.ref025">25</xref>] and is a mutation on chromosome 2 at location 177265345, with associated gene <italic>NFE2L2</italic>. Public repositories, such as ClinVar [<xref ref-type="bibr" rid="pone.0233438.ref026">26</xref>], archive human genetic variants and interpretations of mutations’ significance to diseases. Such repositories use RSids as unique identifiers. ClinVar [<xref ref-type="bibr" rid="pone.0233438.ref024">24</xref>], for instance, has more than 400 thousand RefSNPs.</p>
</sec>
<sec id="sec010">
<title>SNP extraction</title>
<p>SNPs can be extracted with simple text processing methods as all SNPs follow the RSid format of beginning with the letters <monospace>rs</monospace> and having multiple numbers that follow the initial letters. For example, an SNP may be under the id <monospace>rs9939609</monospace> or <monospace>rs6971</monospace>.</p>
</sec>
<sec id="sec011">
<title>Protein mutation extraction</title>
<p>Several tools are available to mine mutations from the text. Some examples of such tools are:</p>
<list list-type="order">
<list-item>
<p>MutationFinder [<xref ref-type="bibr" rid="pone.0233438.ref027">27</xref>] is a simple-to-use package that uses a rule-based approach with more than 1500 regular expressions to extract protein mutations from the text.</p>
</list-item>
<list-item>
<p>Open Mutation Miner [<xref ref-type="bibr" rid="pone.0233438.ref028">28</xref>] is a tool that detects and annotates protein mutations by combining rules with the MutationFinder. It also maps the impact of the mutation by integrating Gene Ontology (GO) [<xref ref-type="bibr" rid="pone.0233438.ref029">29</xref>].</p>
</list-item>
<list-item>
<p>SNP Extraction Tool for Human Variations (SETH) [<xref ref-type="bibr" rid="pone.0233438.ref030">30</xref>] is an entity recognition tool that extends MutationFinder. SETH can recognize the following subtypes of mutations: substitution, deletion, insertion, duplication, insertion-deletion (insdel), inversion, conversion, translocation, frameshift, short-sequence repeat, and literal dbSNP mention. SETH also normalizes the genetic variant to a standard RSid.</p>
</list-item>
<list-item>
<p>tmVar [<xref ref-type="bibr" rid="pone.0233438.ref031">31</xref>] is a mutation extraction tool based on a conditional random field model and covers a wide range of sequence variants at both protein and gene levels in HGVS format.</p>
</list-item>
<list-item>
<p>tmVar 2.0 [<xref ref-type="bibr" rid="pone.0233438.ref032">32</xref>] builds on tmVar to automatically extract and map variants to unique identifiers (dbSNP RSIDs). tmVar 2.0 achieved an F-measure of nearly 90% for normalizing mutation ids and also compared well to SETH.</p>
</list-item>
</list>
<p>Yepes and Verspoor, 2014 [<xref ref-type="bibr" rid="pone.0233438.ref033">33</xref>] provide an overview of relative performance between the different mutation extraction tools. For this study, the MutationFinder tool was chosen for its precision and recall. A text processing pipeline was developed to first extract RSids (SNP mutations) using pattern matching; the MutationFinder tool was then applied to extract protein mutations. No changes were made to the MutationFinder Java code.</p>
</sec>
</sec>
<sec id="sec012">
<title>Programming packages</title>
<p>Tools used throughout the project are displayed in <xref ref-type="table" rid="pone.0233438.t001">Table 1</xref>. Java was the primary programming language.</p>
<table-wrap id="pone.0233438.t001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233438.t001</object-id>
<label>Table 1</label>
<caption>
<title>Software libraries used in this study.</title>
</caption>
<alternatives>
<graphic id="pone.0233438.t001g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0233438.t001" xlink:type="simple"/>
<table border="0" frame="box" rules="all">
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left" style="border-bottom:thick"/>
<th align="left" style="border-bottom:thick">Software</th>
<th align="left" style="border-bottom:thick">Details</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">1</td>
<td align="left">SAX Parser [<xref ref-type="bibr" rid="pone.0233438.ref034">34</xref>]</td>
<td align="left">Parsing XML of Clinical Trials</td>
</tr>
<tr>
<td align="left">2</td>
<td align="left">Apache OpenNLP [<xref ref-type="bibr" rid="pone.0233438.ref035">35</xref>]</td>
<td align="left">NLP parser for SNP mutations</td>
</tr>
<tr>
<td align="left">3</td>
<td align="left">MutationFinder [<xref ref-type="bibr" rid="pone.0233438.ref027">27</xref>]</td>
<td align="left">Protein mutation detection</td>
</tr>
<tr>
<td align="left">4</td>
<td align="left">Bootstrap [<xref ref-type="bibr" rid="pone.0233438.ref036">36</xref>]</td>
<td align="left">CSS files for HTML</td>
</tr>
<tr>
<td align="left">5</td>
<td align="left">Amazon Web Services (AWS [<xref ref-type="bibr" rid="pone.0233438.ref037">37</xref>])</td>
<td align="left">To host HTML reports</td>
</tr>
<tr>
<td align="left">6</td>
<td align="left">Jupyter Notebook (Google Colab [<xref ref-type="bibr" rid="pone.0233438.ref038">38</xref>])</td>
<td align="left">Python example to access API</td>
</tr>
<tr>
<td align="left">7</td>
<td align="left">Java Client API</td>
<td align="left">To access results programmatically</td>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t001fn001">
<p>The software tools used and their descriptions. Software libraries 1, 2, and 3 aided in locating mutations in the text files while libraries 4 and 5 facilitated the creation of the reports and website. Software tools 6 and 7 were employed to enhance the accessibility of the results.</p>
</fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="sec013">
<title>Analysis steps</title>
<p>The seven main analysis steps are illustrated in <xref ref-type="fig" rid="pone.0233438.g001">Fig 1</xref> and described in detail below.</p>
<list list-type="order">
<list-item>
<p><bold>Download:</bold> XML files from ClinicalTrials.gov and HPO data files.</p>
</list-item>
<list-item>
<p><bold>Parse:</bold> The Java SAX parser framework efficiently parsed the ClinicalTrials.gov XML files. In this step, for a given clinical-trial XML file, a fully-instantiated JavaBean class was created to represent the Clinical Trial. Key XML fields used in this study include Title, Summary, Study Type, Description, Outcomes, Arm, Study Design, MeSH Terms, Outcomes, Conditions, Intervention, Phase, Observational Model, and Keywords. The MeSH terms referenced in the XML were mapped to their MeSH ids using the procedure explained below:</p>
<list list-type="alpha-lower">
<list-item>
<p>Created a list of MeSH terms referenced across all clinical trials.</p>
</list-item>
<list-item>
<p>Retrieved MeSH ids using the MeSH online tool [<xref ref-type="bibr" rid="pone.0233438.ref020">20</xref>] for each of the MeSH terms in the list.</p>
</list-item>
</list>
<p>In the same manner, the HPO ontology file was parsed to create a parent-child hierarchy: HPO annotation files were parsed, and associations between HPO nodes and genes were noted.</p>
</list-item>
<list-item>
<p><bold>Text Processing:</bold> The Apache OpenNLP library was utilized to parse the clinical trials into sentences. Using OpenNLP, a series of classes were created to effectively tokenize the various sentences. Regular Expressions were used to detect SNPs and protein mutations. For instance, detailed below is the process of detecting key entities:</p>
<list list-type="alpha-lower">
<list-item>
<p>Parse XML using SAX Parser.</p>
</list-item>
<list-item>
<p>Create a JavaBean instance with attributes.</p>
</list-item>
<list-item>
<p>Tokenize text by splitting the paragraphs into sentences and then the sentences into tokens.</p>
</list-item>
<list-item>
<p>Regular Expressions were used to determine if a specific token was either a protein mutation or an SNP. As detailed in the “SNP extraction” and “Protein mutation extraction” sections, particular regular expressions denoted the presence of a mutation.</p>
</list-item>
</list>
</list-item>
<list-item>
<p><bold>Text Analyzers:</bold> Several crawlers were created to traverse through the local XML files and extract relevant information. Functions of the text processors are the following: create an index of all clinical trials; associate conditions with the clinical trials; extract SNPs, protein mutations, and MeSH terms from the tokens; derive frequency information and reports for SNPs, protein mutations, HPO nodes, MeSH nodes, etc.; and map clinical trials to HPO terms (in essence, normalizing to HPO nodes). Normalization is discussed further below.</p>
</list-item>
<list-item>
<p><bold>Normalization:</bold> Clinical trials were mapped to HPO nodes through the following process:</p>
<list list-type="alpha-lower">
<list-item>
<p>MeSH ids were associated with HPO ids using the HPO data file.</p>
</list-item>
<list-item>
<p>HPO ids are linked to an HPO node. Thus, clinical trials were correlated to MeSH terms, MeSH ids, and finally HPO nodes.</p>
</list-item>
</list>
<p>The steps normalized the HPO terms to standardize correlations between overlapping terms.</p>
</list-item>
<list-item>
<p><bold>Report Generators:</bold> Reports were generated to analyze the processed data, display detailed information for each of the mutations, and showcase elements of the clinical trials in which the mutations appear.</p>
</list-item>
<list-item>
<p><bold>Host Reports:</bold> The final reports are hosted on an AWS S3 bucket [<xref ref-type="bibr" rid="pone.0233438.ref037">37</xref>]. Note that these static-hyperlinked-HTML reports support user interactions. Java client APIs, along with a Google Colab document (Jupyter Notebook using Python), were created to make the produced analytics and results accessible programmatically.</p>
</list-item>
</list>
<fig id="pone.0233438.g001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233438.g001</object-id>
<label>Fig 1</label>
<caption>
<title>Seven steps of the pipeline.</title>
<p>Methodology to mine ClinicalTrials.gov to extract unique insights for understanding SNPs and mutations. Each of the steps is described in detail in the “Analysis Steps” section.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0233438.g001" xlink:type="simple"/>
</fig>
</sec>
<sec id="sec014">
<title>Machine learning example</title>
<p>We conducted a simple example of how the insights produced from this work can be applied biologically via machine learning. In this instance, clusters of similar HPO terms are desired for research purposes. It was decided to identify similar HPO terms by analyzing the correlations between SNPs and HPO terms. For example, if two HPO terms were linked to the same SNP, those two terms would have a high probability of being related. The Java code for this example is available at the SNP Miner Trials homepage (package: com.snpminertrials.ct.snp.ml.HPOSnpClustering). To illustrate how machine learning can be applied to the results and analytics produced, the following procedure was applied to solve the presented example:</p>
<list list-type="order">
<list-item>
<p>Use the tabular data (available from the homepage) to create an incidence matrix where each row is an HPO node and each column an SNP. There are <italic>m</italic> HPO nodes and <italic>n</italic> SNPs. A value of one is entered whenever the <italic>i</italic>-th HPO node is correlated with the <italic>j</italic>-th SNP; otherwise, a value of zero is entered for that element.</p>
</list-item>
<list-item>
<p>Normalize the data by creating a unit vector for each HPO term. Unit vectors are obtained by dividing each element of a row by the magnitude of that row.</p>
</list-item>
<list-item>
<p>For each HPO term, compute the pair-wise dot product between its vector and all other vectors. The resulting vector is a metric of normalized correlation.</p>
</list-item>
<list-item>
<p>Sort the results to create a prioritized list of related HPO terms.</p>
</list-item>
</list>
<p>Hierarchical Clustering [<xref ref-type="bibr" rid="pone.0233438.ref041">41</xref>] or K-Means [<xref ref-type="bibr" rid="pone.0233438.ref042">42</xref>] could also be used to find clusters of related HPO terms. A similar process can be used with protein mutations—in place of SNPs—as well. Alternatively, HPO terms could be clustered based on both protein and SNP mutations. The rows and columns can be switched to cluster similar SNP/protein mutations by their associated HPO terms [<xref ref-type="bibr" rid="pone.0233438.ref043">43</xref>].</p>
</sec>
</sec>
<sec id="sec015" sec-type="results">
<title>Results</title>
<p>The “Results” section comprises six sub-topics:</p>
<list list-type="order">
<list-item>
<p>Details on the created public repository to provide access to the data used, reports created, correlations mapped, and APIs produced.</p>
</list-item>
<list-item>
<p>Insights about the ClinicalTrials.gov corpus after normalizing the data using MeSH and HPO ontologies.</p>
</list-item>
<list-item>
<p>Insights about the mined SNPs.</p>
</list-item>
<list-item>
<p>Insights about the extracted protein mutations.</p>
</list-item>
<list-item>
<p>Analysis of popular interventions.</p>
</list-item>
<list-item>
<p>Findings related to the machine learning example.</p>
</list-item>
</list>
<sec id="sec016">
<title>Public repository</title>
<sec id="sec017">
<title>Web page to access longitudinal analysis data, reports, and APIs</title>
<p>All analysis results are accessible via the SNP Miner Results home page, available at <ext-link ext-link-type="uri" xlink:href="http://snpminertrials.com" xlink:type="simple">http://snpminertrials.com</ext-link>. A view of the home page is seen in <xref ref-type="supplementary-material" rid="pone.0233438.s001">S1 Fig</xref>. The web page provides access to data and reports from multiple time frames. As of March 2020, there are two analysis time points: August 2019 and March 2020. Additionally, the home page has links to Java APIs and Google Colab pages, which facilitate easy local access to the insights and results of this research. The SNP Miner Results home page provides the latest analysis results, and—due to the constant influx of new clinical trials, enhancements to HPO, and HPO annotation files—the results are subject to change.</p>
<p>Java APIs, as well as a Google Colab Notebook (see <xref ref-type="supplementary-material" rid="pone.0233438.s001">S1 Fig</xref>) with Python, allow the results to be easily accessed programmatically.</p>
<p>The functionalities of the various APIs are to retrieve information about the following:</p>
<list list-type="order">
<list-item>
<p>The MeSH terms and MeSH ids used to tag the ClinicalTrials.gov corpus</p>
</list-item>
<list-item>
<p>HPO terms and their corresponding clinical trials</p>
</list-item>
<list-item>
<p>RSids and their corresponding clinical trials</p>
</list-item>
<list-item>
<p>*Relevant MeSH ids and their correlated clinical trials</p>
</list-item>
<list-item>
<p>*Relevant HPO ids and their correlated clinical trials</p>
</list-item>
<list-item>
<p>Protein mutations and their corresponding clinical trials</p>
</list-item>
</list>
<p>*Only the specific terms that have any correlation to a mutation are shown.</p>
<p>Additionally, there are results discussing the machine learning example mentioned earlier.</p>
</sec>
</sec>
<sec id="sec018">
<title>Term normalization</title>
<p>The clinical trial XML contains a field called “Condition”, which is a free-formed annotation associated with the clinical trial. <xref ref-type="supplementary-material" rid="pone.0233438.s002">S2 Fig</xref> shows frequently occurring conditions (referenced more than 1,000 times) across the clinical trial documents. Since these conditions are free-formed and not mapped to a standard ontology, multiple distinct terms refer to the same condition. For example, six terms that refer to “Type 1 Diabetes”—“Diabetes Mellitus, Type 1,” “Type 1 Diabetes,” “Type 1 Diabetes Mellitus,” “Type1diabetes,” “Type1 Diabetes Mellitus,” and “Diabetes Mellitus Type 1”—appear throughout the clinical trials. Standard ontologies such as MeSH and HPO map these variant terms to a single ontology node: D003922 [<xref ref-type="bibr" rid="pone.0233438.ref039">39</xref>] for MeSH and HP:0100651 [<xref ref-type="bibr" rid="pone.0233438.ref040">40</xref>] for HPO. There were 87,656 unique conditions, and 559,918 total condition mentions. Thus, normalization was pivotal in standardizing the results.</p>
<p>In the XML data, each clinical trial contains a list of associated MeSH tags. As described in the “Methods” section, these MeSH tags were useful in linking MeSH terms to HPO terms and MeSH ids to HPO ids.</p>
<p>Using information about MeSH tags, multiple analytics were produced: 6,643 unique MeSH tags have been cited 568,784 times across the 332,418 clinical trials; approximately 81% of the clinical trials have a MeSH annotation, and around 62% of the trials have a MeSH annotation with an associated HPO term mapped to a gene. <xref ref-type="supplementary-material" rid="pone.0233438.s002">S2 Fig</xref> displays all of the MeSH terms with at least 2,000 total tags ranked by frequency.</p>
</sec>
<sec id="sec019">
<title>Results from extracting RSids</title>
<p>There were 566 unique RSids across 368 clinical trials, with a total of 798 mentions. <xref ref-type="table" rid="pone.0233438.t002">Table 2</xref> contains the top three most frequently occurring RSids, while <xref ref-type="supplementary-material" rid="pone.0233438.s002">S2 Fig</xref> shows a tabular view of frequently occurring SNPs and HPO terms. rs12979860 co-occurs with “HP:0012115 Hepatitis” 33 times. rs12979860, which occurs near <italic>IL28B</italic>, is in fact used for selecting Hepatitis C treatment [<xref ref-type="bibr" rid="pone.0233438.ref044">44</xref>], validating the methodology and results. Other notable SNPs referenced multiple times across the corpus are rs6971, which is associated with brain diseases [<xref ref-type="bibr" rid="pone.0233438.ref046">46</xref>], and rs9939609, which is associated with fat mass and obesity [<xref ref-type="bibr" rid="pone.0233438.ref047">47</xref>]. All of these results help validate the pipeline employed since all of these SNPs have already been commonly known and studied.</p>
<table-wrap id="pone.0233438.t002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233438.t002</object-id>
<label>Table 2</label>
<caption>
<title>Most frequent RSids across ClinicalTrials.gov.</title>
</caption>
<alternatives>
<graphic id="pone.0233438.t002g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0233438.t002" xlink:type="simple"/>
<table border="0" frame="box" rules="all">
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left"/>
<th align="left">RSid</th>
<th align="left">Count</th>
<th align="left">HPO Node</th>
<th align="left">HPO Node Name</th>
<th align="left">Count</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" rowspan="5">1</td>
<td align="left" rowspan="5">rs12979860</td>
<td align="left" rowspan="5">38</td>
<td align="left">HP:0012115</td>
<td align="center">Hepatitis</td>
<td align="left">33</td>
</tr>
<tr>
<td align="left">HP:0200123</td>
<td align="center">Chronic hepatitis</td>
<td align="left">2</td>
</tr>
<tr>
<td align="left">HP:0001402</td>
<td align="center">Hepatocellular carcinoma</td>
<td align="left">2</td>
</tr>
<tr>
<td align="left">HP:0030731</td>
<td align="center">Carcinoma</td>
<td align="left">1</td>
</tr>
<tr>
<td align="left">HP:0001392</td>
<td align="center">Abnormality of the liver</td>
<td align="left">1</td>
</tr>
<tr>
<td align="left" rowspan="12">2</td>
<td align="left" rowspan="12">rs6971</td>
<td align="left" rowspan="12">26</td>
<td align="left">HP:0002511</td>
<td align="center">Alzheimer disease</td>
<td align="left">4</td>
</tr>
<tr>
<td align="left">HP:0006802</td>
<td align="center">Abnormal anterior horn cell morphology</td>
<td align="left">2</td>
</tr>
<tr>
<td align="left">HP:0007354</td>
<td align="center">Amyotrophic lateral sclerosis</td>
<td align="left">2</td>
</tr>
<tr>
<td align="left">HP:0100753</td>
<td align="center">Schizophrenia</td>
<td align="left">1</td>
</tr>
<tr>
<td align="left">HP:0000729</td>
<td align="center">Psychosis</td>
<td align="left">1</td>
</tr>
<tr>
<td align="left">HP:0000709</td>
<td align="center">Encephalitis</td>
<td align="left">1</td>
</tr>
<tr>
<td align="left">HP:0002383</td>
<td align="center">Psychosis</td>
<td align="left">1</td>
</tr>
<tr>
<td align="left">HP:0000717</td>
<td align="center">Autism</td>
<td align="left">1</td>
</tr>
<tr>
<td align="left">HP:0000716</td>
<td align="center">Depressivity</td>
<td align="left">1</td>
</tr>
<tr>
<td align="left">HP:0002180</td>
<td align="center">Neurodegeneration</td>
<td align="left">1</td>
</tr>
<tr>
<td align="left">HP:0001658</td>
<td align="center">Myocardial infarction</td>
<td align="left">1</td>
</tr>
<tr>
<td align="left">HP:0001268</td>
<td align="center">Mental deterioration</td>
<td align="left">1</td>
</tr>
<tr>
<td align="left" rowspan="5">3</td>
<td align="left" rowspan="5">rs9939609</td>
<td align="left" rowspan="5">11</td>
<td align="left">HP:0001513</td>
<td align="center">Obesity</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">HP:0000819</td>
<td align="center">Diabetes mellitus</td>
<td align="left">1</td>
</tr>
<tr>
<td align="left">HP:0001824</td>
<td align="center">Weight loss</td>
<td align="left">1</td>
</tr>
<tr>
<td align="left">HP:0000855</td>
<td align="center">Insulin resistance</td>
<td align="left">1</td>
</tr>
<tr>
<td align="left">HP:0100651</td>
<td align="center">Type I diabetes mellitus</td>
<td align="left">1</td>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t002fn001">
<p>Most frequent RSids extracted across ClinicalTrials.gov.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<sec id="sec020">
<title>Validation case</title>
<p>To further validate the pipeline, 37 SNPs associated with “HP:0003002 Breast carcinoma” were analyzed. These SNPs are rs1011970, rs10407022, rs1045485, rs10941679, rs10995190, rs11045585, rs11133360, rs11249433, rs12762549, rs13281615, rs13387042, rs16942, rs1800566, rs2002555, rs2046210, rs2237060, rs2241193, rs2297480, rs236114, rs2380205, rs271924, rs2981582, rs3803662, rs3817198, rs4073, rs4646, rs4973768, rs614367, rs6504950, rs704010, rs7333181, rs7349683, rs889312, rs909253, rs9344, rs9457827, and rs999737. Each one of these was manually verified for associations with breast cancer. As expected, each and every one of them had a known association with breast cancer, further illustrating the accuracy and effectiveness of the methodology. The Java API toolkit includes an API that returns a list of SNPs for an associated HPO node.</p>
</sec>
<sec id="sec021">
<title>MeSH terms, HPO terms, and reports</title>
<p>
<xref ref-type="supplementary-material" rid="pone.0233438.s002">S2 Fig</xref> illustrates the most prominent MeSH ids referenced across the 368 clinical trials with RSids. Interestingly, the first set of MeSH terms was related to Hepatitis, with more than 10% (37 out of 368) of clinical trials falling into this category, demonstrating the quantity of research involving mutations and Hepatitis.</p>
<p>The most cited HPO terms fall into the areas of Hepatitis, Diabetes, Cancer (Breast carcinoma, Leukemia), abnormality of the cardiovascular system, and Schizophrenia. <xref ref-type="supplementary-material" rid="pone.0233438.s002">S2 Fig</xref> shows the key HPO terms with associated SNPs across the clinical trial corpus. The 368 clinical trials mapped to 136 different HPO terms and were referenced 368 times. The frequency of HPO terms sheds light on the areas that researchers are prominently interested in.</p>
<p>
<xref ref-type="table" rid="pone.0233438.t003">Table 3</xref> shows the top HPO nodes with the highest occurring RSids. Breast carcinoma had 37 unique RSids associated with it, suggesting that genetic mutations possibly influence Breast Cancer. Other diseases with the largest number of associated RSids include Impulsivity, Aggressive behavior, Diabetes mellitus, Hepatitis, and Asthma.</p>
<table-wrap id="pone.0233438.t003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233438.t003</object-id>
<label>Table 3</label>
<caption>
<title>HPO Terms with the largest number of associated RSids.</title>
</caption>
<alternatives>
<graphic id="pone.0233438.t003g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0233438.t003" xlink:type="simple"/>
<table border="0" frame="box" rules="all">
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left"/>
<th align="left">HPO Id</th>
<th align="left">Name</th>
<th align="left">#</th>
<th align="left">RSid</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">1</td>
<td align="left">HP:0003002</td>
<td align="left">Breast carcinoma</td>
<td align="left">37</td>
<td align="center">rs1011970,rs10407022,rs1045485,rs10941679,rs10995190,rs11045585,rs11133360,rs11249433,rs12762549,rs13281615,rs13387042,rs16942,rs1800566,rs2002555,rs2046210,rs2237060,rs2241193,rs2297480,rs236114,rs2380205,rs271924,rs2981582,rs3803662,rs3817198,rs4073,rs4646,rs4973768,rs614367,rs6504950,rs704010,rs7333181,rs7349683,rs889312,rs909253,rs9344,rs9457827,rs999737</td>
</tr>
<tr>
<td align="left">2</td>
<td align="left">HP:0100710</td>
<td align="left">Impulsivity</td>
<td align="left">23</td>
<td align="center">rs1042713,rs1079598,rs1150226,rs1549339,rs16111115,rs1672717,rs1800497,rs1800955,rs1801253,rs2242447,rs2278392,rs2550946,rs4532,rs4680,rs4994,rs518147,rs553668,rs5569,rs6269,rs6280,rs6295,rs6296,rs6311</td>
</tr>
<tr>
<td align="left">3</td>
<td align="left">HP:0000718</td>
<td align="left">Aggressive behavior</td>
<td align="left">23</td>
<td align="center">rs1042713,rs1079598,rs1150226,rs1549339,rs16111115,rs1672717,rs1800497,rs1800955,rs1801253,rs2242447,rs2278392,rs2550946,rs4532,rs4680,rs4994,rs518147,rs553668,rs5569,rs6269,rs6280,rs6295,rs6296,rs6311</td>
</tr>
<tr>
<td align="left">4</td>
<td align="left">HP:0000819</td>
<td align="left">Diabetes mellitus</td>
<td align="left">22</td>
<td align="center">rs10830963,rs12469968,rs13266634,rs2266782,rs2281135,rs2284872,rs2294918,rs35652124,rs35874116,rs35874116rs,rs3765467,rs3788979,rs5215,rs5219,rs738409,rs7565794,rs780094,rs780094s,rs78408340,rs7903146,rs9701796,rs9939609
</td>
</tr>
<tr>
<td align="left">5</td>
<td align="left">HP:0012115<break/>HP:0200123</td>
<td align="left">Hepatitis<break/>Chronic hepatitis</td>
<td align="left">20</td>
<td align="center">rs10813831,rs1127354,rs11795404,rs12356193,rs12979860,rs12992677,rs17037122,rs179008,rs2066842,rs2067085,rs2464266,rs3853839,rs41308230,rs4588,rs5743844,rs6592052,rs7041,rs7270101,rs7549785,rs8099917
</td>
</tr>
<tr>
<td align="left">6</td>
<td align="left">HP:0002099</td>
<td align="left">Asthma</td>
<td align="left">18</td>
<td align="center">rs1042711,rs1042713,rs1042714,rs1042718,rs11958940,rs11959427,rs12654778,rs12936231,rs1504982,rs17778257,rs1800888,rs1801275,rs1805010,rs2053044,rs2895795,rs324011,rs324015,rs4950928
</td>
</tr>
<tr>
<td align="left">7</td>
<td align="left">HP:0001257</td>
<td align="left">Spasticity</td>
<td align="left">18</td>
<td align="center">rs1049522,rs1049524,rs137852620,rs2032892,rs2269272,rs2269273,rs2562582,rs2731886,rs377637047,rs4869675,rs4869676,rs529802001,rs544684689,rs547987105,rs549927573,rs550842646,rs562696473,rs573562920
</td>
</tr>
<tr>
<td align="left">8</td>
<td align="left">HP:0001638</td>
<td align="left">Cardiomyopathy</td>
<td align="left">18</td>
<td align="center">rs1042522,rs1042522s,rs1056892,rs10836235,rs10865801,rs1128503,rs1149222,rs13058338,rs1465952,rs1786378374,rs1883112,rs2229774,rs2279744,rs35599367,rs3761624,rs45511401,rs4673,rs7853758</td>
</tr>
<tr>
<td align="left">9</td>
<td align="left">HP:0001677</td>
<td align="left">Coronary artery atherosclerosis</td>
<td align="left">16</td>
<td align="center">rs10153820,rs1143623,rs1143633,rs1143634,rs12041331,rs16944,rs16969968,rs17561,rs1761667,rs2305619,rs4848306,rs6434222,rs7586970,rs7903146,rs8069645,rs8176528
</td>
</tr>
<tr>
<td align="left">10</td>
<td align="left">HP:0001909</td>
<td align="left">Leukemia</td>
<td align="left">15</td>
<td align="center">rs10509681,rs11572080,rs12459419,rs172378,rs2032582,rs230561,rs25531,rs3816527,rs396991,rs4880,rs4958351,rs6190,rs628031,rs776746,rs904627
</td>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t003fn001">
<p>The 368 clinical trials with RSids mapped to 136 unique HPO terms.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>An HTML report was created for each of the 566 unique RSids, and reports over multiple time periods are freely available via the home page (<ext-link ext-link-type="uri" xlink:href="http://www.snpminertrials.com" xlink:type="simple">http://www.snpminertrials.com</ext-link>). As shown in <xref ref-type="supplementary-material" rid="pone.0233438.s001">S1 Fig</xref>, each report contains a list of the clinical trials in which the SNP appears, along with the sentences containing the SNP. Each clinical trial report also shows the mapped HPO as well as MeSH terms, both of which are hyperlinked to other reports and external resources. As shown in <xref ref-type="supplementary-material" rid="pone.0233438.s001">S1 Fig</xref>, the HPO terms and their associated genes are also displayed at the bottom of the report. All 566 SNPs are displayed on the left-hand side of the report to enable easy navigation across the RSids.</p>
<p>Similarly, an HTML report was generated for each of the 368 unique clinical trials that mentioned SNPs. Reports, over multiple time periods, are freely available. As shown in <xref ref-type="supplementary-material" rid="pone.0233438.s001">S1 Fig</xref>, all reports contain the details of the clinical trial, the list of SNPs mentioned, and the sentences in which each SNP appears. Every clinical trial report shows the mapped HPO and MeSH terms, which are also hyperlinked. <xref ref-type="supplementary-material" rid="pone.0233438.s001">S1 Fig</xref> highlights the unique RSid terms and their associated sentences, which are also displayed at the bottom of the report. All the 368 clinical trial ids are displayed on the left-hand side of the report to enable easy navigation across the clinical trials.</p>
</sec>
</sec>
<sec id="sec022">
<title>Results of extracting protein mutations from the clinical trial corpus using MutationFinder</title>
<p>There were 962 unique protein mutations across 1,939 clinical trials, with a total of 3,881 mentions.</p>
<p>
<xref ref-type="table" rid="pone.0233438.t004">Table 4</xref> contains the top four most frequently occurring protein mutations. The protein mutation L858R is cited in 293 clinical trials, out of which 233 clinical trials mapped to HPO node “HP:0030358, Non-small cell lung carcinoma,” suggesting a correlation between L858R and Lung Cancer. The 293 clinical trials that mention L858R map to 21 HPO nodes, most of which are associated with Cancer (e.g., “HP:0100526 Neoplasm of the lung”, “HP:0030731 Carcinoma”, and “HP:0030692 Brain Neoplasm”). Similarly, T790M (synonym, Thr790Met) is cited across 289 clinical trials, which frequently map to cancer-related HPO nodes, indicating the vast amount of Cancer research performed. V600E and T315I, with 228 and 98 citations respectively, are the next two most commonly cited protein mutations. V600E is associated with Cutaneous melanoma, Neoplasm of the large intestine, and Thyroid adenoma, while T315I is associated with Leukemia, Chronic myelogenous leukemia, and Myeloid leukemia.</p>
<table-wrap id="pone.0233438.t004" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233438.t004</object-id>
<label>Table 4</label>
<caption>
<title>Most frequent mutations across ClinicalTrials.gov.</title>
</caption>
<alternatives>
<graphic id="pone.0233438.t004g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0233438.t004" xlink:type="simple"/>
<table border="0" frame="box" rules="all">
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left" style="border-bottom:thick"/>
<th align="left" style="border-bottom:thick">Mutation</th>
<th align="left" style="border-bottom:thick">Synonyms</th>
<th align="left" style="border-bottom:thick">Count</th>
<th align="left" style="border-bottom:thick">HPO Node</th>
<th align="left">HPO Node Name</th>
<th align="left">Count</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" rowspan="9">1</td>
<td align="left" rowspan="9">L858R</td>
<td align="left" rowspan="9">leucine to arginine at codon 858 leucine-to-arginine mutation at codon 858</td>
<td align="left" rowspan="9">293</td>
<td align="left">HP:0030358</td>
<td align="center">Non-small cell lung carcinoma</td>
<td align="left">233</td>
</tr>
<tr>
<td align="left">HP:0100526</td>
<td align="center">Neoplasm of the lung</td>
<td align="left">165</td>
</tr>
<tr>
<td align="left">HP:0030731</td>
<td align="center">Carcinoma</td>
<td align="left">16</td>
</tr>
<tr>
<td align="left">HP:0002664</td>
<td align="center">Neoplasm</td>
<td align="left">6</td>
</tr>
<tr>
<td align="left">HP:0030692</td>
<td align="center">Brain neoplasm</td>
<td align="left">4</td>
</tr>
<tr>
<td align="left">HP:0002088</td>
<td align="center">Abnormal lung morphology</td>
<td align="left">2</td>
</tr>
<tr>
<td align="left">HP:0012056</td>
<td align="center">Cutaneous melanoma</td>
<td align="left">2</td>
</tr>
<tr>
<td align="left">HP:0002202</td>
<td align="center">Pleural effusion</td>
<td align="left">2</td>
</tr>
<tr>
<td align="left">…</td>
<td align="center">14 more…</td>
<td align="left">…</td>
</tr>
<tr>
<td align="left" rowspan="8">2</td>
<td align="left" rowspan="8">T790M</td>
<td align="left" rowspan="8">Thr790Met</td>
<td align="left" rowspan="8">289</td>
<td align="left">HP:0030358</td>
<td align="center">Non-small cell lung carcinoma</td>
<td align="left">222</td>
</tr>
<tr>
<td align="left">HP:0100526</td>
<td align="center">Neoplasm of the lung</td>
<td align="left">154</td>
</tr>
<tr>
<td align="left">HP:0030731</td>
<td align="center">Carcinoma</td>
<td align="left">20</td>
</tr>
<tr>
<td align="left">HP:0002664</td>
<td align="center">Neoplasm</td>
<td align="left">10</td>
</tr>
<tr>
<td align="left">HP:0002088</td>
<td align="center">Abnormal lung morphology</td>
<td align="left">4</td>
</tr>
<tr>
<td align="left">HP:0030357</td>
<td align="center">Small cell lung carcinoma</td>
<td align="left">3</td>
</tr>
<tr>
<td align="left">HP:0005584</td>
<td align="center">Renal cell carcinoma</td>
<td align="left">2</td>
</tr>
<tr>
<td align="left">…</td>
<td align="center">17 more…</td>
<td align="left">..</td>
</tr>
<tr>
<td align="left" rowspan="8">3</td>
<td align="left" rowspan="8">V600E</td>
<td align="left" rowspan="8"/>
<td align="left" rowspan="8">228</td>
<td align="left">HP:0012056</td>
<td align="center">Cutaneous melanoma</td>
<td align="left">98</td>
</tr>
<tr>
<td align="left">HP:0100834</td>
<td align="center">Neoplasm of the large intestine</td>
<td align="left">31</td></tr>
<tr>
<td align="left">HP:0030358</td>
<td align="center">Non-small cell lung carcinoma</td>
<td align="left">28</td>
</tr>
<tr>
<td align="left">HP:0100526</td>
<td align="center">Neoplasm of the lung</td>
<td align="left">25</td>
</tr>
<tr>
<td align="left">HP:0002664</td>
<td align="center">Neoplasm</td>
<td align="left">21</td>
</tr>
<tr>
<td align="left">HP:0030731</td>
<td align="center">Carcinoma</td>
<td align="left">15</td>
</tr>
<tr>
<td align="left">HP:0000854</td>
<td align="center">Thyroid adenoma</td>
<td align="left">13</td>
</tr>
<tr>
<td align="left">…</td>
<td align="center">53 more…</td>
<td align="left">13<break/>..</td>
</tr>
<tr>
<td align="left" rowspan="7">4</td>
<td align="left" rowspan="7">T315I</td>
<td align="left" rowspan="7">Thr315Ile threonine 315 to isoleucine</td>
<td align="left" rowspan="7">98</td>
<td align="left">HP:0001909</td>
<td align="center">Leukemia</td>
<td align="left">83</td>
</tr>
<tr>
<td align="left">HP:0005506</td>
<td align="center">Chronic myelogenous leukemia</td>
<td align="left">73</td>
</tr>
<tr>
<td align="left">HP:0012324</td>
<td align="center">Myeloid leukemia</td>
<td align="left">67</td>
</tr>
<tr>
<td align="left">HP:0005526</td>
<td align="center">Lymphoid leukemia</td>
<td align="left">23</td>
</tr>
<tr>
<td align="left">HP:0004808</td>
<td align="center">Acute myeloid leukemia</td>
<td align="left">5</td>
</tr>
<tr>
<td align="left">HP:0002863</td>
<td align="center">Myelodysplasia</td>
<td align="left">4</td>
</tr>
<tr>
<td align="left">…</td>
<td align="center">14 more…</td>
<td align="left">…</td>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t004fn001">
<p>The top four commonly cited protein mutations across the clinical trials and their related HPO nodes.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>The 1,939 unique clinical trials that referenced protein mutations were subsequently analyzed. MeSH terms that appear frequently across clinical trials that contain protein mutations are shown in
<xref ref-type="fig" rid="pone.0233438.g002">Fig 2</xref>. <xref ref-type="fig" rid="pone.0233438.g003">Fig 3</xref> illustrates MeSH terms that frequently appear for both the RSid and protein mutation cases. In <xref ref-type="fig" rid="pone.0233438.g003">Fig 3</xref>, multiple MeSH terms are related to Hepatitis and Cancer, further demonstrating the quantity of research in these fields.</p>
<fig id="pone.0233438.g002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233438.g002</object-id>
<label>Fig 2</label>
<caption>
<title>Bubble graph showing the key MeSH nodes used to tag clinical trials with protein mutations.</title>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0233438.g002" xlink:type="simple"/>
</fig>
<fig id="pone.0233438.g003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233438.g003</object-id>
<label>Fig 3</label>
<caption>
<title>Common MeSH terms for clinical trials with RSid and protein mutation frequencies.</title>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0233438.g003" xlink:type="simple"/>
</fig>
<p>Similarly, <xref ref-type="table" rid="pone.0233438.t005">Table 5</xref> portrays the top HPO terms referenced across these 1,939 clinical trials with protein mutations. The HPO node HP:0030358 “Non-small cell lung carcinoma” is associated with 382 clinical trials, followed by HP:0100526 “Neoplasm of the lung” with 284 clinical trials. “Leukemia,” “Cutaneous melanoma,” “Neoplasm,” “Chronic myelogenous leukemia,” “Myeloid leukemia,” “Carcinoma,” “Neoplasm of the large intestine,” and “Lymphoma” are the remaining HPO terms with the largest number of associated clinical trials. The quantity of Cancer nodes possibly suggests a correlation between mutations and Cancer.</p>
<table-wrap id="pone.0233438.t005" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233438.t005</object-id>
<label>Table 5</label>
<caption>
<title>HPO Terms with the most cited protein mutations found by MutationFinder in ClinicalTrials.gov.</title>
</caption>
<alternatives>
<graphic id="pone.0233438.t005g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0233438.t005" xlink:type="simple"/>
<table border="0" frame="box" rules="all">
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left" style="border-bottom:thick"/>
<th align="left" style="border-bottom:thick">HPO Id</th>
<th align="left" style="border-bottom:thick">Number Clinical Trials</th>
<th align="left" style="border-bottom:thick">HPO Node Name</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">1</td>
<td align="left">HP:0030358</td>
<td align="left">382</td>
<td align="left">Non-small cell lung carcinoma</td>
</tr>
<tr>
<td align="left">2</td>
<td align="left">HP:0100526</td>
<td align="left">284</td>
<td align="left">Neoplasm of the lung</td>
</tr>
<tr>
<td align="left">3</td>
<td align="left">HP:0001909</td>
<td align="left">106</td>
<td align="left">Leukemia</td>
</tr>
<tr>
<td align="left">4</td>
<td align="left">HP:0012056</td>
<td align="left">103</td>
<td align="left">Cutaneous melanoma</td>
</tr>
<tr>
<td align="left">5</td>
<td align="left">HP:0002664</td>
<td align="left">78</td>
<td align="left">Neoplasm</td>
</tr>
<tr>
<td align="left">6</td>
<td align="left">HP:0005506</td>
<td align="left">75</td>
<td align="left">Chronic myelogenous leukemia</td>
</tr>
<tr>
<td align="left">7</td>
<td align="left">HP:0012324</td>
<td align="left">75</td>
<td align="left">Myeloid leukemia</td>
</tr>
<tr>
<td align="left">8</td>
<td align="left">HP:0030731</td>
<td align="left">73</td>
<td align="left">Carcinoma</td>
</tr>
<tr>
<td align="left">9</td>
<td align="left">HP:0100834</td>
<td align="left">44</td>
<td align="left">Neoplasm of the large intestine</td>
</tr>
<tr>
<td align="left">10</td>
<td align="left">HP:0002665</td>
<td align="left">36</td>
<td align="left">Lymphoma</td>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t005fn001">
<p>The 1,939 clinical trials with mutations mapped to 332 unique HPO terms and were referenced 2,447 times.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>Next, analyzing the number of protein mutations for each of the referenced HPO terms provides insights, as shown in <xref ref-type="table" rid="pone.0233438.t006">Table 6</xref>. HP:0002664 “Neoplasm” has 75 associated protein mutations, while HP:0003002 “Breast carcinoma” is next with 73 mutations. “Carcinoma,” “Lymphoma,” “Neoplasm of the lung,” “Leukemia,” “Non-small cell lung carcinoma,” and “Non-Hodgkin lymphoma” are the next six HPO nodes with the largest number of associated protein mutations.</p>
<table-wrap id="pone.0233438.t006" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233438.t006</object-id>
<label>Table 6</label>
<caption>
<title>HPO Terms with the largest number of associated mutations.</title>
</caption>
<alternatives>
<graphic id="pone.0233438.t006g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0233438.t006" xlink:type="simple"/>
<table border="0" frame="box" rules="all">
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left" style="border-bottom:thick"/>
<th align="left" style="border-bottom:thick">HPO Id</th>
<th align="left" style="border-bottom:thick">Name</th>
<th align="left" style="border-bottom:thick">#</th>
<th align="left" style="border-bottom:thick">Mutations</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">1</td>
<td align="left">HP:0002664</td>
<td align="left">Neoplasm</td>
<td align="left">75</td>
<td align="center">C10D,C377T,C677T,C797S,D816V,D835V,D842V,E10A,E17K,E542K,E545K,F1174L,F31I,G12C,G12D,G12V,G13D,G156A,G20210A,G719A,G719C,H1047R,H1112L,H1112Y,H1124D,K652E,L1213V,L265P,L858R,L861Q,M1149T,M1268T,P1009S,P13K,P1446A,P286R,P4503A,Q12H,Q21D,R132C,R132G,R132H,R132L,R132S,R132V,R140L,R140Q,R140W,R172G,R172K,R172M,R172S,R172W,R988C,T1010I,T1191I,T315I,T790M,V1110L,V1206L,V1238I,V411L,V57I,V600D,V600E,V600K,V600M,V600R,V617F,V941L,Y1248C,Y1248D,Y1248H,Y1253D,Y842C</td>
</tr>
<tr>
<td align="left">2</td>
<td align="left">HP:0003002</td>
<td align="left">Breast carcinoma</td>
<td align="left">73</td>
<td align="center">A289T,A864V,C3435T,D538G,D769H,D769N,D769Y,D988Y,E380Q,E542K,E545K,E709K,E757A,G309A,G309E,G598V,G776C,G776V,H1047R,I655V,I767M,L536H,L536P,L536Q,L536R,L755P,L755S,L786V,L841V,L858R,L861Q,L869R,P125A,P12A,P13K,P187S,P535H,P596L,R108K,R222C,R572Y,R678Q,R831C,R831H,R849W,R896C,S310F,S310Y,S463P,S653C,S768I,S8814A,S9313A,T47D,T733I,T790M,T798I,T798M,T862I,V244M,V534E,V600E,V659E,V697L,V742I,V769M,V773M,V774M,V777L,V842I,Y537C,Y537N,Y537S</td>
</tr>
<tr>
<td align="left">3</td>
<td align="left">HP:0030731</td>
<td align="left">Carcinoma</td>
<td align="left">57</td>
<td align="center">C3435T,C420R,C938A,E10A,E542K,E545A,E545D,E545G,E545K,G1049R,G12C,G20210A,G719A,H1047L,H1047R,H1047Y,I105V,I10A,K751Q,L8585R,L858R,L861Q,M1043I,N345K,N375S,P13K,P286R,Q12W,Q546E,Q546K,Q546L,Q546R,R399Q,R776G,R831C,R88Q,S100P,S1400A,S1400C,S1400D,S1400E,S1400F,S1400G,S1400I,S1400K,S1900A,S1900C,S1900D,S768I,T790M,V411L,V600E,V600K,V600R,V617F,V762A,V843I</td>
</tr>
<tr>
<td align="left">4</td>
<td align="left">HP:0002665</td>
<td align="left">Lymphoma</td>
<td align="left">52</td>
<td align="center">A677G,A677V,A687V,C282Y,C481S,E571K,F1174L,G156A,G71R,H1112L,H1112Y,H1124D,H63D,I10A,I1171N,L1213V,L265P,M1149T,M1268T,P1009S,P11A,P13K,P140K,P4503A,Q12H,Q21D,Q28D,R131H,R988C,T1010I,T1191I,T315I,T351I,T790M,V1110L,V1206L,V1238I,V158F,V158M,V600E,V617F,V66M,V941L,Y1248C,Y1248D,Y1248H,Y1253D,Y641C,Y641F,Y641H,Y641N,Y641S</td>
</tr>
<tr>
<td align="left">5</td>
<td align="left">HP:0100526</td>
<td align="left">Neoplasm of the lung</td>
<td align="left">52</td>
<td align="center">C1156Y,C797S,D594G,F1174C,F1174V,G1202R,G1269A,G12C,G12D,G469A,G719A,G719C,G719S,G776C,G776V,I10A,L1196M,L1198F,L523S,L755S,L833F,L8585R,L858R,L859R,L861G,L861Q,L861R,N375S,P13K,P4503A,R776G,R831C,S1400A,S1400C,S1400D,S1400E,S1400F,S1400G,S1400I,S1400K,S1800A,S1900A,S1900C,S1900D,S768I,T790M,T81C,T890M,V600E,V769L,V777L,V843I</td>
</tr>
<tr>
<td align="left">6</td>
<td align="left">HP:0001909</td>
<td align="left">Leukemia</td>
<td align="left">51</td>
<td align="center">C282Y,C481S,D816V,D835Y,E255K,E255V,F317C,F317L,F317S,F317V,F31I,F359C,F359V,G250E,G71R,H369P,H63D,L248R,L248V,N682S,P140K,P1446A,P4503A,Q12H,Q252H,R132C,R132G,R132H,R132L,R132S,R132V,R140L,R140Q,R140W,R172G,R172K,R172M,R172S,R172W,S1612C,S9333A,T315A,T315I,T351I,V158M,V299L,V57I,V600E,V617F,V66M,Y253H</td>
</tr>
<tr>
<td align="left">7</td>
<td align="left">HP:0030358</td>
<td align="left">Non-small cell lung carcinoma</td>
<td align="left">43</td>
<td align="center">C797S,C8092A,D594G,F1174C,F1174V,G1202R,G1269A,G12C,G12D,G12V,G13D,G2032R,G469A,G719A,G719C,G719S,G776C,G776V,I10A,L1196M,L523S,L755S,L833F,L8585R,L858R,L861G,L861Q,L861R,P13K,P4503A,R776G,R831C,S1800A,S1900A,S1900C,S768I,T790M,T81C,V600E,V600K,V769L,V777L,V843I</td>
</tr>
<tr>
<td align="left">8</td>
<td align="left">HP:0012539</td>
<td align="left">Non-Hodgkin lymphoma</td>
<td align="left">42</td>
<td align="center">A1298C,A222V,A677G,A677V,A687V,C677T,F1174L,G71R,H1112L,H1112Y,H1124D,L1213V,M1149T,M1268T,P1009S,P13K,P140K,P4503A,Q12H,Q30R,R988C,T1010I,T1191I,T315I,T790M,V1110L,V1206L,V1238I,V158M,V617F,V66M,V941L,Y1248C,Y1248D,Y1248H,Y1253D,Y641C,Y641F,Y641H,Y641N,Y641S,Y93C</td>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t006fn001">
<p>The 1,939 clinical trials with mutations mapped to 332 unique HPO terms and were referenced 2,447 times.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>
<xref ref-type="fig" rid="pone.0233438.g004">Fig 4</xref> shows the distribution of HPO terms across (a) all clinical trials, (b) those with RSids, and (c) those with protein mutations. Interestingly, Diabetes Mellitus is the most commonly occurring HPO Term across all clinical trials.</p>
<fig id="pone.0233438.g004" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233438.g004</object-id>
<label>Fig 4</label>
<caption>
<title>Frequency of different HPO terms across clinical trials, across trials with RSids, and across trials with protein mutations.</title>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0233438.g004" xlink:type="simple"/>
</fig>
<p>HTML reports were created for each of the 962 unique protein mutations and are freely available from the SNP Miner home page (<ext-link ext-link-type="uri" xlink:href="http://snpminertrials.com" xlink:type="simple">http://snpminertrials.com</ext-link>). As shown in <xref ref-type="supplementary-material" rid="pone.0233438.s001">S1 Fig</xref>, each report contains a list of clinical trials where the protein mutation appears, along with the sentences containing the mutations. Each protein mutation report shows the mapped HPO as well as MeSH terms. All 962 protein mutations are displayed on the left-hand side of the report to enable easy navigation. Similarly, reports for each of the clinical trials which reference a protein mutation are also available.</p>
</sec>
<sec id="sec023">
<title>Interventions</title>
<p>Interventions (or treatments) are the focus of a clinical trial and are categorized into eleven different types, as shown in <xref ref-type="table" rid="pone.0233438.t007">Table 7</xref>. There are 573,887 unique Intervention tags across the eleven different Intervention Types.</p>
<table-wrap id="pone.0233438.t007" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233438.t007</object-id>
<label>Table 7</label>
<caption>
<title>Intervention types for clinical trials with mutations.</title>
</caption>
<alternatives>
<graphic id="pone.0233438.t007g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0233438.t007" xlink:type="simple"/>
<table border="0" frame="box" rules="all">
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left" style="border-bottom:thick"/>
<th align="left" style="border-bottom:thick">Intervention Type</th>
<th align="left" style="border-bottom:thick">Number of Clinical Trials</th>
<th align="left" style="border-bottom:thick">Percent mapped to CT with Genes</th>
<th align="left" style="border-bottom:thick">Percent with RSid</th>
<th align="left" style="border-bottom:thick">Percent with mutations</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">1</td>
<td align="left">Behavioral</td>
<td align="left">35,450</td>
<td align="char" char=".">51.5%</td>
<td align="char" char=".">0.055%</td>
<td align="char" char=".">0.12%</td>
</tr>
<tr>
<td align="left">2</td>
<td align="left">Biological</td>
<td align="left">16,370</td>
<td align="char" char=".">54.6%</td>
<td align="char" char=".">0.084%</td>
<td align="char" char=".">0.93%</td>
</tr>
<tr>
<td align="left">3</td>
<td align="left">Combination Product</td>
<td align="left">1,152</td>
<td align="char" char=".">61.5%</td>
<td align="char" char=".">0.11%</td>
<td align="char" char=".">0.52%</td>
</tr>
<tr>
<td align="left">4</td>
<td align="left">Device</td>
<td align="left">43,079</td>
<td align="char" char=".">60.1%</td>
<td align="char" char=".">0.025%</td>
<td align="char" char=".">0.1%</td>
</tr>
<tr>
<td align="left">5</td>
<td align="left">Diagnostic Test</td>
<td align="left">6,299</td>
<td align="char" char=".">67.6%</td>
<td align="char" char=".">0.255%</td>
<td align="char" char=".">0.4%</td>
</tr>
<tr>
<td align="left">6</td>
<td align="left">Dietary Supplement</td>
<td align="left">10,882</td>
<td align="char" char=".">55.7%</td>
<td align="char" char=".">0.24%</td>
<td align="char" char=".">0.36%</td>
</tr>
<tr>
<td align="left">7</td>
<td align="left">Drug</td>
<td align="left">98,048</td>
<td align="char" char=".">65.9%</td>
<td align="char" char=".">0.14%</td>
<td align="char" char="."><bold>1.4%</bold></td>
</tr>
<tr>
<td align="left">8</td>
<td align="left">Genetic</td>
<td align="left">1,189</td>
<td align="char" char=".">72.8%</td>
<td align="char" char="."><bold>2.34%</bold></td>
<td align="char" char="."><bold>4.1%</bold></td>
</tr>
<tr>
<td align="left">9</td>
<td align="left">Other</td>
<td align="left">52,885</td>
<td align="char" char=".">54.8%</td>
<td align="char" char=".">0.12%</td>
<td align="char" char=".">0.43%</td>
</tr>
<tr>
<td align="left">10</td>
<td align="left">Procedure</td>
<td align="left">33,045</td>
<td align="char" char=".">62.8%</td>
<td align="char" char=".">0.035%</td>
<td align="char" char=".">0.27%</td>
</tr>
<tr>
<td align="left">11</td>
<td align="left">Radiation</td>
<td align="left">3,650</td>
<td align="char" char="."><bold>83.2%</bold></td>
<td align="char" char=".">0.12%</td>
<td align="char" char="."><bold>1.04%</bold></td>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t007fn001">
<p>Eleven different categories of Interventions along with the number of unique tags in each category. Additionally, the percent of clinical trials that mapped to HPO nodes with associated genes, clinical trials with RSids, and clinical trials with protein mutations are illustrated.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>Each Intervention tag was categorized into one of two mutually-exclusive categories: one that had a clinical trial with an HPO term (and consequently was associated with a gene), and the other that did not have an HPO term. The fourth column of <xref ref-type="table" rid="pone.0233438.t007">Table 7</xref> shows the percentage of Intervention Types that were mapped to clinical trials with associated genes; the Radiation Intervention Type had the highest percentage with 83.2%, indicating the dependence of Radiation research on genetic information. <xref ref-type="fig" rid="pone.0233438.g005">Fig 5</xref> shows four subgraphs: the first illustrates the relative frequency distribution of clinical trial interventions across the eleven categories; the second is the percent distribution of clinical trials with HPO nodes associated with genes; the third depicts the percent of the clinical trials which have an RSid, and the fourth displays percentages of clinical trials that have a protein mutation. As expected, clinical trials with the “Genetic” Intervention type had the highest percent of clinical trials with SNPs and protein mutations, with 2.34% and 4.1%, respectively. Intervention types “Drug” and “Radiation” also had a high incidence of protein mutations with 1.4% and 1.04%, respectively, of the clinical trials referencing mutations.</p>
<fig id="pone.0233438.g005" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233438.g005</object-id>
<label>Fig 5</label>
<caption>
<title>Percentage of clinical trials in each of the eleven categories with RSids and protein mutations.</title>
<p>(a) The first graph shows the relative frequency of clinical trials in each of the eleven Intervention types. (b) The second shows the percent of clinical trials in each of the categories that link to an HPO term and have an associated gene. (c) The third shows the relative frequency of clinical trials in each of the categories that had an associated RSid. (d) The fourth shows the percent of clinical trials in each of the categories that had an associated protein mutation.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0233438.g005" xlink:type="simple"/>
</fig>
</sec>
<sec id="sec024">
<title>Machine learning application: Results</title>
<p>Three representative HPO nodes were selected to demonstrate the results of the clustering by SNP. The HPO nodes most similar to each are shown in <xref ref-type="table" rid="pone.0233438.t008">Table 8</xref> and discussed below.</p>
<list list-type="order">
<list-item>
<p><bold>HP:0001909 Leukemia</bold>: As expected, the most common HPO nodes related to “HP:0001909 Leukemia” are all associated with different kinds of Leukemia, validating the methodology. Yet, lower in the list, nodes like “HP:0004757 Paroxysmal atrial fibrillation” seem out of place. However, patients with Leukemia are treated with the drug, Ibrutinib, a Bruton’s tyrosine kinase inhibitor [<xref ref-type="bibr" rid="pone.0233438.ref048">48</xref>] that has two adverse effects: atrial fibrillation and bleeding. Therefore, “HP:0004757 Paroxysmal atrial fibrillation” is correctly linked to “HP:0001909 Leukemia,” illustrating that this machine learning example incorporates multiple features of HPO Nodes and their corresponding mutations to highlight interesting and possibly novel correlations. Similarly, Leukemia is related to Dysmenorrhea [<xref ref-type="bibr" rid="pone.0233438.ref049">49</xref>] and Depressivity [<xref ref-type="bibr" rid="pone.0233438.ref050">50</xref>] through this methodology, illustrating the effectiveness of such Machine Learning applications in possibly finding novel correlations between diseases/conditions.</p>
</list-item>
<list-item>
<p><bold>HP:0000819 Diabetes mellitus</bold>: As expected, “HP:0000819 Diabetes mellitus” is associated with different elements of diabetes, kidneys, weight, insulin, the gastrointestinal tract, the liver, and the cardiovascular system, further validating the methodology and pipeline.</p>
</list-item>
<list-item>
<p><bold>HP:0001824 Weight loss</bold>: As the last example, the generic non-disease term “Weight Loss” was selected. “Weight Loss” still worked outstandingly in the algorithm as common correlations were related to the gastrointestinal tract, blood-forming tissues, diabetes, kidneys, insulin, liver, and the cardiovascular system.</p>
</list-item>
</list>
<table-wrap id="pone.0233438.t008" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0233438.t008</object-id>
<label>Table 8</label>
<caption>
<title>Related HPO terms using co-occurrences of RSids and HPO terms.</title>
</caption>
<alternatives>
<graphic id="pone.0233438.t008g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0233438.t008" xlink:type="simple"/>
<table border="0" frame="box" rules="all">
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="center"/>
<th align="center">HPO Id</th>
<th align="center">HPO Term</th>
<th align="center">Related HPO Term</th>
<th align="center">Score</th>
</tr>
</thead>
<tbody>
<tr>
<td align="center" rowspan="10">1</td>
<td align="center" rowspan="10">HP:0001909</td>
<td align="center" rowspan="10">Leukemia</td>
<td align="center">HP:0012324 Myeloid leukemia</td>
<td align="char" char=".">0.69</td>
</tr>
<tr>
<td align="center">HP:0005526 Lymphoid leukemia</td>
<td align="char" char=".">0.58</td>
</tr>
<tr>
<td align="center">HP:0005506 Chronic myelogenous leukemia</td>
<td align="char" char=".">0.58</td>
</tr>
<tr>
<td align="center">HP:0002665 Lymphoma</td>
<td align="char" char=".">0.45</td>
</tr>
<tr>
<td align="center">HP:0004808 Acute myeloid leukemia</td>
<td align="char" char=".">0.39</td>
</tr>
<tr>
<td align="center">HP:0005550 Chronic lymphatic leukemia</td>
<td align="char" char=".">0.37</td>
</tr>
<tr>
<td align="center">HP:0012539 Non-Hodgkin lymphoma</td>
<td align="char" char=".">0.26</td>
</tr>
<tr>
<td align="center">HP:0004757 Paroxysmal atrial fibrillation</td>
<td align="char" char=".">0.13</td>
</tr>
<tr>
<td align="center">HP:0100607 Dysmenorrhea</td>
<td align="char" char=".">0.12</td>
</tr>
<tr>
<td align="center">HP:0000716 Depressivity</td>
<td align="char" char=".">0.1</td>
</tr>
<tr>
<td align="center" rowspan="14">2</td>
<td align="center" rowspan="14">HP:0000819</td>
<td align="center" rowspan="14">Diabetes mellitus</td>
<td align="center">HP:0005978 Type II diabetes mellitus</td>
<td align="char" char=".">0.57</td>
</tr>
<tr>
<td align="center">HP:0100651 Type I diabetes mellitus</td>
<td align="char" char=".">0.5</td>
</tr>
<tr>
<td align="center">HP:0000077 Abnormality of the kidney</td>
<td align="char" char=".">0.45</td>
</tr>
<tr>
<td align="center">HP:0011998 Postprandial hyperglycemia</td>
<td align="char" char=".">0.45</td>
</tr>
<tr>
<td align="center">HP:0012622 Chronic kidney disease</td>
<td align="char" char=".">0.38</td>
</tr>
<tr>
<td align="center">HP:0001824 Weight loss</td>
<td align="char" char=".">0.29</td>
</tr>
<tr>
<td align="center">HP:0001392 Abnormality of the liver</td>
<td align="char" char=".">0.27</td>
</tr>
<tr>
<td align="center">HP:0000855 Insulin resistance</td>
<td align="char" char=".">0.27</td>
</tr>
<tr>
<td align="center">HP:0011024 Abnormality of the gastrointestinal tract</td>
<td align="char" char=".">0.25</td>
</tr>
<tr>
<td align="center">HP:0001871 Abnormality of blood and blood-forming tissues</td>
<td align="char" char=".">0.25</td>
</tr>
<tr>
<td align="center">HP:0001397 Hepatic steatosis</td>
<td align="char" char=".">0.24</td>
</tr>
<tr>
<td align="center">HP:0001513 Obesity</td>
<td align="char" char=".">0.12</td>
</tr>
<tr>
<td align="center">HP:0001626 Abnormality of the cardiovascular system</td>
<td align="char" char=".">0.067</td>
</tr>
<tr>
<td align="center">HP:0001677 Coronary artery atherosclerosis</td>
<td align="char" char=".">0.057</td>
</tr>
<tr>
<td align="center" rowspan="11">3</td>
<td align="center" rowspan="11">HP:0001824</td>
<td align="center" rowspan="11">Weight loss</td>
<td align="center">HP:0011024 Abnormality of the gastrointestinal tract</td>
<td align="center"/>
</tr>
<tr>
<td align="center">HP:0001871 Abnormality of blood and blood-forming tissues</td>
<td align="char" char=".">0.58</td>
</tr>
<tr>
<td align="center">HP:0000819 Diabetes mellitus</td>
<td align="char" char=".">0.58</td>
</tr>
<tr>
<td align="center">HP:0012622 Chronic kidney disease</td>
<td align="char" char=".">0.29</td>
</tr>
<tr>
<td align="center">HP:0100651 Type I diabetes mellitus</td>
<td align="char" char=".">0.29</td>
</tr>
<tr>
<td align="center">HP:0001513 Obesity</td>
<td align="char" char=".">0.29</td>
</tr>
<tr>
<td align="center">HP:0000077 Abnormality of the kidney</td>
<td align="char" char=".">0.26</td>
</tr>
<tr>
<td align="center">HP:0001392 Abnormality of the liver</td>
<td align="char" char=".">0.2</td>
</tr>
<tr>
<td align="center">HP:0000855 Insulin resistance</td>
<td align="char" char=".">0.2</td>
</tr>
<tr>
<td align="center">HP:0001397 Hepatic steatosis</td>
<td align="char" char=".">0.18</td>
</tr>
<tr>
<td align="center">HP:0001626 Abnormality of the cardiovascular system</td>
<td align="char" char=".">0.15</td>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t008fn001">
<p>Results from finding similar HPO terms using occurrence of RSids as dimensions. The above results are representative, and the complete analysis, with the Java API, can be downloaded from the SNP Miner homepage.</p>
</fn>
</table-wrap-foot>
</table-wrap>
<p>Readers are encouraged to use the APIs developed to try out the complete analysis using both SNPs and protein mutations.</p>
</sec>
</sec>
<sec id="sec025">
<title>Conclusion and future work</title>
<p>In this work, protein mutations and SNPs were successfully mined from ClinicalTrials.gov. Additionally, mutations and clinical trials were associated with HPO and MeSH ontologies. The benefits of using ontologies to help normalize free-formed text were demonstrated, and the mapping from MeSH to HPO also enabled the finding of genes associated with the HPO term. Unique reports for each mutation and clinical trial were created, helping researchers mine associations between mutations, genes, and diseases. These reports are freely available on the web, along with APIs (Java and Google Colab notebooks) for programmatic access. Further, the publicly-available site (<ext-link ext-link-type="uri" xlink:href="http://snpminertrials.com" xlink:type="simple">http://snpminertrials.com</ext-link>) contains analysis at multiple time points, further providing researchers with longitudinal information about clinical trials and associated entities, as well as demonstrating the reproducibility of the methods. The programmatic access of the data connecting SNPs and protein mutations with MeSH and HPO terms can also be useful for machine learning, as demonstrated above.</p>
<p>Future work would enhance the developed framework to include other mutation types and generate further insights from ClinicalTrials.gov data. This framework, utilizing the created pipeline, can additionally be applied to other scientific corpora, such as PubMed [<xref ref-type="bibr" rid="pone.0233438.ref051">51</xref>] and PubMed Central [<xref ref-type="bibr" rid="pone.0233438.ref052">52</xref>], another area of future work. Additional insights can be obtained by extracting biomedical entities from the clinical trials corpus. For example, the U.S. Food and Drug Administration (FDA), the Center for Biologics Evaluation and Research (CBER), and the Center for Drug Evaluation and Research (CDER) [<xref ref-type="bibr" rid="pone.0233438.ref053">53</xref>] have a rich repository of drug information.</p>
</sec>
<sec id="sec026">
<title>Supporting information</title>
<supplementary-material id="pone.0233438.s001" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pone.0233438.s001" xlink:type="simple">
<label>S1 Fig</label>
<caption>
<title>Screen shots of SNPMiner homepage, various reports, and API toolkits.</title>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pone.0233438.s002" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pone.0233438.s002" xlink:type="simple">
<label>S2 Fig</label>
<caption>
<title>Graphs of different analysis reports.</title>
<p>(PDF)</p>
</caption>
</supplementary-material>
</sec>
</body>
<back>
<ack>
<p>The author would like to thank Ayush Alag, Princeton University, for his valuable feedback on the manuscript and guidance during the project. Further, the author would like to thank Dr. Eric Nelson, The Harker School, for his encouragement on the project and valuable feedback on the manuscript.</p>
</ack>
<ref-list>
<title>References</title>
<ref id="pone.0233438.ref001">
<label>1</label>
<mixed-citation publication-type="other" xlink:type="simple">What are single nucleotide polymorphisms (SNPs)? Available at: <ext-link ext-link-type="uri" xlink:href="https://ghr.nlm.nih.gov/primer/genomicresearch/snp" xlink:type="simple">https://ghr.nlm.nih.gov/primer/genomicresearch/snp</ext-link>. Accessed March 2020</mixed-citation>
</ref>
<ref id="pone.0233438.ref002">
<label>2</label>
<mixed-citation publication-type="other" xlink:type="simple">Wetterstrand KA. DNA Sequencing Costs: Data from the NHGRI Genome Sequencing Program (GSP) Available at: <ext-link ext-link-type="uri" xlink:href="http://www.genome.gov/sequencingcostsdata" xlink:type="simple">www.genome.gov/sequencingcostsdata</ext-link>. Accessed August 2019</mixed-citation>
</ref>
<ref id="pone.0233438.ref003">
<label>3</label>
<mixed-citation publication-type="other" xlink:type="simple">What are genome-wide association studies? NIH Genetics Home Reference. <ext-link ext-link-type="uri" xlink:href="https://ghr.nlm.nih.gov/primer/genomicresearch/gwastudies" xlink:type="simple">https://ghr.nlm.nih.gov/primer/genomicresearch/gwastudies</ext-link>.</mixed-citation>
</ref>
<ref id="pone.0233438.ref004">
<label>4</label>
<mixed-citation publication-type="book" xlink:type="simple">
<name name-style="western"><surname>Yepes</surname> <given-names>AJ</given-names></name>, <name name-style="western"><surname>MacKinlay</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Gunn</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Schieber</surname> <given-names>C.</given-names></name>, <name name-style="western"><surname>Faux</surname> <given-names>N.</given-names></name>, <name name-style="western"><surname>Downton</surname> <given-names>M.</given-names></name>, <etal>et al</etal>. <chapter-title>A hybrid approach for automated mutation annotation of the extended human mutation landscape in the scientific literature</chapter-title>. <source>AMIA Annu Symp Proc</source>. <year>2018</year>;<volume>2018</volume>:<fpage>616</fpage>–<lpage>623</lpage>. Published 2018 Dec 5. <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6371299/" xlink:type="simple">https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6371299/</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref005">
<label>5</label>
<mixed-citation publication-type="other" xlink:type="simple">Clinicaltrials.gov Available at: <ext-link ext-link-type="uri" xlink:href="https://clinicaltrials.gov/" xlink:type="simple">https://clinicaltrials.gov/</ext-link>. Accessed August 2019</mixed-citation>
</ref>
<ref id="pone.0233438.ref006">
<label>6</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Zhang</surname> <given-names>X. A.</given-names></name>, <name name-style="western"><surname>Yates</surname> <given-names>A.</given-names></name>, <name name-style="western"><surname>Vasilevsky</surname> <given-names>N.</given-names></name>, <name name-style="western"><surname>Gourdine</surname> <given-names>J. P.</given-names></name>, <name name-style="western"><surname>Callahan</surname> <given-names>T. J.</given-names></name>, <name name-style="western"><surname>Carmody</surname> <given-names>L. C.</given-names></name>, <etal>et al</etal>. <article-title>Semantic integration of clinical laboratory tests from electronic health records for deep phenotyping and biomarker discovery</article-title>. <source>NPJ digital medicine</source>, <volume>2</volume>, <issue>32</issue>. (<year>2019</year>).</mixed-citation>
</ref>
<ref id="pone.0233438.ref007">
<label>7</label>
<mixed-citation publication-type="other" xlink:type="simple">The international standard for identifying health measurements, observations, and documents. Available at <ext-link ext-link-type="uri" xlink:href="https://loinc.org/" xlink:type="simple">https://loinc.org/</ext-link>.</mixed-citation>
</ref>
<ref id="pone.0233438.ref008">
<label>8</label>
<mixed-citation publication-type="book" xlink:type="simple">The Human Phenotype Ontology Available at <ext-link ext-link-type="uri" xlink:href="https://hpo.jax.org/app/" xlink:type="simple">https://hpo.jax.org/app/</ext-link>.</mixed-citation>
</ref>
<ref id="pone.0233438.ref009">
<label>9</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Gandy</surname> <given-names>LM</given-names></name>, <name name-style="western"><surname>Gumm</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Blackford</surname> <given-names>AL</given-names></name>, <name name-style="western"><surname>Fertig</surname> <given-names>EJ</given-names></name>, <name name-style="western"><surname>Diaz</surname> <given-names>LA</given-names> <suffix>Jr</suffix></name>. <article-title>A Software Application for Mining and Presenting Relevant Cancer Clinical Trials per Cancer Mutation</article-title>. <source>Cancer Inform</source>. <year>2017</year>;<volume>16</volume>:1176935117711940. Published 2017 Jun 22.</mixed-citation>
</ref>
<ref id="pone.0233438.ref010">
<label>10</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Xu</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Lee</surname> <given-names>HJ</given-names></name>, <name name-style="western"><surname>Zeng</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Wu</surname> <given-names>Y.</given-names></name>, <name name-style="western"><surname>Zhang</surname> <given-names>Y.</given-names></name>, <name name-style="western"><surname>Huang</surname> <given-names>LC</given-names></name>, <etal>et al</etal>. <article-title>Extracting genetic alteration information for personalized cancer therapy from ClinicalTrials.gov</article-title>. <source>J Am Med Inform Assoc</source>. <year>2016</year>;<volume>23</volume>(<issue>4</issue>):<fpage>750</fpage>–<lpage>757</lpage>. <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4926744/" xlink:type="simple">https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4926744/</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref011">
<label>11</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Su</surname> <given-names>EW</given-names></name>, <name name-style="western"><surname>Sanger</surname> <given-names>TM</given-names></name>. <article-title>Systematic drug repositioning through mining adverse event data in ClinicalTrials.gov</article-title>. <source>PeerJ</source>. <year>2017</year>;<volume>5</volume>:<fpage>e3154</fpage>. Published 2017 Mar 23. Reference: <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5366063/" xlink:type="simple">https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5366063/</ext-link> <object-id pub-id-type="pmid">28348935</object-id></mixed-citation>
</ref>
<ref id="pone.0233438.ref012">
<label>12</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Pradhan</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Hoaglin</surname> <given-names>DC</given-names></name>, <name name-style="western"><surname>Cornell</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Liu</surname> <given-names>W</given-names></name>, <name name-style="western"><surname>Wang</surname> <given-names>V</given-names></name>, <name name-style="western"><surname>Yu</surname> <given-names>H</given-names></name>. <article-title>Automatic extraction of quantitative data from ClinicalTrials.gov to conduct meta-analyses</article-title>. <source>Journal of Clinical Epidemiology</source>. <volume>105</volume>.</mixed-citation>
</ref>
<ref id="pone.0233438.ref013">
<label>13</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Sfakianaki</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Koumakis</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Sfakianakis</surname> <given-names>S</given-names></name>, <etal>et al</etal>. <article-title>Semantic biomedical resource discovery: a Natural Language Processing framework</article-title>. <source>BMC Med Inform Decis Mak</source>. <year>2015</year>;<volume>15</volume>:<fpage>77</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/s12911-015-0200-4" xlink:type="simple">10.1186/s12911-015-0200-4</ext-link></comment> <object-id pub-id-type="pmid">26423616</object-id></mixed-citation>
</ref>
<ref id="pone.0233438.ref014">
<label>14</label>
<mixed-citation publication-type="other" xlink:type="simple">What Are RS Numbers (Rsid)? <ext-link ext-link-type="uri" xlink:href="https://customercare.23andme.com/hc/en-us/articles/212196908-What-Are-RS-Numbers-Rsid-" xlink:type="simple">https://customercare.23andme.com/hc/en-us/articles/212196908-What-Are-RS-Numbers-Rsid-</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref015">
<label>15</label>
<mixed-citation publication-type="other" xlink:type="simple">NIH MeSH <ext-link ext-link-type="uri" xlink:href="https://meshb.nlm.nih.gov/search" xlink:type="simple">https://meshb.nlm.nih.gov/search</ext-link>.</mixed-citation>
</ref>
<ref id="pone.0233438.ref016">
<label>16</label>
<mixed-citation publication-type="other" xlink:type="simple">Provides a link between genes and HPO terms. All phenotype terms associated with any disease that is associated with variants in a gene are assigned to that gene in this file. <ext-link ext-link-type="uri" xlink:href="https://hpo.jax.org/app/download/annotation" xlink:type="simple">https://hpo.jax.org/app/download/annotation</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref017">
<label>17</label>
<mixed-citation publication-type="other" xlink:type="simple">Clinical trials XML schema <ext-link ext-link-type="uri" xlink:href="https://clinicaltrials.gov/ct2/html/images/info/public.xsd" xlink:type="simple">https://clinicaltrials.gov/ct2/html/images/info/public.xsd</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref018">
<label>18</label>
<mixed-citation publication-type="other" xlink:type="simple">Global Alliance for Genomic Health <ext-link ext-link-type="uri" xlink:href="https://www.ga4gh.org/" xlink:type="simple">https://www.ga4gh.org/</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref019">
<label>19</label>
<mixed-citation publication-type="other" xlink:type="simple">The OBO Flat File Format Specification, version 1.2 <ext-link ext-link-type="uri" xlink:href="http://owlcollab.github.io/oboformat/doc/GO.format.obo-12.html" xlink:type="simple">http://owlcollab.github.io/oboformat/doc/GO.format.obo-12.html</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref020">
<label>20</label>
<mixed-citation publication-type="other" xlink:type="simple">NCBI MeSH <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/mesh" xlink:type="simple">https://www.ncbi.nlm.nih.gov/mesh</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref021">
<label>21</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>den Dunnen</surname> <given-names>J. T.</given-names></name>, <name name-style="western"><surname>Dalgleish</surname> <given-names>R.</given-names></name>, <name name-style="western"><surname>Maglott</surname> <given-names>D. R.</given-names></name>, <name name-style="western"><surname>Hart</surname> <given-names>R. K.</given-names></name>, <name name-style="western"><surname>Greenblatt</surname> <given-names>M. S.</given-names></name>, <name name-style="western"><surname>McGowan‐Jordan</surname> <given-names>J.</given-names></name>, <name name-style="western"><surname>Roux</surname> <given-names>A.</given-names></name>, <name name-style="western"><surname>Smith</surname> <given-names>T.</given-names></name>, <name name-style="western"><surname>Antonarakis</surname> <given-names>S. E.</given-names></name> and <name name-style="western"><surname>Taschner</surname> <given-names>P. E</given-names></name>. <article-title>HGVS Recommendations for the Description of Sequence Variants: 2016 Update</article-title>. <source>Human Mutation</source>, <year>2016</year>, <volume>37</volume>: <fpage>564</fpage>–<lpage>569</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1002/humu.22981" xlink:type="simple">10.1002/humu.22981</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0233438.ref022">
<label>22</label>
<mixed-citation publication-type="other" xlink:type="simple">Sequence Variant Nomenclature. <ext-link ext-link-type="uri" xlink:href="https://varnomen.hgvs.org/" xlink:type="simple">https://varnomen.hgvs.org/</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref023">
<label>23</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Ogino</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Gulley</surname> <given-names>ML</given-names></name>, <name name-style="western"><surname>den Dunnen</surname> <given-names>JT</given-names></name>, <name name-style="western"><surname>Wilson</surname> <given-names>RB</given-names></name>; <collab>Association for Molecular Pathology Training and Education Committee</collab>. <article-title>Standard mutation nomenclature in molecular diagnostics: practical and educational challenges [published correction appears in J Mol Diagn. 2009 Sep 1;11(5):494]</article-title>. <source>J Mol Diagn</source>. <year>2007</year>;<volume>9</volume>(<issue>1</issue>):<fpage>1</fpage>–<lpage>6</lpage>.</mixed-citation>
</ref>
<ref id="pone.0233438.ref024">
<label>24</label>
<mixed-citation publication-type="other" xlink:type="simple">dbSNP. <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/snp/" xlink:type="simple">https://www.ncbi.nlm.nih.gov/snp/</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref025">
<label>25</label>
<mixed-citation publication-type="other" xlink:type="simple">dbSNP rs35652124. <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/snp/?term=rs35652124" xlink:type="simple">https://www.ncbi.nlm.nih.gov/snp/?term=rs35652124</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref026">
<label>26</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Landrum</surname> <given-names>MJ</given-names></name>, <name name-style="western"><surname>Lee</surname> <given-names>JM</given-names></name>, <name name-style="western"><surname>Benson</surname> <given-names>M</given-names></name>, <etal>et al</etal>. <article-title>ClinVar: improving access to variant interpretations and supporting evidence</article-title>. <source>Nucleic Acids Res</source>. <year>2018</year>;<volume>46</volume>(<issue>D1</issue>):<fpage>D1062</fpage>–<lpage>D1067</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/nar/gkx1153" xlink:type="simple">10.1093/nar/gkx1153</ext-link></comment> <object-id pub-id-type="pmid">29165669</object-id></mixed-citation>
</ref>
<ref id="pone.0233438.ref027">
<label>27</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Caporaso</surname> <given-names>JG</given-names></name>, <name name-style="western"><surname>Baumgartner</surname> <given-names>WA</given-names> <suffix>Jr</suffix></name>, <name name-style="western"><surname>Randolph</surname> <given-names>DA</given-names></name>, <name name-style="western"><surname>Cohen</surname> <given-names>KB</given-names></name>, <name name-style="western"><surname>Hunter</surname> <given-names>L</given-names></name>. <article-title>MutationFinder: a high-performance system for extracting point mutation mentions from text</article-title>. <source>Bioinformatics</source>. <year>2007</year>;<volume>23</volume>(<issue>14</issue>):<fpage>1862</fpage>–<lpage>1865</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/bioinformatics/btm235" xlink:type="simple">10.1093/bioinformatics/btm235</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0233438.ref028">
<label>28</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Naderi</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Witte</surname> <given-names>R</given-names></name>. <article-title>Automated extraction and semantic analysis of mutation impacts from the biomedical literature</article-title>. <source>BMC Genomics</source>. <year>2012</year>;<volume>13</volume> Suppl 4(<issue>Suppl 4</issue>):<fpage>S10</fpage>. Published 2012 Jun 18. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/1471-2164-13-S4-S10" xlink:type="simple">10.1186/1471-2164-13-S4-S10</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0233438.ref029">
<label>29</label>
<mixed-citation publication-type="other" xlink:type="simple">Gene Ontology <ext-link ext-link-type="uri" xlink:href="http://www.geneontology.org/" xlink:type="simple">http://www.geneontology.org/</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref030">
<label>30</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Thomas</surname> <given-names>P.</given-names></name>, <name name-style="western"><surname>Rocktäschel</surname> <given-names>T.</given-names></name>, <name name-style="western"><surname>Hakenberg</surname> <given-names>J.</given-names></name>, <name name-style="western"><surname>Mayer</surname> <given-names>L.</given-names></name>, and <name name-style="western"><surname>Leser</surname> <given-names>U</given-names></name>. <article-title>SETH detects and normalizes genetic variants in text</article-title>. <source>Bioinformatics</source> (<year>2016</year>) <ext-link ext-link-type="uri" xlink:href="http://rockt.github.io/SETH/" xlink:type="simple">http://rockt.github.io/SETH/</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref031">
<label>31</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Wei</surname> <given-names>CH</given-names></name>, <name name-style="western"><surname>Harris</surname> <given-names>BR</given-names></name>, <name name-style="western"><surname>Kao</surname> <given-names>HY</given-names></name>, <name name-style="western"><surname>Lu</surname> <given-names>Z</given-names></name>. <article-title>tmVar: a text mining approach for extracting sequence variants in biomedical literature</article-title>. <source>Bioinformatics</source>. <year>2013</year>;<volume>29</volume>(<issue>11</issue>):<fpage>1433</fpage>–<lpage>1439</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/bioinformatics/btt156" xlink:type="simple">10.1093/bioinformatics/btt156</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0233438.ref032">
<label>32</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Wei</surname> <given-names>CH</given-names></name>, <name name-style="western"><surname>Phan</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Feltz</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Maiti</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Hefferon</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Lu</surname> <given-names>Z</given-names></name>. <article-title>tmVar 2.0: integrating genomic variant information from literature with dbSNP and ClinVar for precision medicine</article-title>. <source>Bioinformatics</source>. <year>2018</year>;<volume>34</volume>(<issue>1</issue>):<fpage>80</fpage>–<lpage>87</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/bioinformatics/btx541" xlink:type="simple">10.1093/bioinformatics/btx541</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0233438.ref033">
<label>33</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Yepes</surname> <given-names>JA</given-names></name>, <name name-style="western"><surname>Verspoor</surname> <given-names>K</given-names></name>. <article-title>Mutation extraction tools can be combined for robust recognition of genetic variants in the literature</article-title>. <source>F1000Res</source>. <year>2014</year>;<volume>3</volume>:<elocation-id>18</elocation-id>. Published 2014 Jan 21.</mixed-citation>
</ref>
<ref id="pone.0233438.ref034">
<label>34</label>
<mixed-citation publication-type="other" xlink:type="simple">Oracle: Parsing an XML File Using SAX <ext-link ext-link-type="uri" xlink:href="https://docs.oracle.com/javase/tutorial/jaxp/sax/parsing.html" xlink:type="simple">https://docs.oracle.com/javase/tutorial/jaxp/sax/parsing.html</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref035">
<label>35</label>
<mixed-citation publication-type="other" xlink:type="simple">Welcome to Apache OpenNLP <ext-link ext-link-type="uri" xlink:href="https://opennlp.apache.org/" xlink:type="simple">https://opennlp.apache.org/</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref036">
<label>36</label>
<mixed-citation publication-type="other" xlink:type="simple">Bootstrap <ext-link ext-link-type="uri" xlink:href="https://getbootstrap.com/" xlink:type="simple">https://getbootstrap.com/</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref037">
<label>37</label>
<mixed-citation publication-type="other" xlink:type="simple">Start Building on AWS Today <ext-link ext-link-type="uri" xlink:href="https://aws.amazon.com/" xlink:type="simple">https://aws.amazon.com/</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref038">
<label>38</label>
<mixed-citation publication-type="other" xlink:type="simple">What is Colaboratory? <ext-link ext-link-type="uri" xlink:href="https://colab.research.google.com/notebooks/intro.ipynb" xlink:type="simple">https://colab.research.google.com/notebooks/intro.ipynb</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref039">
<label>39</label>
<mixed-citation publication-type="other" xlink:type="simple">Diabetes Mellitus, Type 1. <ext-link ext-link-type="uri" xlink:href="https://meshb.nlm.nih.gov/record/ui?ui=D003922" xlink:type="simple">https://meshb.nlm.nih.gov/record/ui?ui=D003922</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref040">
<label>40</label>
<mixed-citation publication-type="other" xlink:type="simple">Diabetes Mellitus HP:0000819. <ext-link ext-link-type="uri" xlink:href="https://hpo.jax.org/app/browse/term/HP:0000819" xlink:type="simple">https://hpo.jax.org/app/browse/term/HP:0000819</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref041">
<label>41</label>
<mixed-citation publication-type="other" xlink:type="simple">What is Hierarchical Clustering? <ext-link ext-link-type="uri" xlink:href="https://www.kdnuggets.com/2019/09/hierarchical-clustering.html" xlink:type="simple">https://www.kdnuggets.com/2019/09/hierarchical-clustering.html</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref042">
<label>42</label>
<mixed-citation publication-type="other" xlink:type="simple">K Means <ext-link ext-link-type="uri" xlink:href="https://stanford.edu/~cpiech/cs221/handouts/kmeans.html" xlink:type="simple">https://stanford.edu/~cpiech/cs221/handouts/kmeans.html</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref043">
<label>43</label>
<mixed-citation publication-type="other" xlink:type="simple">Alag, S. Collective Intelligence in Action, 2008 ISBN: 1933988312, Manning Publications Co.</mixed-citation>
</ref>
<ref id="pone.0233438.ref044">
<label>44</label>
<mixed-citation publication-type="other" xlink:type="simple">rs12979860: SNPedia <ext-link ext-link-type="uri" xlink:href="https://www.snpedia.com/index.php/Rs12979860" xlink:type="simple">https://www.snpedia.com/index.php/Rs12979860</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref045">
<label>45</label>
<mixed-citation publication-type="other" xlink:type="simple">rs8099917: SNPedia <ext-link ext-link-type="uri" xlink:href="https://www.snpedia.com/index.php/Rs8099917" xlink:type="simple">https://www.snpedia.com/index.php/Rs8099917</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref046">
<label>46</label>
<mixed-citation publication-type="other" xlink:type="simple">rs6971: SNPedia <ext-link ext-link-type="uri" xlink:href="https://www.snpedia.com/index.php/Rs6971" xlink:type="simple">https://www.snpedia.com/index.php/Rs6971</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref047">
<label>47</label>
<mixed-citation publication-type="other" xlink:type="simple">rs9939609: SNPedia <ext-link ext-link-type="uri" xlink:href="https://www.snpedia.com/index.php/Rs9939609" xlink:type="simple">https://www.snpedia.com/index.php/Rs9939609</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref048">
<label>48</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Khalid</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Yasar</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Khalid</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Spiro</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Haddad</surname> <given-names>A</given-names></name>, <etal>et al</etal>. <article-title>Management of Atrial Fibrillation in Patients on Ibrutinib: A Cleveland Clinic Experience</article-title>. <source>Cureus</source>. <year>2018</year> <month>May</month>; <volume>10</volume>(<issue>5</issue>): <fpage>e2701</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.7759/cureus.2701" xlink:type="simple">10.7759/cureus.2701</ext-link></comment> <object-id pub-id-type="pmid">30062075</object-id></mixed-citation>
</ref>
<ref id="pone.0233438.ref049">
<label>49</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Wu</surname> <given-names>Q</given-names></name>, <name name-style="western"><surname>Lian</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Chen</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Yu</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Lin</surname> <given-names>T</given-names></name>. <article-title>Alleviation of Symptoms and Improvement of Endometrial Receptivity Following Laparoscopic Adenomyoma Excision and Secondary Therapy with the Levonorgestrel-releasing Intrauterine System</article-title>. <source>Reprod Sci</source>. <year>2020</year> <month>Jan</month> <day>6</day>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s43032-019-00130-4" xlink:type="simple">10.1007/s43032-019-00130-4</ext-link></comment></mixed-citation>
</ref>
<ref id="pone.0233438.ref050">
<label>50</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Papathanasiou</surname> <given-names>IV</given-names></name>, <name name-style="western"><surname>Kelepouris</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Valari</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Papagiannis</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Tzavella</surname> <given-names>F</given-names></name>, <name name-style="western"><surname>Kourkouta</surname> <given-names>L</given-names></name>, <etal>et al</etal>. <article-title>Depression, anxiety and stress among patients with hematological malignancies and the association with quality of life: a cross-sectional study</article-title>. <source>Med Pharm Rep</source>. <year>2020</year> <month>Jan</month>;<volume>93</volume>(<issue>1</issue>):<fpage>62</fpage>–<lpage>68</lpage>.</mixed-citation>
</ref>
<ref id="pone.0233438.ref051">
<label>51</label>
<mixed-citation publication-type="other" xlink:type="simple">PubMed <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/pubmed/" xlink:type="simple">https://www.ncbi.nlm.nih.gov/pubmed/</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref052">
<label>52</label>
<mixed-citation publication-type="other" xlink:type="simple">PubMed Central <ext-link ext-link-type="uri" xlink:href="https://www.ncbi.nlm.nih.gov/pmc/" xlink:type="simple">https://www.ncbi.nlm.nih.gov/pmc/</ext-link></mixed-citation>
</ref>
<ref id="pone.0233438.ref053">
<label>53</label>
<mixed-citation publication-type="other" xlink:type="simple">U.S. Food &amp; Drug Administration <ext-link ext-link-type="uri" xlink:href="https://www.fda.gov/" xlink:type="simple">https://www.fda.gov/</ext-link></mixed-citation>
</ref>
</ref-list>
</back>
</article>