<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1d3 20150301//EN" "http://jats.nlm.nih.gov/publishing/1.1d3/JATS-journalpublishing1.dtd">
<article article-type="research-article" dtd-version="1.1d3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">PLoS ONE</journal-id>
<journal-id journal-id-type="publisher-id">plos</journal-id>
<journal-id journal-id-type="pmc">plosone</journal-id>
<journal-title-group>
<journal-title>PLOS ONE</journal-title>
</journal-title-group>
<issn pub-type="epub">1932-6203</issn>
<publisher>
<publisher-name>Public Library of Science</publisher-name>
<publisher-loc>San Francisco, CA USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.1371/journal.pone.0239007</article-id>
<article-id pub-id-type="publisher-id">PONE-D-20-10776</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Research Article</subject>
</subj-group>
<subj-group subj-group-type="Discipline-v3">
<subject>Physical sciences</subject><subj-group><subject>Mathematics</subject><subj-group><subject>Applied mathematics</subject><subj-group><subject>Algorithms</subject><subj-group><subject>Machine learning algorithms</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Simulation and modeling</subject><subj-group><subject>Algorithms</subject><subj-group><subject>Machine learning algorithms</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Computer and information sciences</subject><subj-group><subject>Artificial intelligence</subject><subj-group><subject>Machine learning</subject><subj-group><subject>Machine learning algorithms</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Vascular medicine</subject><subj-group><subject>Blood pressure</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Computer and information sciences</subject><subj-group><subject>Artificial intelligence</subject><subj-group><subject>Machine learning</subject><subj-group><subject>Deep learning</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Health care</subject><subj-group><subject>Health information technology</subject><subj-group><subject>Electronic medical records</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Computer and information sciences</subject><subj-group><subject>Information technology</subject><subj-group><subject>Health information technology</subject><subj-group><subject>Electronic medical records</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Health care</subject><subj-group><subject>Patients</subject><subj-group><subject>Outpatients</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Engineering and technology</subject><subj-group><subject>Measurement</subject><subj-group><subject>Time measurement</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Computer and information sciences</subject><subj-group><subject>Artificial intelligence</subject><subj-group><subject>Machine learning</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Cardiology</subject><subj-group><subject>Arrhythmia</subject></subj-group></subj-group></subj-group></article-categories>
<title-group>
<article-title>Application of a time-series deep learning model to predict cardiac dysrhythmias in electronic health records</article-title>
<alt-title alt-title-type="running-head">Predict cardiac dysrhythmias by deep learning</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Guo</surname>
<given-names>Aixia</given-names>
</name>
<role content-type="https://casrai.org/credit/">Formal analysis</role>
<role content-type="https://casrai.org/credit/">Writing – original draft</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Smith</surname>
<given-names>Sakima</given-names>
</name>
<role content-type="https://casrai.org/credit/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Khan</surname>
<given-names>Yosef M.</given-names>
</name>
<role content-type="https://casrai.org/credit/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Langabeer II</surname>
<given-names>James R.</given-names>
</name>
<role content-type="https://casrai.org/credit/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff004"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes" xlink:type="simple">
<name name-style="western">
<surname>Foraker</surname>
<given-names>Randi E.</given-names>
</name>
<role content-type="https://casrai.org/credit/">Conceptualization</role>
<role content-type="https://casrai.org/credit/">Supervision</role>
<role content-type="https://casrai.org/credit/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff005"><sup>5</sup></xref>
<xref ref-type="corresp" rid="cor001">*</xref>
</contrib>
</contrib-group>
<aff id="aff001"><label>1</label> <addr-line>Institute for Informatics (I2), Washington University School of Medicine, St. Louis, MO, United States of America</addr-line></aff>
<aff id="aff002"><label>2</label> <addr-line>Department of Internal Medicine, The Ohio State University, Columbus, OH, United States of America</addr-line></aff>
<aff id="aff003"><label>3</label> <addr-line>Health Informatics and Analytics, Centers for Health Metrics and Evaluation, American Heart Association, Dallas, TX, United States of America</addr-line></aff>
<aff id="aff004"><label>4</label> <addr-line>School of Biomedical Informatics, Health Science Center at Houston, The University of Texas, Houston, TX, United States of America</addr-line></aff>
<aff id="aff005"><label>5</label> <addr-line>Department of Internal Medicine, Washington University School of Medicine, St. Louis, MO, United States of America</addr-line></aff>
<contrib-group>
<contrib contrib-type="editor" xlink:type="simple">
<name name-style="western">
<surname>Ramagopalan</surname>
<given-names>Sreeram V.</given-names>
</name>
<role>Editor</role>
<xref ref-type="aff" rid="edit1"/>
</contrib>
</contrib-group>
<aff id="edit1"><addr-line>University of Oxford, UNITED KINGDOM</addr-line></aff>
<author-notes>
<fn fn-type="conflict" id="coi001">
<p>The authors have declared that no competing interests exist.</p>
</fn>
<corresp id="cor001">* E-mail: <email xlink:type="simple">randi.foraker@wustl.edu</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>13</day>
<month>9</month>
<year>2021</year>
</pub-date>
<pub-date pub-type="collection">
<year>2021</year>
</pub-date>
<volume>16</volume>
<issue>9</issue>
<elocation-id>e0239007</elocation-id>
<history>
<date date-type="received">
<day>24</day>
<month>4</month>
<year>2020</year>
</date>
<date date-type="accepted">
<day>4</day>
<month>11</month>
<year>2020</year>
</date>
</history>
<permissions>
<copyright-year>2021</copyright-year>
<copyright-holder>Guo et al</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="info:doi/10.1371/journal.pone.0239007"/>
<abstract>
<sec id="sec001">
<title>Background</title>
<p>Cardiac dysrhythmias (CD) affect millions of Americans in the United States (US), and are associated with considerable morbidity and mortality. New strategies to combat this growing problem are urgently needed.</p>
</sec>
<sec id="sec002">
<title>Objectives</title>
<p>Predicting CD using electronic health record (EHR) data would allow for earlier diagnosis and treatment of the condition, thus improving overall cardiovascular outcomes. The Guideline Advantage (TGA) is an American Heart Association ambulatory quality clinical data registry of EHR data representing 70 clinics distributed throughout the US, and has been used to monitor outpatient prevention and disease management outcome measures across populations and for longitudinal research on the impact of preventative care.</p>
</sec>
<sec id="sec003">
<title>Methods</title>
<p>For this study, we represented all time-series cardiovascular health (CVH) measures and the corresponding data collection time points for each patient by numerical embedding vectors. We then employed a deep learning technique–long short-term memory (LSTM) model–to predict CD from the vector of time-series CVH measures by 5-fold cross validation and compared the performance of this model to the results of deep neural networks, logistic regression, random forest, and Naïve Bayes models.</p>
</sec>
<sec id="sec004">
<title>Results</title>
<p>We demonstrated that the LSTM model outperformed other traditional machine learning models and achieved the best prediction performance as measured by the average area under the receiver operator curve (AUROC): 0.76 for LSTM, 0.71 for deep neural networks, 0.66 for logistic regression, 0.67 for random forest, and 0.59 for Naïve Bayes. The most influential feature from the LSTM model was blood pressure.</p>
</sec>
<sec id="sec005">
<title>Conclusions</title>
<p>These findings may be used to prevent CD in the outpatient setting by encouraging appropriate surveillance and management of CVH.</p>
</sec>
</abstract>
<funding-group>
<funding-statement>The author(s) received no specific funding for this work.</funding-statement>
</funding-group>
<counts>
<fig-count count="5"/>
<table-count count="5"/>
<page-count count="13"/>
</counts>
<custom-meta-group>
<custom-meta id="data-availability">
<meta-name>Data Availability</meta-name>
<meta-value>The data are owned by a third party and the authors do not have permission to share the data. Requesting access to The Guideline Advantage (TGA) data must be done by contacting the American Heart Association via email <email xlink:type="simple">qualityresearch@heart.org</email>. The authors did not have any special access privileges that others would not have. The Python code related to the analyses can be found in Github repository: <ext-link ext-link-type="uri" xlink:href="https://github.com/aixiaguo/CD_prediction/blob/master/CD.py" xlink:type="simple">https://github.com/aixiaguo/CD_prediction/blob/master/CD.py</ext-link>.</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="sec006" sec-type="intro">
<title>Introduction</title>
<p>Cardiac dysrhythmia (CD) is a problem in which the heart has an irregular rhythm [<xref ref-type="bibr" rid="pone.0239007.ref001">1</xref>]. It affects millions of Americans in the United States (US) and approximately 25% of Americans older than 40 years develop a CD [<xref ref-type="bibr" rid="pone.0239007.ref002">2</xref>]. Six million people die annually due to sudden cardiac death caused by ventricular tachyarrhythmias (one type of CD) globally [<xref ref-type="bibr" rid="pone.0239007.ref003">3</xref>]. Risk factors which increase the chance of developing a CD include high blood pressure, diabetes and obesity. CD can be managed in the outpatient setting with medications or behavior change (i.e., diet or physical activity) or in the inpatient setting with cardiac procedures such as an ablation or cardioversion which can restore the rhythm back to normal. If diagnosed and managed appropriately, it can effectively reduce the risk of future blood clots (thrombus formation), heart failure and stroke (thromboembolic events) [<xref ref-type="bibr" rid="pone.0239007.ref004">4</xref>].</p>
<p>Electronic health records (EHR) contain longitudinal healthcare information of patients, including diagnoses, procedures, medications, lab tests and imaging data [<xref ref-type="bibr" rid="pone.0239007.ref005">5</xref>], which could be used for discovering the relationships and predicting patterns from data. For example, a study reported that CD was negatively associated with type II diabetes [<xref ref-type="bibr" rid="pone.0239007.ref006">6</xref>]. Atrial fibrillation (AF) is the most common CD, impacting over 6 million Americans, and multiple factors including clinical, genetic and environmental factors were found to have associations with AF [<xref ref-type="bibr" rid="pone.0239007.ref007">7</xref>–<xref ref-type="bibr" rid="pone.0239007.ref009">9</xref>]. For example, a risk model using data from outpatient clinics (Vanderbilt University Medical Center) predicted AF with demographic information, blood pressure, and smoking status [<xref ref-type="bibr" rid="pone.0239007.ref010">10</xref>]. In this analysis, traditional machine learning algorithms such as Naïve Bayes (NB), support vector machines (SVM) and random forest (RF) [<xref ref-type="bibr" rid="pone.0239007.ref011">11</xref>] along with newly developed algorithms [<xref ref-type="bibr" rid="pone.0239007.ref012">12</xref>] were applied to identify AF using EHR data. In the case of ventricular arrhythmias, informative clinical variables such as blood pressure, treadmill exercise time, and body mass index (BMI) were found to be predictive among hypertrophic cardiomyopathy patients using some traditional machine learning algorithms, including RF and logistic regression (LR) [<xref ref-type="bibr" rid="pone.0239007.ref013">13</xref>].</p>
<p>Recently, deep learning algorithms have grown in popularity for data-driven prediction models. Such models can effectively learn from experience by capturing features and dependencies in longitudinal data and have achieved great success in bioinformatics and healthcare fields [<xref ref-type="bibr" rid="pone.0239007.ref014">14</xref>–<xref ref-type="bibr" rid="pone.0239007.ref017">17</xref>]. For example, scalable deep learning methods were developed to accurately predict medical events from two academic medical centers’ EHR data and achieved high accuracy in prediction tasks [<xref ref-type="bibr" rid="pone.0239007.ref018">18</xref>]. In this paper, we applied a long short-term memory (LSTM) model [<xref ref-type="bibr" rid="pone.0239007.ref019">19</xref>] on time-series EHR data to explore the contribution of modifiable cardiovascular risk factors to the development of CD in the outpatient setting. Central to our analysis was the characterization of cardiovascular health (CVH) and CD outcomes using EHR data from clinics across the US. We evaluated the association between time-series CVH and CD diagnoses, and hypothesized that CD could be predicted using data commonly recorded in the EHR. To the best of our knowledge, this is also the first time that deep learning algorithms have been applied to predict CD using time-series EHR data.</p>
</sec>
<sec id="sec007" sec-type="materials|methods">
<title>Methods</title>
<sec id="sec008">
<title>Ethics statement</title>
<p>All the data were fully anonymized before we accessed them. Our study was approved by the Institutional Review Board at the Washington University School of Medicine in St. Louis. We obtained a written acknowledgement of proprietary rights and non-disclosure and data use agreement from the American Heart Association (The Washington University_NDA_DUA_CONTRACTID 158065_2019.04.26_K).</p>
<p>Established in 2011, The Guideline Advantage (TGA) was a clinical data registry jointly operated by the American Cancer Society, the American Diabetes Association, and the American Heart Association [<xref ref-type="bibr" rid="pone.0239007.ref020">20</xref>]. The program collects EHR data to track and monitor outpatient prevention and disease management. Briefly, the data collected through TGA from over 70 clinics provide a unique platform for longitudinal research on the impact of preventative care. The program’s research strategy is focused on identifying patient-, provider-, and practice-level factors associated with guideline adherence and assessing the effectiveness of quality improvement interventions in increasing guideline adherence. Here we used TGA data to predict the diagnosis of CD among 362,533 unique patients in the data set.</p>
<p>Our data set represented patients seen in the outpatient setting over a 10-year period (2007 to 2016). We defined our study outcome by classifying 19,597 unique ICD-9 and ICD-10 codes to a smaller number of clinically meaningful categories using Clinical Classifications Software (CCS) [<xref ref-type="bibr" rid="pone.0239007.ref021">21</xref>]. After the codes were converted to the appropriate CCS category, we identified 34,511 patients with a diagnosis of CD (single level CCS code = 106). Among them, the majority (55%) were female patients, and 66% of patients were white. If a patient had multiple CD diagnoses in the data set, only the earliest one was considered.</p>
<p>Next, we extracted all measurements of CVH prior to the diagnosis of CD. We utilized measures of CVH as follows: smoking status, body mass index (BMI), blood pressure, hemoglobin A1c, and cholesterol, which were defined and classified by the AHA into three categories: ideal, intermediate, or poor according to <bold><xref ref-type="table" rid="pone.0239007.t001">Table 1</xref></bold>. To classify patients as intermediate health or treated-to-goal for selected CVH submetrics (<bold><xref ref-type="table" rid="pone.0239007.t001">Table 1</xref></bold>), we converted the drug names to their drug classes by comparing the drug names in our dataset with the Multum drug database [<xref ref-type="bibr" rid="pone.0239007.ref022">22</xref>]. A string-matching technique, the Levenshtein distance algorithm [<xref ref-type="bibr" rid="pone.0239007.ref023">23</xref>], was applied: two drug names were considered a match when the distance between them was less than five, and these matches were included in subsequent analyses.</p>
<table-wrap id="pone.0239007.t001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0239007.t001</object-id>
<label>Table 1</label> <caption><title>Measures of CVH which are available in the EHR (adapted from: Lloyd-Jones, 2011) [<xref ref-type="bibr" rid="pone.0239007.ref024">24</xref>].</title></caption>
<alternatives>
<graphic id="pone.0239007.t001g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0239007.t001" xlink:type="simple"/>
<table cellspacing="0" style="border-collapse:collapse">
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="justify"/>
<th align="justify">Poor Health</th>
<th align="justify">Intermediate Health</th>
<th align="justify" style="background-color:#BFBFBF">Ideal Health</th>
</tr>
</thead>
<tbody>
<tr>
<td align="justify" style="background-color:#F2F2F2">Health Behaviors</td>
<td align="justify" style="background-color:#F2F2F2"/>
<td align="justify" style="background-color:#F2F2F2"/>
<td align="justify" style="background-color:#F2F2F2"/>
</tr>
<tr>
<td align="justify" style="border-bottom:dotted">Smoking status</td>
<td align="justify" style="border-bottom:dotted">Yes</td>
<td align="justify" style="border-bottom:dotted">Former ≤ 12 months</td>
<td align="justify" style="background-color:#BFBFBF;border-bottom:dotted">Never or quit &gt; 12 months</td>
</tr>
<tr>
<td align="justify" style="border-top:dotted">Body mass index</td>
<td align="justify" style="border-top:dotted">≥ 30 kg/m<sup>2</sup></td>
<td align="justify" style="border-top:dotted">25–29.9 kg/m<sup>2</sup></td>
<td align="justify" style="background-color:#BFBFBF;border-top:dotted">&lt; 25 kg/m<sup>2</sup></td>
</tr>
<tr>
<td align="justify" style="background-color:#F2F2F2">Health Factors</td>
<td align="justify" style="background-color:#F2F2F2"/>
<td align="justify" style="background-color:#F2F2F2"/>
<td align="justify" style="background-color:#F2F2F2"/>
</tr>
<tr>
<td align="justify" style="border-bottom:dotted">Total cholesterol</td>
<td align="justify" style="border-bottom:dotted">≥ 240 mg/dL</td>
<td align="justify" style="border-bottom:dotted">200–239 mg/dL or treated to goal</td>
<td align="justify" style="background-color:#BFBFBF;border-bottom:dotted">&lt; 200 mg/dL</td>
</tr>
<tr>
<td align="justify" rowspan="2" style="border-top:dotted;border-bottom:dotted">Blood pressure</td>
<td align="justify" rowspan="2" style="border-top:dotted;border-bottom:dotted">Systolic ≥ 140 mm Hg or Diastolic ≥ 90 mm Hg</td>
<td align="justify" rowspan="2" style="border-top:dotted;border-bottom:dotted">Systolic 120–139 mm Hg or Diastolic 80–89 mm Hg or treated to goal</td>
<td align="justify" style="background-color:#BFBFBF;border-top:dotted;border-bottom:dotted">Systolic &lt; 120 mm Hg</td>
</tr>
<tr>
<td align="justify" style="background-color:#BFBFBF;border-top:dotted;border-bottom:dotted">Diastolic &lt; 80 mm Hg</td>
</tr>
<tr>
<td align="justify" style="border-top:dotted">Fasting plasma glucose</td>
<td align="justify" style="border-top:dotted">≥ 126 mg/dL</td>
<td align="justify" style="border-top:dotted">100–125 mg/dL or treated to goal</td>
<td align="justify" style="background-color:#BFBFBF;border-top:dotted">&lt; 100 mg/dL</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<p>We studied patients with CD who had four or more outpatient CVH measures in the data set (n = 5,271). Using the same criteria, we randomly selected 5,784 patients from the dataset who did not have a diagnosis of CD. In sensitivity analyses, we tested the robustness of our strategy by changing the number of outpatient CVH measures from zero to three, respectively. Ultimately, our data set comprised 11,055 patients who had four or more encounters over the 10-year study period.</p>
</sec>
<sec id="sec009">
<title>Statistical analysis</title>
<p>To prepare the CVH measures for analysis, we combined the submetric with its classification according to <bold><xref ref-type="table" rid="pone.0239007.t001">Table 1</xref></bold>. For example, if a patient had a measurement of “ideal” cholesterol, then we combined the submetric and its value as cholesterolideal. The resulting features were mapped to a 32-dimensional vector by word embeddings [<xref ref-type="bibr" rid="pone.0239007.ref025">25</xref>] in our model. The Gensim Word2Vec model was configured with the following hyperparameters: size (embedding dimension) as 32, window (the maximum distance between a target word and all words around it) as 5, min_count (the minimum number of words counted when training the model) as 1, sg (the training algorithm) as CBOW (continuous bag of words). The input of the Word2Vec model was all of the above combined measurements of all 11,055 patients. We also added time information for all measurements as time steps. Each feature was associated with a time point which was calculated by the difference in days between the corresponding visit time and the latest measurement time. For example, if the most recent visit date was February 11, 2019, and the measurement was conducted on January 11, 2019, then the time point value is 31. Thus, each individual patient had its own vector to represent their measurements of CVH.</p>
<p>The embedded vectors of patients were the inputs for our long short-term memory (LSTM) model. We applied an LSTM algorithm to investigate the association between time-series CVH measurements and the outcome of CD. We also investigated other machine learning and deep learning algorithms such as DNN [<xref ref-type="bibr" rid="pone.0239007.ref026">26</xref>], LR [<xref ref-type="bibr" rid="pone.0239007.ref027">27</xref>], RF [<xref ref-type="bibr" rid="pone.0239007.ref028">28</xref>] and NB [<xref ref-type="bibr" rid="pone.0239007.ref029">29</xref>] to study the same association between CVH and CD. All of the CVH measurements for each patient were sorted in chronological order. We padded each patient's sequence with virtual events to the same length (311) in the form of [0<sub>1</sub>, …, 0<sub>k</sub>, event_1, …, event_(311-k)] if they had fewer events than the maximum number of measures (311), where k was the difference between 311 and the number of records that the patient had.</p>
<p>To investigate the effect of the continuous vectors obtained from the Word2Vec algorithm, we conducted the same predictions by using categorical variables. These categorical variables were sorted in chronological order, and each categorical event concatenated with the same time points (e.g., difference in days between the corresponding visit time and the latest measurement time) were the inputs of the models of LSTM, DNN, RF, LR, and NB. We applied the same padding approach as above for patients who had fewer than 311 events.</p>
<p>For each model of predictions, we utilized 5-fold cross validation by dividing dataset into 5 folds with each fold serving as a testing dataset and the remaining 4 folds as a training dataset. Criteria of the area under the receiver operator curve (AUROC) and other metrics, i.e., accuracy, sensitivity, precision, f1 score, and specificity were calculated to evaluate the performance of the models.</p>
</sec>
<sec id="sec010">
<title>LSTM unit</title>
<p>A common LSTM unit is composed of a cell and three gates: input gate, output gate and forget gate. The cell remembers information at each time step, and these gates control which information is passed on to the next time step and which is forgotten/discarded [<xref ref-type="bibr" rid="pone.0239007.ref030">30</xref>]. We illustrated the basic structure of an LSTM unit in <bold><xref ref-type="fig" rid="pone.0239007.g001">Fig 1</xref></bold>.</p>
<fig id="pone.0239007.g001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0239007.g001</object-id>
<label>Fig 1</label>
<caption>
<title>Graph illustration of LSTM unit.</title>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0239007.g001" xlink:type="simple"/>
</fig>
<p>Mathematically, the equations for forward pass to update an LSTM unit with a forget gate at a time <italic>t</italic> are:
<disp-formula id="pone.0239007.e001">
<alternatives>
<graphic id="pone.0239007.e001g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pone.0239007.e001" xlink:type="simple"/>
<mml:math display="block" id="M1">
<mml:mi>F</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>g</mml:mi><mml:mi>e</mml:mi><mml:mi>t</mml:mi><mml:mspace width="0.25em"/><mml:mi>g</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mspace width="0.50em"/><mml:msub><mml:mi mathvariant="bold-italic">f</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>σ</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi mathvariant="bold-italic">W</mml:mi><mml:mi>f</mml:mi></mml:msub><mml:msub><mml:mi mathvariant="bold-italic">h</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>−</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">U</mml:mi><mml:mi>f</mml:mi></mml:msub><mml:msub><mml:mi mathvariant="bold-italic">X</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">b</mml:mi><mml:mi>f</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow>
</mml:math>
</alternatives>
</disp-formula>
<disp-formula id="pone.0239007.e002">
<alternatives>
<graphic id="pone.0239007.e002g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pone.0239007.e002" xlink:type="simple"/>
<mml:math display="block" id="M2">
<mml:mi>I</mml:mi><mml:mi>n</mml:mi><mml:mi>p</mml:mi><mml:mi>u</mml:mi><mml:mi>t</mml:mi><mml:mspace width="0.25em"/><mml:mi>g</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mspace width="0.50em"/><mml:msub><mml:mi mathvariant="bold-italic">i</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>σ</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi mathvariant="bold-italic">W</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:msub><mml:mi mathvariant="bold-italic">h</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>−</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">U</mml:mi><mml:mi>i</mml:mi></mml:msub><mml:msub><mml:mi mathvariant="bold-italic">X</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">b</mml:mi><mml:mi>i</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow>
</mml:math>
</alternatives>
</disp-formula>
<disp-formula id="pone.0239007.e003">
<alternatives>
<graphic id="pone.0239007.e003g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pone.0239007.e003" xlink:type="simple"/>
<mml:math display="block" id="M3">
<mml:msub><mml:mi mathvariant="bold-italic">C</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">f</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>*</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">C</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>−</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">i</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>*</mml:mo><mml:mi mathvariant="normal">tanh</mml:mi><mml:mspace width="0.25em"/><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi mathvariant="bold-italic">W</mml:mi><mml:mi>c</mml:mi></mml:msub><mml:msub><mml:mi mathvariant="bold-italic">h</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>−</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">U</mml:mi><mml:mi>c</mml:mi></mml:msub><mml:msub><mml:mi mathvariant="bold-italic">X</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">b</mml:mi><mml:mi>c</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow>
</mml:math>
</alternatives>
</disp-formula>
<disp-formula id="pone.0239007.e004">
<alternatives>
<graphic id="pone.0239007.e004g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pone.0239007.e004" xlink:type="simple"/>
<mml:math display="block" id="M4">
<mml:mi>O</mml:mi><mml:mi>u</mml:mi><mml:mi>t</mml:mi><mml:mi>p</mml:mi><mml:mi>u</mml:mi><mml:mi>t</mml:mi><mml:mspace width="0.25em"/><mml:mi>g</mml:mi><mml:mi>a</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mspace width="0.50em"/><mml:msub><mml:mi mathvariant="bold-italic">o</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>σ</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi mathvariant="bold-italic">W</mml:mi><mml:mi>o</mml:mi></mml:msub><mml:msub><mml:mi mathvariant="bold-italic">h</mml:mi><mml:mrow><mml:mi>t</mml:mi><mml:mo>−</mml:mo><mml:mn>1</mml:mn></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">U</mml:mi><mml:mi>o</mml:mi></mml:msub><mml:msub><mml:mi mathvariant="bold-italic">X</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:msub><mml:mi mathvariant="bold-italic">b</mml:mi><mml:mi>o</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow>
</mml:math>
</alternatives>
</disp-formula>
<disp-formula id="pone.0239007.e005">
<alternatives>
<graphic id="pone.0239007.e005g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pone.0239007.e005" xlink:type="simple"/>
<mml:math display="block" id="M5">
<mml:msub><mml:mi mathvariant="bold-italic">h</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>=</mml:mo><mml:mi>σ</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi mathvariant="bold-italic">o</mml:mi><mml:mi>t</mml:mi></mml:msub><mml:mo>*</mml:mo><mml:mi mathvariant="normal">tanh</mml:mi><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mi mathvariant="bold-italic">C</mml:mi><mml:mi>t</mml:mi></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow><mml:mo>)</mml:mo></mml:mrow>
</mml:math>
</alternatives>
</disp-formula></p>
<p>Here, * denotes the element-wise product and <bold><italic>X</italic></bold><sub><italic>t</italic></sub> is the input vector (i.e., embedding vector in our case) at time <italic>t</italic>. The weight matrices <bold><italic>W</italic></bold><sub><italic>f</italic></sub>, <bold><italic>W</italic></bold><sub><italic>i</italic></sub>, <bold><italic>W</italic></bold><sub><italic>c</italic></sub>, <bold><italic>W</italic></bold><sub><italic>o</italic></sub> for the hidden state <bold><italic>h</italic></bold><sub><italic>t</italic></sub>, the weight matrices <bold><italic>U</italic></bold><sub><italic>f</italic></sub>, <bold><italic>U</italic></bold><sub><italic>i</italic></sub>, <bold><italic>U</italic></bold><sub><italic>c</italic></sub>, <bold><italic>U</italic></bold><sub><italic>o</italic></sub> for the input <bold><italic>X</italic></bold><sub><italic>t</italic></sub>, and the bias vector parameters <bold><italic>b</italic></bold><sub><italic>f</italic></sub>, <bold><italic>b</italic></bold><sub><italic>i</italic></sub>, <bold><italic>b</italic></bold><sub><italic>c</italic></sub>, <bold><italic>b</italic></bold><sub><italic>o</italic></sub> are learned during the training stage, and <bold><italic>h</italic></bold><sub><italic>t</italic></sub> is the hidden layer output vector. The activation function <italic>σ</italic> is the sigmoid function and <italic>tanh</italic> is the hyperbolic tangent function.</p>
<p>Our LSTM model comprised an input layer, one hidden layer (100 dimensions) and a scalar output layer. A binary cross-entropy loss function was employed at the output layer and a sigmoid function was used as the activation function for the hidden layer. The Adam optimizer [<xref ref-type="bibr" rid="pone.0239007.ref031">31</xref>] was used to optimize the model with a mini-batch size of 64 samples. The DNN comprised an input layer, 5 hidden layers (with 256, 256, 128, 64 and 32 dimensions, respectively) and a scalar output layer. We used the sigmoid function [<xref ref-type="bibr" rid="pone.0239007.ref032">32</xref>] at the output layer and the ReLU function at each hidden layer. Binary cross-entropy was used as the loss function and the Adam optimizer was used to optimize the models with a mini-batch size of 64 samples. The LR, RF and NB models were configured with the default options of the Scikit-learn package in Python 3.</p>
<p>We then investigated which features were the most important in CD prediction. To achieve this goal, we repeated the evaluation 15 times, setting one feature to a constant value each time. For each feature, we first manually set it to a constant (making it uninformative for the predictive models), then tested the prediction performance of the trained models using the manually changed features to evaluate the discriminative importance of the given feature. The resulting prediction accuracy and AUROC were then compared with those of the full model. If there was a large change between these two values, it indicated that this feature was important and discriminative for the prediction. Analyses were conducted using the Scikit-learn, Scipy and Matplotlib libraries with Python, version 3.6.5, in 2019.</p>
</sec>
</sec>
<sec id="sec011" sec-type="results">
<title>Results</title>
<p>Our study population was 58% female and 53% white (<bold><xref ref-type="table" rid="pone.0239007.t002">Table 2</xref></bold>). Approximately 58% of CD patients were female and around 60% of CD patients were white. Since patients had multiple encounters, there were multiple measures of CVH. The average number of measures for each patient was 24 and the median was 17.</p>
<table-wrap id="pone.0239007.t002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0239007.t002</object-id>
<label>Table 2</label> <caption><title>Characteristics [mean (SD) or n (%)] of the study population.</title></caption>
<alternatives>
<graphic id="pone.0239007.t002g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0239007.t002" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<tbody>
<tr>
<td align="justify" style="background-color:#D9E2F3">Gender [n (%)]</td>
<td align="justify" style="background-color:#D9E2F3"/>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">    Female</td>
<td align="justify" style="background-color:#FFFFFF">6379 (57.7)</td>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">    Male</td>
<td align="justify" style="background-color:#FFFFFF">4673 (42.3)</td>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">    Other/Unknown</td>
<td align="justify" style="background-color:#FFFFFF">3 (0.0)</td>
</tr>
<tr>
<td align="justify" style="background-color:#D9E2F3">Gender with CD</td>
<td align="justify" style="background-color:#D9E2F3"/>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">    Female</td>
<td align="justify" style="background-color:#FFFFFF">3054 (57.9)</td>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">    Male</td>
<td align="justify" style="background-color:#FFFFFF">2216 (42.0)</td>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">    Other/Unknown</td>
<td align="justify" style="background-color:#FFFFFF">1 (0.0)</td>
</tr>
<tr>
<td align="justify" style="background-color:#D9E2F3">Race</td>
<td align="justify" style="background-color:#D9E2F3"/>
</tr>
<tr>
<td align="justify">    White</td>
<td align="justify">5876 (53.2)</td>
</tr>
<tr>
<td align="justify">    Non-white</td>
<td align="justify">5188 (46.9)</td>
</tr>
<tr>
<td align="justify">    Unknown</td>
<td align="justify">21 (0.2)</td>
</tr>
<tr>
<td align="justify" style="background-color:#D9E2F3">Race with CD</td>
<td align="justify" style="background-color:#D9E2F3"/>
</tr>
<tr>
<td align="justify">    White</td>
<td align="justify">3144 (59.6)</td>
</tr>
<tr>
<td align="justify">    Non-white</td>
<td align="justify">2129 (40.3)</td>
</tr>
<tr>
<td align="justify">    Unknown</td>
<td align="justify">14 (0.0)</td>
</tr>
<tr>
<td align="justify" style="background-color:#F2F2F2">BMI (kg/m<sup>2</sup>)</td>
<td align="justify" style="background-color:#F2F2F2">29.6 (9.3)</td>
</tr>
<tr>
<td align="justify" style="background-color:#F2F2F2">Systolic blood pressure (SBP, mmHg)</td>
<td align="justify" style="background-color:#F2F2F2">124.6 (19.4)</td>
</tr>
<tr>
<td align="justify" style="background-color:#F2F2F2">Diastolic blood pressure (DBP, mmHg)</td>
<td align="justify" style="background-color:#F2F2F2">74.4 (14.9)</td>
</tr>
<tr>
<td align="justify" style="background-color:#F2F2F2">Hemoglobin A1c (%)</td>
<td align="justify" style="background-color:#F2F2F2">7.11 (1.79)</td>
</tr>
<tr>
<td align="justify" style="background-color:#F2F2F2">Total cholesterol (mg/dL)</td>
<td align="justify" style="background-color:#F2F2F2">105.2 (35.9)</td>
</tr>
<tr>
<td align="justify" style="background-color:#F2F2F2">Current smoking</td>
<td align="justify" style="background-color:#F2F2F2">2453 (22.2)</td>
</tr>
<tr>
<td align="justify" style="background-color:#D9E2F3">Number of measures</td>
<td align="justify" style="background-color:#D9E2F3"/>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">    Total measures</td>
<td align="justify" style="background-color:#FFFFFF">269475</td>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">    Maximum measures per patient</td>
<td align="justify" style="background-color:#FFFFFF">311</td>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">    Minimum measures per patient</td>
<td align="justify" style="background-color:#FFFFFF">5</td>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">    Average measures per patient</td>
<td align="justify" style="background-color:#FFFFFF">24</td>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">    Median measures per patient</td>
<td align="justify" style="background-color:#FFFFFF">17</td>
</tr>
<tr>
<td align="justify" style="background-color:#E7E6E6">Cardiac dysrhythmias (CD)</td>
<td align="justify" style="background-color:#E7E6E6">5271 (47.7)</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<p><bold><xref ref-type="fig" rid="pone.0239007.g002">Fig 2</xref></bold> displays all the measures and results of two patient examples in which one was diagnosed as CD and the other was not.</p>
<fig id="pone.0239007.g002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0239007.g002</object-id>
<label>Fig 2</label>
<caption>
<title>Examples of CVH time series data.</title>
<p>(A) Patient was diagnosed with CD; (B) Patient not diagnosed with CD.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0239007.g002" xlink:type="simple"/>
</fig>
<p><bold><xref ref-type="table" rid="pone.0239007.t003">Table 3</xref></bold> lists the numbers of ideal, intermediate and poor measurements for each submetric. As seen in <bold><xref ref-type="table" rid="pone.0239007.t003">Table 3</xref></bold>, patients without CD (39%) had a higher prevalence of ideal BMI compared to those with CD diagnoses (23%), and ideal blood pressure measurements followed the same pattern.</p>
<table-wrap id="pone.0239007.t003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0239007.t003</object-id>
<label>Table 3</label> <caption><title>Characteristics [mean (SD) or n (%)] of the converted dataset.</title></caption>
<alternatives>
<graphic id="pone.0239007.t003g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0239007.t003" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<tbody>
<tr>
<td align="justify" style="background-color:#8EAADB"/>
<td align="justify" style="background-color:#8EAADB">CD = Yes</td>
<td align="justify" style="background-color:#8EAADB">ideal</td>
<td align="justify" style="background-color:#8EAADB">intermediate</td>
<td align="justify" style="background-color:#8EAADB">poor</td>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">Total unique patients</td>
<td align="justify" style="background-color:#FFFFFF">5271</td>
<td align="justify" style="background-color:#FFFFFF"/>
<td align="justify" style="background-color:#FFFFFF"/>
<td align="justify" style="background-color:#FFFFFF"/>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">Total rows</td>
<td align="justify" style="background-color:#FFFFFF">128160</td>
<td align="justify" style="background-color:#FFFFFF">59315</td>
<td align="justify" style="background-color:#FFFFFF">31743</td>
<td align="justify" style="background-color:#FFFFFF">37102</td>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">Total A1C tests</td>
<td align="justify" style="background-color:#FFFFFF">6947</td>
<td align="justify" style="background-color:#FFFFFF">1073 (15.4)</td>
<td align="justify" style="background-color:#FFFFFF">2398 (34.5)</td>
<td align="justify" style="background-color:#FFFFFF">3476 (50.0)</td>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">Total LDL tests</td>
<td align="justify" style="background-color:#FFFFFF">11732</td>
<td align="justify" style="background-color:#FFFFFF">9310 (79.4)</td>
<td align="justify" style="background-color:#FFFFFF">1617 (13.8)</td>
<td align="justify" style="background-color:#FFFFFF">805 (6.9)</td>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">Total BMI tests</td>
<td align="justify" style="background-color:#FFFFFF">24532</td>
<td align="justify" style="background-color:#FFFFFF">5509 (22.5)</td>
<td align="justify" style="background-color:#FFFFFF">6870 (28.0)</td>
<td align="justify" style="background-color:#FFFFFF">12153 (49.5)</td>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">Total BP tests</td>
<td align="justify" style="background-color:#FFFFFF">48118</td>
<td align="justify" style="background-color:#FFFFFF">15193 (31.6)</td>
<td align="justify" style="background-color:#FFFFFF">20798 (43.2)</td>
<td align="justify" style="background-color:#FFFFFF">12127 (25.2)</td>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">Total Smoking status</td>
<td align="justify" style="background-color:#FFFFFF">36831</td>
<td align="justify" style="background-color:#FFFFFF">28230 (76.6)</td>
<td align="justify" style="background-color:#FFFFFF">60 (0.2)</td>
<td align="justify" style="background-color:#FFFFFF">8541 (23.2)</td>
</tr>
<tr>
<td align="justify" style="background-color:#8EAADB"/>
<td align="justify" style="background-color:#8EAADB">CD = No</td>
<td align="justify" style="background-color:#8EAADB">ideal</td>
<td align="justify" style="background-color:#8EAADB">intermediate</td>
<td align="justify" style="background-color:#8EAADB">poor</td>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">Total unique patients</td>
<td align="justify" style="background-color:#FFFFFF">5784</td>
<td align="justify" style="background-color:#FFFFFF"/>
<td align="justify" style="background-color:#FFFFFF"/>
<td align="justify" style="background-color:#FFFFFF"/>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">Total rows</td>
<td align="justify" style="background-color:#FFFFFF">141315</td>
<td align="justify" style="background-color:#FFFFFF">72013</td>
<td align="justify" style="background-color:#FFFFFF">32046</td>
<td align="justify" style="background-color:#FFFFFF">37256</td>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">Total A1C tests</td>
<td align="justify" style="background-color:#FFFFFF">6065</td>
<td align="justify" style="background-color:#FFFFFF">900 (14.8)</td>
<td align="justify" style="background-color:#FFFFFF">1669 (27.5)</td>
<td align="justify" style="background-color:#FFFFFF">3496 (57.6)</td>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">Total LDL tests</td>
<td align="justify" style="background-color:#FFFFFF">9211</td>
<td align="justify" style="background-color:#FFFFFF">6917 (75.1)</td>
<td align="justify" style="background-color:#FFFFFF">1483 (16.1)</td>
<td align="justify" style="background-color:#FFFFFF">811 (8.8)</td>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">Total BMI tests</td>
<td align="justify" style="background-color:#FFFFFF">31898</td>
<td align="justify" style="background-color:#FFFFFF">12358 (38.7)</td>
<td align="justify" style="background-color:#FFFFFF">6793 (21.3)</td>
<td align="justify" style="background-color:#FFFFFF">12747 (40.0)</td>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">Total BP tests</td>
<td align="justify" style="background-color:#FFFFFF">56532</td>
<td align="justify" style="background-color:#FFFFFF">23654 (41.8)</td>
<td align="justify" style="background-color:#FFFFFF">21745 (38.5)</td>
<td align="justify" style="background-color:#FFFFFF">11133 (19.7)</td>
</tr>
<tr>
<td align="justify" style="background-color:#FFFFFF">Total Smoking status</td>
<td align="justify" style="background-color:#FFFFFF">37609</td>
<td align="justify" style="background-color:#FFFFFF">28184 (74.9)</td>
<td align="justify" style="background-color:#FFFFFF">356 (0.9)</td>
<td align="justify" style="background-color:#FFFFFF">9069 (24.1)</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<p>Word embeddings produced vector representations of the words, which were the features of patients. <bold><xref ref-type="fig" rid="pone.0239007.g003">Fig 3</xref></bold> shows the embeddings visualization of all of the features projected to the first two components in the t-Distributed Stochastic Neighbor Embedding analysis (tSNE) [<xref ref-type="bibr" rid="pone.0239007.ref033">33</xref>]. tSNE is a machine learning technique for visualization that embeds high-dimensional data into a low-dimensional space (here, a 2-dimensional space). The features closest to one another in the visualization can be thought of as being more highly correlated with one another.</p>
<fig id="pone.0239007.g003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0239007.g003</object-id>
<label>Fig 3</label>
<caption>
<title>Embedding visualization of the combination of measure submetric and measure values.</title>
<p>X and y-axes are the first two components in the t-Distributed Stochastic Neighbor Embedding (tSNE).</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0239007.g003" xlink:type="simple"/>
</fig>
<p>The LSTM model outperformed other machine learning models in the two cases: inputs with vectors from Word2Vec and inputs with categorical variables (i.e., without Word2Vec). The AUC of LSTM was 0.76 (std 0.01) while DNN was 0.71 (std 0.03), LR was 0.67 (std 0.01), RF was 0.66 (std 0.01) and NB was 0.59 (std 0.02) for the case with Word2Vec. For the case without Word2Vec, the AUC of LSTM was 0.69 (std 0.01) while DNN was 0.64 (std 0.02), LR was 0.65 (std 0.01), RF was 0.66 (std 0.01) and NB was 0.60 (std 0.01) (<bold><xref ref-type="fig" rid="pone.0239007.g004">Fig 4</xref></bold>). The accuracy of each model was 69% for LSTM compared to 66% for DNN, 64% for LR, 61% for RF, and 52% for NB for the case with Word2Vec (<bold><xref ref-type="table" rid="pone.0239007.t004">Table 4</xref></bold>). For the case without Word2Vec, the accuracy was 64% for LSTM, 61% for DNN, 62% for RF, 61% for LR, and 52% for NB.</p>
<fig id="pone.0239007.g004" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0239007.g004</object-id>
<label>Fig 4</label>
<caption>
<title>CD prediction performance by area under the curve (AUC) for LSTM, DNN, RF, LR, and NB models.</title>
<p>LSTM–long short-term memory; DNN–deep neural network; RF–random forest; LR–logistic regression; NB–naïve Bayes.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0239007.g004" xlink:type="simple"/>
</fig>
<table-wrap id="pone.0239007.t004" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0239007.t004</object-id>
<label>Table 4</label> <caption><title>Model performance by metrics of 5-fold cross-validation mean (std).</title></caption>
<alternatives>
<graphic id="pone.0239007.t004g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0239007.t004" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="center">Cases</th>
<th align="center">Models</th>
<th align="center">Accuracy</th>
<th align="center">Precision</th>
<th align="center">Recall</th>
<th align="center">F1-score</th>
<th align="center">Specificity</th>
</tr>
</thead>
<tbody>
<tr>
<td align="center" rowspan="5"><bold>Case: Inputs with vectors by Word2Vec</bold></td>
<td align="center">LSTM</td>
<td align="center">0.69 (0.01)</td>
<td align="center">0.68 (0.02)</td>
<td align="center">0.66 (0.03)</td>
<td align="center">0.67 (0.02)</td>
<td align="center">0.72 (0.03)</td>
</tr>
<tr>
<td align="center">DNN</td>
<td align="center">0.66 (0.03)</td>
<td align="center">0.63 (0.01)</td>
<td align="center">0.69 (0.03)</td>
<td align="center">0.66 (0.01)</td>
<td align="center">0.63 (0.03)</td>
</tr>
<tr>
<td align="center">RF</td>
<td align="center">0.61 (0.01)</td>
<td align="center">0.59 (0.01)</td>
<td align="center">0.61 (0.04)</td>
<td align="center">0.6 (0.02)</td>
<td align="center">0.61 (0.02)</td>
</tr>
<tr>
<td align="center">LR</td>
<td align="center">0.64 (0.01)</td>
<td align="center">0.61 (0.01)</td>
<td align="center">0.64 (0.01)</td>
<td align="center">0.63 (0.01)</td>
<td align="center">0.63 (0.02)</td>
</tr>
<tr>
<td align="center">NB</td>
<td align="center">0.52 (0.0)</td>
<td align="center">0.0 (0.0)</td>
<td align="center">0.0 (0.0)</td>
<td align="center">0.0 (0.0)</td>
<td align="center">1.0 (0.0)</td>
</tr>
<tr>
<td align="center" rowspan="5"><bold>Case: Inputs without Word2Vec</bold></td>
<td align="center">LSTM</td>
<td align="center">0.64 (0.01)</td>
<td align="center">0.62 (0.02)</td>
<td align="center">0.65 (0.05)</td>
<td align="center">0.63 (0.02)</td>
<td align="center">0.64 (0.04)</td>
</tr>
<tr>
<td align="center">DNN</td>
<td align="center">0.61 (0.01)</td>
<td align="center">0.59 (0.02)</td>
<td align="center">0.58 (0.08)</td>
<td align="center">0.58 (0.04)</td>
<td align="center">0.63 (0.08)</td>
</tr>
<tr>
<td align="center">RF</td>
<td align="center">0.62 (0.01)</td>
<td align="center">0.6 (0.01)</td>
<td align="center">0.61 (0.01)</td>
<td align="center">0.61 (0.01)</td>
<td align="center">0.63 (0.02)</td>
</tr>
<tr>
<td align="center">LR</td>
<td align="center">0.61 (0.01)</td>
<td align="center">0.58 (0.02)</td>
<td align="center">0.62 (0.01)</td>
<td align="center">0.6 (0.01)</td>
<td align="center">0.6 (0.02)</td>
</tr>
<tr>
<td align="center">NB</td>
<td align="center">0.52 (0.0)</td>
<td align="center">0.6 (0.49)</td>
<td align="center">0.0 (0.0)</td>
<td align="center">0.0 (0.01)</td>
<td align="center">1.0 (0.0)</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<p>The calculation of metrics was based on the following formulas.</p>
<disp-formula id="pone.0239007.e006">
<alternatives>
<graphic id="pone.0239007.e006g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pone.0239007.e006" xlink:type="simple"/>
<mml:math display="block" id="M6">
<mml:mrow><mml:mi>A</mml:mi><mml:mi>c</mml:mi><mml:mi>c</mml:mi><mml:mi>u</mml:mi><mml:mi>r</mml:mi><mml:mi>a</mml:mi><mml:mi>c</mml:mi><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>T</mml:mi><mml:mi>N</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mo>/</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>T</mml:mi><mml:mi>N</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow>
</mml:math>
</alternatives>
</disp-formula>
<disp-formula id="pone.0239007.e007">
<alternatives>
<graphic id="pone.0239007.e007g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pone.0239007.e007" xlink:type="simple"/>
<mml:math display="block" id="M7">
<mml:mrow><mml:mi>S</mml:mi><mml:mi>e</mml:mi><mml:mi>n</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>i</mml:mi><mml:mi>v</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>/</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow>
</mml:math>
</alternatives>
</disp-formula>
<disp-formula id="pone.0239007.e008">
<alternatives>
<graphic id="pone.0239007.e008g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pone.0239007.e008" xlink:type="simple"/>
<mml:math display="block" id="M8">
<mml:mrow><mml:mi>S</mml:mi><mml:mi>p</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>f</mml:mi><mml:mi>i</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>t</mml:mi><mml:mi>y</mml:mi><mml:mo>=</mml:mo><mml:mi>T</mml:mi><mml:mi>N</mml:mi><mml:mo>/</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>T</mml:mi><mml:mi>N</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow>
</mml:math>
</alternatives>
</disp-formula>
<disp-formula id="pone.0239007.e009">
<alternatives>
<graphic id="pone.0239007.e009g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pone.0239007.e009" xlink:type="simple"/>
<mml:math display="block" id="M9">
<mml:mrow><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>c</mml:mi><mml:mi>i</mml:mi><mml:mi>s</mml:mi><mml:mi>i</mml:mi><mml:mi>o</mml:mi><mml:mi>n</mml:mi><mml:mo>=</mml:mo><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>/</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow>
</mml:math>
</alternatives>
</disp-formula>
<disp-formula id="pone.0239007.e010">
<alternatives>
<graphic id="pone.0239007.e010g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pone.0239007.e010" xlink:type="simple"/>
<mml:math display="block" id="M10">
<mml:mrow><mml:mi>F</mml:mi><mml:mn>1</mml:mn><mml:mi>‐</mml:mi><mml:mi>s</mml:mi><mml:mi>c</mml:mi><mml:mi>o</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mo>=</mml:mo><mml:mn>2</mml:mn><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>/</mml:mo><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:mn>2</mml:mn><mml:mi>T</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>P</mml:mi><mml:mo>+</mml:mo><mml:mi>F</mml:mi><mml:mi>N</mml:mi></mml:mrow><mml:mo>)</mml:mo></mml:mrow></mml:mrow>
</mml:math>
</alternatives>
</disp-formula>
<p>Here, <italic>TP</italic> is the number of true positives, <italic>TN</italic> is the number of true negatives, <italic>FP</italic> is the number of false positives and <italic>FN</italic> is the number of false negatives. Sensitivity is equivalent to recall.</p>
<p>We also tested whether the differences in metrics between models were statistically significant using a one-tailed t-test. For example, there were 5 values of accuracy from the 5-fold cross validation for the LSTM model and for the DNN model. We performed a one-tailed t-test on these values of accuracy to determine the statistical significance. The p-values in <bold><xref ref-type="table" rid="pone.0239007.t005">Table 5</xref></bold> show that almost all of the LSTM model performance metrics were significantly higher than those of the other models.</p>
<table-wrap id="pone.0239007.t005" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0239007.t005</object-id>
<label>Table 5</label> <caption><title>Statistical significance of model comparison metrics.</title></caption>
<alternatives>
<graphic id="pone.0239007.t005g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0239007.t005" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="center">Cases</th>
<th align="center">Metrics</th>
<th align="center">(LSTM, DNN)</th>
<th align="center">(LSTM, RF)</th>
<th align="center">(LSTM, LR)</th>
<th align="center">(LSTM, NB)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="center" rowspan="6"><bold>Case: Inputs with vectors by Word2Vec</bold></td>
<td align="center"><bold>AUC</bold></td>
<td align="center">2.2*10<sup>−4</sup></td>
<td align="center">3.3*10<sup>−6</sup></td>
<td align="center">4.8*10<sup>−6</sup></td>
<td align="center">8.0*10<sup>−8</sup></td>
</tr>
<tr>
<td align="center"><bold>Accuracy</bold></td>
<td align="center">5.9*10<sup>−4</sup></td>
<td align="center">2.9*10<sup>−5</sup></td>
<td align="center">2.3*10<sup>−5</sup></td>
<td align="center">2.6*10<sup>−9</sup></td>
</tr>
<tr>
<td align="center"><bold>Precision</bold></td>
<td align="center">8.8*10<sup>−4</sup></td>
<td align="center">3.5*10<sup>−5</sup></td>
<td align="center">1.3*10<sup>−4</sup></td>
<td align="center">1.4*10<sup>−12</sup></td>
</tr>
<tr>
<td align="center"><bold>Recall</bold></td>
<td align="center">0.09</td>
<td align="center">0.04</td>
<td align="center">0.2</td>
<td align="center">3.5*10<sup>−11</sup></td>
</tr>
<tr>
<td align="center"><bold>F1-score</bold></td>
<td align="center">0.1</td>
<td align="center">5.6*10<sup>−4</sup></td>
<td align="center">1.0*10<sup>−3</sup></td>
<td align="center">3.3*10<sup>−13</sup></td>
</tr>
<tr>
<td align="center"><bold>Specificity</bold></td>
<td align="center">0.003</td>
<td align="center">1.5*10<sup>−4</sup></td>
<td align="center">3.5*10<sup>−4</sup></td>
<td align="center">5.3*10<sup>−8</sup></td>
</tr>
<tr>
<td align="center" rowspan="6"><bold>Case: Inputs without Word2Vec</bold></td>
<td align="center"><bold>AUC</bold></td>
<td align="center">8.0*10<sup>−4</sup></td>
<td align="center">2.3*10<sup>−4</sup></td>
<td align="center">1.7*10<sup>−4</sup></td>
<td align="center">5.7*10<sup>−8</sup></td>
</tr>
<tr>
<td align="center"><bold>Accuracy</bold></td>
<td align="center">0.04</td>
<td align="center">0.02</td>
<td align="center">0.04</td>
<td align="center">6.3*10<sup>−8</sup></td>
</tr>
<tr>
<td align="center"><bold>Precision</bold></td>
<td align="center">0.04</td>
<td align="center">0.08</td>
<td align="center">0.006</td>
<td align="center">0.46</td>
</tr>
<tr>
<td align="center"><bold>Recall</bold></td>
<td align="center">0.1</td>
<td align="center">0.08</td>
<td align="center">0.1</td>
<td align="center">2.0*10<sup>−9</sup></td>
</tr>
<tr>
<td align="center"><bold>F1-score</bold></td>
<td align="center">0.02</td>
<td align="center">0.01</td>
<td align="center">0.06</td>
<td align="center">1.6*10<sup>−12</sup></td>
</tr>
<tr>
<td align="center"><bold>Specificity</bold></td>
<td align="center">0.42</td>
<td align="center">0.44</td>
<td align="center">0.09</td>
<td align="center">1.0*10<sup>−7</sup></td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<p>We examined the importance of each feature by evaluating the AUC after removal of the feature from the LSTM model (<bold><xref ref-type="fig" rid="pone.0239007.g005">Fig 5</xref></bold>). When bloodpressureideal and bloodpressureintermediate were removed, the AUC values decreased substantially, which indicated that blood pressure contributed strongly to the ability of the LSTM to discriminate CD patients from the healthy group.</p>
<fig id="pone.0239007.g005" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0239007.g005</object-id>
<label>Fig 5</label>
<caption>
<title>Feature discriminative importance evaluated using the LSTM model.</title>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0239007.g005" xlink:type="simple"/>
</fig>
</sec>
<sec id="sec012" sec-type="conclusions">
<title>Discussion</title>
<p>In this study, we utilized data from clinics across the US to examine the association between CVH measures and CD diagnoses over a 10-year period by employing traditional machine learning models and deep learning techniques. Our results indicated that the deep learning algorithm–LSTM–outperformed the traditional machine learning algorithms and achieved the best accuracy performance. Our feature-importance analysis, in which we dropped one feature at a time by setting it to a constant, indicated that ideal CVH, especially blood pressure, was associated with CD diagnosis, which is consistent with results that high blood pressure and obesity might increase the chances of developing CD. Additional risk factors that were associated with a higher prevalence of CD included female gender and white race. Our study is the first to utilize the LSTM to investigate the relationship between time-series CVH measurements and CD diagnoses. Not surprisingly, the LSTM deep learning model achieved the best performance compared with the traditional machine learning algorithms used in previous EHR data studies [<xref ref-type="bibr" rid="pone.0239007.ref034">34</xref>]. An advantage of our study is that the results represent associations seen in over 70 clinics in the US. In future work, we will further investigate time-aware LSTM models [<xref ref-type="bibr" rid="pone.0239007.ref035">35</xref>, <xref ref-type="bibr" rid="pone.0239007.ref036">36</xref>] to better capture the underlying patterns in the irregular time intervals in the longitudinal EHR data.</p>
<sec id="sec013">
<title>Limitations</title>
<p>We encountered some limitations to using EHR data for these analyses. First, the timing and number of visits with CVH measurements differed among patients: some patients visited frequently and had many visits, whereas others had only a few. To address this, we created virtual events for patients with fewer visits in order to conduct our analyses. Second, the prediction accuracy might be further improved with additional demographic and clinical factors beyond the regular medical visits and measurements used in this study (e.g., health data collected from wearable devices). Third, our findings would be more generalizable if we had greater representation from more clinics across the US.</p>
</sec>
</sec>
<sec id="sec014" sec-type="conclusions">
<title>Conclusions</title>
<p>Deep learning models can predict incident CD from time-series CVH measurements more effectively than traditional machine learning algorithms. Ideal CVH scores, especially BMI and blood pressure, could be associated with a lower chance of developing CD. This study determined the extent to which ideal CVH is important to attain and maintain for more favorable outcomes. These findings may be used to prevent CD in the outpatient setting by encouraging appropriate management of CVH.</p>
</sec>
</body>
<back>
<ref-list>
<title>References</title>
<ref id="pone.0239007.ref001"><label>1</label><mixed-citation publication-type="book" xlink:type="simple"><collab>American Heart Association</collab>. <source>What Is an Arrhythmia?</source>—<publisher-name>NHLBI, NIH. Explor Asthma</publisher-name>. <year>2015</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1002/sia.740150809" xlink:type="simple">10.1002/sia.740150809</ext-link></comment></mixed-citation></ref>
<ref id="pone.0239007.ref002"><label>2</label><mixed-citation publication-type="other" xlink:type="simple"><ext-link ext-link-type="uri" xlink:href="https://www.scripps.org/sparkle-assets/documents/heart_rhythm_facts.pdf" xlink:type="simple">https://www.scripps.org/sparkle-assets/documents/heart_rhythm_facts.pdf</ext-link>.</mixed-citation></ref>
<ref id="pone.0239007.ref003"><label>3</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Mehra</surname> <given-names>R</given-names></name>. <article-title>Global public health problem of sudden cardiac death</article-title>. <source>J Electrocardiol</source>. <year>2007</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.jelectrocard.2007.06.023" xlink:type="simple">10.1016/j.jelectrocard.2007.06.023</ext-link></comment> <object-id pub-id-type="pmid">17993308</object-id></mixed-citation></ref>
<ref id="pone.0239007.ref004"><label>4</label><mixed-citation publication-type="other" xlink:type="simple"><ext-link ext-link-type="uri" xlink:href="http://www.heart.org/en/health-topics/arrhythmia/prevention--treatment-of-arrhythmia" xlink:type="simple">http://www.heart.org/en/health-topics/arrhythmia/prevention--treatment-of-arrhythmia</ext-link>.</mixed-citation></ref>
<ref id="pone.0239007.ref005"><label>5</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Jensen</surname> <given-names>PB</given-names></name>, <name name-style="western"><surname>Jensen</surname> <given-names>LJ</given-names></name>, <name name-style="western"><surname>Brunak</surname> <given-names>S</given-names></name>. <article-title>Mining electronic health records: Towards better research applications and clinical care</article-title>. <source>Nature Reviews Genetics</source>. <year>2012</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nrg3208" xlink:type="simple">10.1038/nrg3208</ext-link></comment> <object-id pub-id-type="pmid">22549152</object-id></mixed-citation></ref>
<ref id="pone.0239007.ref006"><label>6</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Anderson</surname> <given-names>AE</given-names></name>, <name name-style="western"><surname>Kerr</surname> <given-names>WT</given-names></name>, <name name-style="western"><surname>Thames</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Li</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Xiao</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Cohen</surname> <given-names>MS</given-names></name>. <article-title>Electronic health record phenotyping improves detection and screening of type 2 diabetes in the general United States population: A cross-sectional, unselected, retrospective study</article-title>. <source>J Biomed Inform</source>. <year>2016</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.jbi.2015.12.006" xlink:type="simple">10.1016/j.jbi.2015.12.006</ext-link></comment> <object-id pub-id-type="pmid">26707455</object-id></mixed-citation></ref>
<ref id="pone.0239007.ref007"><label>7</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>GRT</surname> <given-names>H</given-names></name> and V. <article-title>Recent progress in the epidemiology of atrial fibrillation</article-title>. <source>Curr Opin Cardiol</source>. <year>2005</year>;<volume>20</volume>: <fpage>7</fpage>. <object-id pub-id-type="pmid">15596953</object-id></mixed-citation></ref>
<ref id="pone.0239007.ref008"><label>8</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Lubitz</surname> <given-names>SA</given-names></name>, <name name-style="western"><surname>Yi</surname> <given-names>BA</given-names></name>, <name name-style="western"><surname>Ellinor</surname> <given-names>PT</given-names></name>. <article-title>Genetics of Atrial Fibrillation</article-title>. <source>Heart Failure Clinics</source>. <year>2010</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.hfc.2009.12.004" xlink:type="simple">10.1016/j.hfc.2009.12.004</ext-link></comment> <object-id pub-id-type="pmid">20347792</object-id></mixed-citation></ref>
<ref id="pone.0239007.ref009"><label>9</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Vidaillet</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Granada</surname> <given-names>JF</given-names></name>, <name name-style="western"><surname>Chyou</surname> <given-names>PH</given-names></name>, <name name-style="western"><surname>Maassen</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Ortiz</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Pulido</surname> <given-names>JN</given-names></name>, <etal>et al</etal>. <article-title>A population-based study of mortality among patients with atrial fibrillation or flutter</article-title>. <source>Am J Med</source>. <year>2002</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/s0002-9343%2802%2901253-6" xlink:type="simple">10.1016/s0002-9343(02)01253-6</ext-link></comment> <object-id pub-id-type="pmid">12401530</object-id></mixed-citation></ref>
<ref id="pone.0239007.ref010"><label>10</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Kolek</surname> <given-names>MJ</given-names></name>, <name name-style="western"><surname>Graves</surname> <given-names>AJ</given-names></name>, <name name-style="western"><surname>Xu</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Bian</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Teixeira</surname> <given-names>PL</given-names></name>, <name name-style="western"><surname>Shoemaker</surname> <given-names>MB</given-names></name>, <etal>et al</etal>. <article-title>Evaluation of a prediction model for the development of atrial fibrillation in a repository of electronic medical records</article-title>. <source>JAMA Cardiol</source>. <year>2016</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1001/jamacardio.2016.3366" xlink:type="simple">10.1001/jamacardio.2016.3366</ext-link></comment> <object-id pub-id-type="pmid">27732699</object-id></mixed-citation></ref>
<ref id="pone.0239007.ref011"><label>11</label><mixed-citation publication-type="other" xlink:type="simple">Karnik S, Tan SL, Berg B, Glurich I, Zhang J, Vidaillet HJ, et al. Predicting atrial fibrillation and flutter using electronic health records. Proceedings of the Annual International Conference of the IEEE Engineering in Medicine and Biology Society, EMBS. 2012. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1109/EMBC.2012.6347254" xlink:type="simple">10.1109/EMBC.2012.6347254</ext-link></comment></mixed-citation></ref>
<ref id="pone.0239007.ref012"><label>12</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Khurshid</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Keaney</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Ellinor</surname> <given-names>PT</given-names></name>, <name name-style="western"><surname>Lubitz</surname> <given-names>SA</given-names></name>. <article-title>A simple and portable algorithm for identifying atrial fibrillation in the electronic medical record</article-title>. <source>Am J Cardiol</source>. <year>2016</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.amjcard.2015.10.031" xlink:type="simple">10.1016/j.amjcard.2015.10.031</ext-link></comment> <object-id pub-id-type="pmid">26684516</object-id></mixed-citation></ref>
<ref id="pone.0239007.ref013"><label>13</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Bhattacharya</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Lu</surname> <given-names>DY</given-names></name>, <name name-style="western"><surname>Lingamaneni</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Kudchadkar</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Villareal</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Sivalokanathan</surname> <given-names>S</given-names></name>, <etal>et al</etal>. <article-title>Identifying ventricular arrhythmia cases and their predictors by applying machine learning methods to electronic health records (EHR) of hypertrophic cardiomyopathy (HCM) patients</article-title>. <source>Circulation</source>. <year>2017</year>.</mixed-citation></ref>
<ref id="pone.0239007.ref014"><label>14</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Goodfellow</surname> <given-names>I</given-names></name>, <name name-style="western"><surname>Bengio</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Courville</surname> <given-names>A</given-names></name>. <source>Deep Learning</source>. <publisher-name>MIT Press</publisher-name>. <year>2016</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1533/9780857099440.59" xlink:type="simple">10.1533/9780857099440.59</ext-link></comment></mixed-citation></ref>
<ref id="pone.0239007.ref015"><label>15</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ayala Solares</surname> <given-names>JR</given-names></name>, <name name-style="western"><surname>Diletta Raimondi</surname> <given-names>FE</given-names></name>, <name name-style="western"><surname>Zhu</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Rahimian</surname> <given-names>F</given-names></name>, <name name-style="western"><surname>Canoy</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Tran</surname> <given-names>J</given-names></name>, <etal>et al</etal>. <article-title>Deep learning for electronic health records: A comparative review of multiple deep neural architectures</article-title>. <source>Journal of Biomedical Informatics</source>. <year>2020</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.jbi.2019.103337" xlink:type="simple">10.1016/j.jbi.2019.103337</ext-link></comment> <object-id pub-id-type="pmid">31916973</object-id></mixed-citation></ref>
<ref id="pone.0239007.ref016"><label>16</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Xiao</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Choi</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Sun</surname> <given-names>J</given-names></name>. <article-title>Opportunities and challenges in developing deep learning models using electronic health records data: A systematic review</article-title>. <source>Journal of the American Medical Informatics Association</source>. <year>2018</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/jamia/ocy068" xlink:type="simple">10.1093/jamia/ocy068</ext-link></comment> <object-id pub-id-type="pmid">29893864</object-id></mixed-citation></ref>
<ref id="pone.0239007.ref017"><label>17</label><mixed-citation publication-type="other" xlink:type="simple">Shashikumar SP, Shah AJ, Li Q, Clifford GD, Nemati S. A deep learning approach to monitoring and detecting atrial fibrillation using wearable technology. 2017 IEEE EMBS International Conference on Biomedical and Health Informatics, BHI 2017. 2017. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1109/BHI.2017.7897225" xlink:type="simple">10.1109/BHI.2017.7897225</ext-link></comment></mixed-citation></ref>
<ref id="pone.0239007.ref018"><label>18</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Rajkomar</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Oren</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Chen</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Dai</surname> <given-names>AM</given-names></name>, <name name-style="western"><surname>Hajaj</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Hardt</surname> <given-names>M</given-names></name>, <etal>et al</etal>. <article-title>Scalable and accurate deep learning with electronic health records</article-title>. <source>npj Digit Med</source>. <year>2018</year>;<volume>1</volume>: <fpage>18</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/s41746-018-0029-1" xlink:type="simple">10.1038/s41746-018-0029-1</ext-link></comment> <object-id pub-id-type="pmid">31304302</object-id></mixed-citation></ref>
<ref id="pone.0239007.ref019"><label>19</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hochreiter</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Schmidhuber</surname> <given-names>J</given-names></name>. <article-title>Long short-term memory</article-title>. <source>Neural Comput</source>. <year>1997</year>;<volume>9</volume>: <fpage>1735</fpage>–<lpage>1780</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1162/neco.1997.9.8.1735" xlink:type="simple">10.1162/neco.1997.9.8.1735</ext-link></comment> <object-id pub-id-type="pmid">9377276</object-id></mixed-citation></ref>
<ref id="pone.0239007.ref020"><label>20</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Bufalino</surname> <given-names>V</given-names></name>, <name name-style="western"><surname>Bauman</surname> <given-names>MA</given-names></name>, <name name-style="western"><surname>Shubrook</surname> <given-names>JH</given-names></name>, <etal>et al</etal>. <article-title>Evolution of “The Guideline Advantage”: Lessons learned from the front lines of outpatient performance measurement</article-title>. <source>CA Cancer J Clin</source>. <year>2014</year>;<volume>64</volume>: <fpage>157</fpage>–<lpage>163</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3322/caac.21233" xlink:type="simple">10.3322/caac.21233</ext-link></comment> <object-id pub-id-type="pmid">24788583</object-id></mixed-citation></ref>
<ref id="pone.0239007.ref021"><label>21</label><mixed-citation publication-type="other" xlink:type="simple"><ext-link ext-link-type="uri" xlink:href="https://www.hcup-us.ahrq.gov/toolssoftware/ccs/ccs.jsp" xlink:type="simple">https://www.hcup-us.ahrq.gov/toolssoftware/ccs/ccs.jsp</ext-link>.</mixed-citation></ref>
<ref id="pone.0239007.ref022"><label>22</label><mixed-citation publication-type="other" xlink:type="simple"><ext-link ext-link-type="uri" xlink:href="https://www.nlm.nih.gov/research/umls/rxnorm/sourcereleasedocs/mmsl.html" xlink:type="simple">https://www.nlm.nih.gov/research/umls/rxnorm/sourcereleasedocs/mmsl.html</ext-link>.</mixed-citation></ref>
<ref id="pone.0239007.ref023"><label>23</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Levenshtein</surname> <given-names>VI</given-names></name>. <article-title>Binary codes capable of correcting deletions, insertions, and reversals</article-title>. <source>Sov Phys Dokl</source>. <year>1966</year>. citeulike-article-id:311174</mixed-citation></ref>
<ref id="pone.0239007.ref024"><label>24</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Lloyd-Jones</surname> <given-names>DM</given-names></name>, <name name-style="western"><surname>Hong</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Labarthe</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Mozaffarian</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Appel</surname> <given-names>LJ</given-names></name>, <name name-style="western"><surname>Van Horn</surname> <given-names>L</given-names></name>, <etal>et al</etal>. <article-title>Defining and Setting National Goals for Cardiovascular Health Promotion and Disease Reduction</article-title>. <source>Circulation</source>. <year>2010</year>;<volume>121</volume>: <fpage>586</fpage>–<lpage>613</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1161/CIRCULATIONAHA.109.192703" xlink:type="simple">10.1161/CIRCULATIONAHA.109.192703</ext-link></comment> <object-id pub-id-type="pmid">20089546</object-id></mixed-citation></ref>
<ref id="pone.0239007.ref025"><label>25</label><mixed-citation publication-type="other" xlink:type="simple">Mikolov T, Corrado G, Chen K, Dean J. word2vec. Proc Int Conf Learn Represent (ICLR 2013). 2013.</mixed-citation></ref>
<ref id="pone.0239007.ref026"><label>26</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hannun</surname> <given-names>AY</given-names></name>, <name name-style="western"><surname>Rajpurkar</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Haghpanahi</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Tison</surname> <given-names>GH</given-names></name>, <name name-style="western"><surname>Bourn</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Turakhia</surname> <given-names>MP</given-names></name>, <etal>et al</etal>. <article-title>Cardiologist-level arrhythmia detection and classification in ambulatory electrocardiograms using a deep neural network</article-title>. <source>Nat Med</source>. <year>2019</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/s41591-018-0268-3" xlink:type="simple">10.1038/s41591-018-0268-3</ext-link></comment> <object-id pub-id-type="pmid">30617320</object-id></mixed-citation></ref>
<ref id="pone.0239007.ref027"><label>27</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hosmer</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Lemeshow</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Sturdivant</surname> <given-names>RX</given-names></name>. <article-title>Model-Building Strategies and Methods for Logistic Regression</article-title>. <source>Applied Logistic Regression</source>. <year>2013</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1002/0471722146.ch4" xlink:type="simple">10.1002/0471722146.ch4</ext-link></comment></mixed-citation></ref>
<ref id="pone.0239007.ref028"><label>28</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Breiman</surname> <given-names>L</given-names></name>. <article-title>Random forests</article-title>. <source>Mach Learn</source>. <year>2001</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1023/A%3A1010933404324" xlink:type="simple">10.1023/A:1010933404324</ext-link></comment></mixed-citation></ref>
<ref id="pone.0239007.ref029"><label>29</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Friedman</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Geiger</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Goldszmidt</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Provan</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Langley</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Smyth</surname> <given-names>P</given-names></name>. <article-title>Bayesian Network Classifiers</article-title>. <source>Machine Learning</source>. <year>1997</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1023/a%3A1007465528199" xlink:type="simple">10.1023/a:1007465528199</ext-link></comment></mixed-citation></ref>
<ref id="pone.0239007.ref030"><label>30</label><mixed-citation publication-type="other" xlink:type="simple">Jozefowicz R, Zaremba W, Sutskever I. An empirical exploration of Recurrent Network architectures. 32nd International Conference on Machine Learning, ICML 2015. 2015.</mixed-citation></ref>
<ref id="pone.0239007.ref031"><label>31</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Kingma</surname> <given-names>DP</given-names></name>, <name name-style="western"><surname>Ba</surname> <given-names>J</given-names></name>. <source>Adam: A method for stochastic optimization</source>. <publisher-name>CoRR</publisher-name>. <year>2015</year>;<fpage>abs/1412.6980</fpage>.</mixed-citation></ref>
<ref id="pone.0239007.ref032"><label>32</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Han</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Moraga</surname> <given-names>C</given-names></name>. <article-title>The influence of the sigmoid function parameters on the speed of backpropagation learning</article-title>. <source>Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)</source>. <year>1995</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/3-540-59497-3%5F175" xlink:type="simple">10.1007/3-540-59497-3_175</ext-link></comment></mixed-citation></ref>
<ref id="pone.0239007.ref033"><label>33</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Van Der Maaten</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Hinton</surname> <given-names>G</given-names></name>. <article-title>Visualizing data using t-SNE</article-title>. <source>J Mach Learn Res</source>. <year>2008</year>.</mixed-citation></ref>
<ref id="pone.0239007.ref034"><label>34</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Shickel</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Tighe</surname> <given-names>PJ</given-names></name>, <name name-style="western"><surname>Bihorac</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Rashidi</surname> <given-names>P</given-names></name>. <article-title>Deep EHR: A Survey of Recent Advances in Deep Learning Techniques for Electronic Health Record (EHR) Analysis</article-title>. <source>IEEE J Biomed Heal Informatics</source>. <year>2018</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1109/JBHI.2017.2767063" xlink:type="simple">10.1109/JBHI.2017.2767063</ext-link></comment> <object-id pub-id-type="pmid">29989977</object-id></mixed-citation></ref>
<ref id="pone.0239007.ref035"><label>35</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Baytas</surname> <given-names>IM</given-names></name>, <name name-style="western"><surname>Xiao</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Zhang</surname> <given-names>X</given-names></name>, <name name-style="western"><surname>Wang</surname> <given-names>F</given-names></name>, <name name-style="western"><surname>Jain</surname> <given-names>AK</given-names></name>, <name name-style="western"><surname>Zhou</surname> <given-names>J</given-names></name>. <article-title>Patient subtyping via time-aware LSTM networks</article-title>. <source>Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery and Data Mining</source>. <year>2017</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1145/3097983.3097997" xlink:type="simple">10.1145/3097983.3097997</ext-link></comment></mixed-citation></ref>
<ref id="pone.0239007.ref036"><label>36</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Zhang</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Yang</surname> <given-names>X</given-names></name>, <name name-style="western"><surname>Ivy</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Chi</surname> <given-names>M</given-names></name>. <article-title>Attain: Attention-based time-aware LSTM networks for disease progression modeling</article-title>. <source>IJCAI International Joint Conference on Artificial Intelligence</source>. <year>2019</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.24963/ijcai.2019/607" xlink:type="simple">10.24963/ijcai.2019/607</ext-link></comment></mixed-citation></ref>
</ref-list>
</back>
<sub-article article-type="aggregated-review-documents" id="pone.0239007.r001" specific-use="decision-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pone.0239007.r001</article-id>
<title-group>
<article-title>Decision Letter 0</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Ramagopalan</surname>
<given-names>Sreeram V.</given-names>
</name>
<role>Academic Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2021</copyright-year>
<copyright-holder>Sreeram V. Ramagopalan</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pone.0239007" document-id-type="doi" document-type="article" id="rel-obj001" link-type="peer-reviewed-article"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>0</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">9 Jun 2020</named-content>
</p>
<p>PONE-D-20-10776</p>
<p>Application of a time-series deep learning model to predict cardiac dysrhythmias in electronic health records</p>
<p>PLOS ONE</p>
<p>Dear Dr. Foraker,</p>
<p>Thank you for submitting your manuscript to PLOS ONE. After careful consideration, we feel that it has merit but does not fully meet PLOS ONE’s publication criteria as it currently stands. Therefore, we invite you to submit a revised version of the manuscript that addresses the points raised during the review process.</p>
<p>Please submit your revised manuscript by Jul 24 2020 11:59PM. If you will need more time than this to complete your revisions, please reply to this message or contact the journal office at <email xlink:type="simple">plosone@plos.org</email>. When you're ready to submit your revision, log on to <ext-link ext-link-type="uri" xlink:href="https://www.editorialmanager.com/pone/" xlink:type="simple">https://www.editorialmanager.com/pone/</ext-link> and select the 'Submissions Needing Revision' folder to locate your manuscript file.</p>
<p>Please include the following items when submitting your revised manuscript:</p>
<p><list list-type="bullet"><list-item><p>A rebuttal letter that responds to each point raised by the academic editor and reviewer(s). You should upload this letter as a separate file labeled 'Response to Reviewers'.</p></list-item><list-item><p>A marked-up copy of your manuscript that highlights changes made to the original version. You should upload this as a separate file labeled 'Revised Manuscript with Track Changes'.</p></list-item><list-item><p>An unmarked version of your revised paper without tracked changes. You should upload this as a separate file labeled 'Manuscript'.</p></list-item></list></p>
<p>If you would like to make changes to your financial disclosure, please include your updated statement in your cover letter. Guidelines for resubmitting your figure files are available below the reviewer comments at the end of this letter.</p>
<p>If applicable, we recommend that you deposit your laboratory protocols in protocols.io to enhance the reproducibility of your results. Protocols.io assigns your protocol its own identifier (DOI) so that it can be cited independently in the future. For instructions see: <ext-link ext-link-type="uri" xlink:href="http://journals.plos.org/plosone/s/submission-guidelines#loc-laboratory-protocols" xlink:type="simple">http://journals.plos.org/plosone/s/submission-guidelines#loc-laboratory-protocols</ext-link></p>
<p>We look forward to receiving your revised manuscript.</p>
<p>Kind regards,</p>
<p>Sreeram V. Ramagopalan</p>
<p>Academic Editor</p>
<p>PLOS ONE</p>
<p>Journal Requirements:</p>
<p>When submitting your revision, we need you to address these additional requirements.</p>
<p>1. Please ensure that your manuscript meets PLOS ONE's style requirements, including those for file naming. The PLOS ONE style templates can be found at <ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/plosone/s/file?id=wjVg/PLOSOne_formatting_sample_main_body.pdf" xlink:type="simple">https://journals.plos.org/plosone/s/file?id=wjVg/PLOSOne_formatting_sample_main_body.pdf</ext-link> and <ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/plosone/s/file?id=ba62/PLOSOne_formatting_sample_title_authors_affiliations.pdf" xlink:type="simple">https://journals.plos.org/plosone/s/file?id=ba62/PLOSOne_formatting_sample_title_authors_affiliations.pdf</ext-link></p>
<p>2. Our internal editors have evaluated your manuscript and determined that it is within the scope of our 'Primary and Secondary Prevention of Cardiovascular Disease' Call for Papers. This collection of papers is headed by a team of Guest Editors for PLOS ONE and will encompass a diverse range of research articles. Additional information can be found on our announcement page: (<ext-link ext-link-type="uri" xlink:href="https://collections.plos.org/s/prevention-cardiovascular" xlink:type="simple">https://collections.plos.org/s/prevention-cardiovascular</ext-link>). If you would like your manuscript to be considered for this collection, please let us know in your cover letter and we will ensure that your paper is treated as if you were responding to this call. If you would prefer to remove your manuscript from collection consideration, please specify this in the cover letter.</p>
<p>3. In the ethics statement in the manuscript and in the online submission form, please provide additional information about the patient records used in your retrospective study. Specifically, please ensure that you have discussed whether all data were fully anonymized before you accessed them and/or whether the IRB or ethics committee waived the requirement for informed consent. If patients provided informed written consent to have data from their medical records used in research, please include this information.</p>
<p>4. We note that you have indicated that data from this study are available upon request. PLOS only allows data to be available upon request if there are legal or ethical restrictions on sharing data publicly. For information on unacceptable data access restrictions, please see <ext-link ext-link-type="uri" xlink:href="http://journals.plos.org/plosone/s/data-availability#loc-unacceptable-data-access-restrictions" xlink:type="simple">http://journals.plos.org/plosone/s/data-availability#loc-unacceptable-data-access-restrictions</ext-link>.</p>
<p>In your revised cover letter, please address the following prompts:</p>
<p>a) If there are ethical or legal restrictions on sharing a de-identified data set, please explain them in detail (e.g., data contain potentially identifying or sensitive patient information) and who has imposed them (e.g., an ethics committee). Please also provide contact information for a data access committee, ethics committee, or other institutional body to which data requests may be sent.</p>
<p>b) If there are no restrictions, please upload the minimal anonymized data set necessary to replicate your study findings as either Supporting Information files or to a stable, public repository and provide us with the relevant URLs, DOIs, or accession numbers. Please see <ext-link ext-link-type="uri" xlink:href="http://www.bmj.com/content/340/bmj.c181.long" xlink:type="simple">http://www.bmj.com/content/340/bmj.c181.long</ext-link> for guidelines on how to de-identify and prepare clinical data for publication. For a list of acceptable repositories, please see <ext-link ext-link-type="uri" xlink:href="http://journals.plos.org/plosone/s/data-availability#loc-recommended-repositories" xlink:type="simple">http://journals.plos.org/plosone/s/data-availability#loc-recommended-repositories</ext-link>.</p>
<p>We will update your Data Availability statement on your behalf to reflect the information you provide.</p>
<p>Additional Editor Comments (if provided):</p>
<p>[Note: HTML markup is below. Please do not edit.]</p>
<p>Reviewers' comments:</p>
<p>Reviewer's Responses to Questions</p>
<p><!-- <font color="black"> --><bold>Comments to the Author</bold></p>
<p>1. Is the manuscript technically sound, and do the data support the conclusions?</p>
<p>The manuscript must describe a technically sound piece of scientific research with data that supports the conclusions. Experiments must have been conducted rigorously, with appropriate controls, replication, and sample sizes. The conclusions must be drawn appropriately based on the data presented. <!-- </font> --></p>
<p>Reviewer #1: No</p>
<p>**********</p>
<p><!-- <font color="black"> -->2. Has the statistical analysis been performed appropriately and rigorously? <!-- </font> --></p>
<p>Reviewer #1: No</p>
<p>**********</p>
<p><!-- <font color="black"> -->3. Have the authors made all data underlying the findings in their manuscript fully available?</p>
<p>The <ext-link ext-link-type="uri" xlink:href="http://www.plosone.org/static/policies.action#sharing" xlink:type="simple">PLOS Data policy</ext-link> requires authors to make all data underlying the findings described in their manuscript fully available without restriction, with rare exception (please refer to the Data Availability Statement in the manuscript PDF file). The data should be provided as part of the manuscript or its supporting information, or deposited to a public repository. For example, in addition to summary statistics, the data points behind means, medians and variance measures should be available. If there are restrictions on publicly sharing data—e.g. participant privacy or use of data from a third party—those must be specified.<!-- </font> --></p>
<p>Reviewer #1: No</p>
<p>**********</p>
<p><!-- <font color="black"> -->4. Is the manuscript presented in an intelligible fashion and written in standard English?</p>
<p>PLOS ONE does not copyedit accepted manuscripts, so the language in submitted articles must be clear, correct, and unambiguous. Any typographical or grammatical errors should be corrected at revision, so please note any specific errors here.<!-- </font> --></p>
<p>Reviewer #1: Yes</p>
<p>**********</p>
<p><!-- <font color="black"> -->5. Review Comments to the Author</p>
<p>Please use the space provided to explain your answers to the questions above. You may also include additional comments for the author, including concerns about dual publication, research ethics, or publication ethics. (Please upload your review as an attachment if it exceeds 20,000 characters)<!-- </font> --></p>
<p>Reviewer #1: In their manuscript titled "Application of a time-series deep learning model to predict cardiac dysrhythmias in electronic health records", Guo et al use the Electronic healthcare records (EHR) data available in The Guideline Advantage (TGA) and deep learning methodologies to make predictions on cardiac dysrhythmias (CD) based on cardiovascular health (CVH) measures. They propose that LSTM performs better than LR, RF and NB and identify blood pressure and BMI as the most influential features.</p>
<p>Extracting time series information based on EHR data is a topic of high interest in the community and the question the authors pose, namely "can we predict CD based on EHR data using machine learning methodologies?" is very timely because the quality and quantity of available data are increasing and machine learning techniques have become not only more powerful but also more interpretable and robust. The authors state that the paper is the first time that DL algorithms have been applied to predict CD using time series EHR data. However, the manuscript has two major shortcomings: the methodology is not described in sufficient detail to be reproducible, and the time-dependent information does not reflect real time.</p>
<p>Time dependence: DL has been used for different classification tasks on EHR and CD prediction using different types of datasets (Solares 2020, Xia 2018, Shashikumar 2017). This study builds on literature by applying existing methods to a previously studied dataset. The authors state that this is the first time that DL is applied to predict CD using time series EHR data. Indeed, if time dependence is incorporated into the predictive model, this is very interesting. However, it is not clear how time is included in the representations. The authors also mention this in the Limitations section. The events are represented by 311 time steps. Are two events separated by two days treated the same way as two events separated by two months? The authors state they use padding to create events so that they can have a 311D vector. How is this padding performed? If dynamic data is the major contribution of the paper, more information about the treatment of time should be included. Furthermore, it is not clear how the finding that blood pressure and BMI are the most influential features is impacted by time dependence.</p>
<p>Word2Vec: The study first constructs categorical features to represent patients and then learns continuous vectors using the Word2Vec algorithm. The authors can compare Word2Vec with categorical representations to show the value added by distributed representations. Besides, the construction of categorical vectors and the association of vectors with time can be detailed further, since these are key to the representations.</p>
<p>LSTM: LSTM is adopted as a neural time-series model to identify CD patients and it is compared with three benchmark models: Naive Bayes, Logistic Regression and Random Forest. Given that these models are neither designed to classify time-series nor as complex as a neural network, other models, such as non-neural time-series models and feed-forward neural networks, can be added as benchmarks to reveal the benefits of LSTM further. This would also uncover if the performance gain is due to time-series representation or complexity of neural models.</p>
<p>Causation: The study concludes that “BMI and blood pressure could lower the chance of developing CD” based on the fact that the removal of these features affected the prediction performance the most. This association can be analyzed in more detail, since these features are first embedded using Word2Vec and, thus, not directly observed by the prediction model. Moreover, the causation can be shown explicitly, since the performance drop might not necessarily indicate causation but might simply mean that these features helped the model to discriminate CD patients from the healthy group the most.</p>
<p>DL model: The code is not yet available. The authors have not included important details about the algorithms used. For example, how do the authors test for overfit? Is 5-fold cross validation performed? What is the error in their performance metrics? What is the statistical significance of their model comparison metrics?</p>
<p>The manuscript is well organized and easy to follow but more details about the methodology are needed and the conclusions are not supported by the experiments.</p>
<p>**********</p>
<p><!-- <font color="black"> -->6. PLOS authors have the option to publish the peer review history of their article (<ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/plosone/s/editorial-and-peer-review-process#loc-peer-review-history" xlink:type="simple">what does this mean?</ext-link>). If published, this will include your full peer review and any attached files.</p>
<p>If you choose “no”, your identity will remain anonymous but your review may still be made public.</p>
<p><bold>Do you want your identity to be public for this peer review?</bold> For information about this choice, including consent withdrawal, please see our <ext-link ext-link-type="uri" xlink:href="https://www.plos.org/privacy-policy" xlink:type="simple">Privacy Policy</ext-link>.<!-- </font> --></p>
<p>Reviewer #1: No</p>
<p>[NOTE: If reviewer comments were submitted as an attachment file, they will be attached to this email and accessible via the submission site. Please log into your account, locate the manuscript record, and check for the action link "View Attachments". If this link does not appear, there are no attachment files.]</p>
<p>While revising your submission, please upload your figure files to the Preflight Analysis and Conversion Engine (PACE) digital diagnostic tool, <ext-link ext-link-type="uri" xlink:href="https://pacev2.apexcovantage.com/" xlink:type="simple">https://pacev2.apexcovantage.com/</ext-link>. PACE helps ensure that figures meet PLOS requirements. To use PACE, you must first register as a user. Registration is free. Then, login and navigate to the UPLOAD tab, where you will find detailed instructions on how to use the tool. If you encounter any issues or have any questions when using PACE, please email PLOS at <email xlink:type="simple">figures@plos.org</email>. Please note that Supporting Information files do not need this step.</p>
</body>
</sub-article>
<sub-article article-type="author-comment" id="pone.0239007.r002">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pone.0239007.r002</article-id>
<title-group>
<article-title>Author response to Decision Letter 0</article-title>
</title-group>
<related-object document-id="10.1371/journal.pone.0239007" document-id-type="doi" document-type="peer-reviewed-article" id="rel-obj002" link-type="rebutted-decision-letter" object-id="10.1371/journal.pone.0239007.r001" object-id-type="doi" object-type="decision-letter"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>1</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="author-response-date">24 Jul 2020</named-content>
</p>
<p>We would like to thank the editor and the reviewer for the great and helpful comments. Please find our point-by-point response to the reviews and editors in the attached file 'Response to reviewers'.</p>
<supplementary-material id="pone.0239007.s001" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" position="float" xlink:href="info:doi/10.1371/journal.pone.0239007.s001" xlink:type="simple">
<label>Attachment</label>
<caption>
<p>Submitted filename: <named-content content-type="submitted-filename">Response to reviewers.docx</named-content></p>
</caption>
</supplementary-material>
</body>
</sub-article>
<sub-article article-type="aggregated-review-documents" id="pone.0239007.r003" specific-use="decision-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pone.0239007.r003</article-id>
<title-group>
<article-title>Decision Letter 1</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Ramagopalan</surname>
<given-names>Sreeram V.</given-names>
</name>
<role>Academic Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2021</copyright-year>
<copyright-holder>Sreeram V. Ramagopalan</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pone.0239007" document-id-type="doi" document-type="article" id="rel-obj003" link-type="peer-reviewed-article"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>1</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">13 Oct 2020</named-content>
</p>
<p>PONE-D-20-10776R1</p>
<p>Application of a time-series deep learning model to predict cardiac dysrhythmias in electronic health records</p>
<p>PLOS ONE</p>
<p>Dear Dr. Foraker,</p>
<p>Thank you for submitting your manuscript to PLOS ONE. After careful consideration, we feel that it has merit but does not fully meet PLOS ONE’s publication criteria as it currently stands. Therefore, we invite you to submit a revised version of the manuscript that addresses the points raised during the review process.</p>
<p>==============================</p>
<p>Dear Authors, you have, since the acceptance of this manuscript, resubmitted the manuscript with changes stating that analyses were revised and different results obtained. On the version that I received, these changes were not reflected in the abstract and elsewhere in the manuscript. As these changes were materially impacting the manuscript we will send the manuscript for re-review. If you do wish to resubmit the manuscript please make sure the updated manuscript fully accounts for the new results, otherwise we will not be willing to proceed further with your manuscript.</p>
<p>==============================</p>
<p>Please submit your revised manuscript by Nov 27 2020 11:59PM. If you will need more time than this to complete your revisions, please reply to this message or contact the journal office at <email xlink:type="simple">plosone@plos.org</email>. When you're ready to submit your revision, log on to <ext-link ext-link-type="uri" xlink:href="https://www.editorialmanager.com/pone/" xlink:type="simple">https://www.editorialmanager.com/pone/</ext-link> and select the 'Submissions Needing Revision' folder to locate your manuscript file.</p>
<p>Please include the following items when submitting your revised manuscript:</p>
<p><list list-type="bullet"><list-item><p>A rebuttal letter that responds to each point raised by the academic editor and reviewer(s). You should upload this letter as a separate file labeled 'Response to Reviewers'.</p></list-item><list-item><p>A marked-up copy of your manuscript that highlights changes made to the original version. You should upload this as a separate file labeled 'Revised Manuscript with Track Changes'.</p></list-item><list-item><p>An unmarked version of your revised paper without tracked changes. You should upload this as a separate file labeled 'Manuscript'.</p></list-item></list></p>
<p>If you would like to make changes to your financial disclosure, please include your updated statement in your cover letter. Guidelines for resubmitting your figure files are available below the reviewer comments at the end of this letter.</p>
<p>If applicable, we recommend that you deposit your laboratory protocols in protocols.io to enhance the reproducibility of your results. Protocols.io assigns your protocol its own identifier (DOI) so that it can be cited independently in the future. For instructions see: <ext-link ext-link-type="uri" xlink:href="http://journals.plos.org/plosone/s/submission-guidelines#loc-laboratory-protocols" xlink:type="simple">http://journals.plos.org/plosone/s/submission-guidelines#loc-laboratory-protocols</ext-link></p>
<p>We look forward to receiving your revised manuscript.</p>
<p>Kind regards,</p>
<p>Sreeram V. Ramagopalan</p>
<p>Academic Editor</p>
<p>PLOS ONE</p>
<p>[Note: HTML markup is below. Please do not edit.]</p>
<p>Reviewers' comments:</p>
<p>Reviewer's Responses to Questions</p>
<p><!-- <font color="black"> --><bold>Comments to the Author</bold></p>
<p>1. If the authors have adequately addressed your comments raised in a previous round of review and you feel that this manuscript is now acceptable for publication, you may indicate that here to bypass the “Comments to the Author” section, enter your conflict of interest statement in the “Confidential to Editor” section, and submit your "Accept" recommendation.<!-- </font> --></p>
<p>Reviewer #1: All comments have been addressed</p>
<p>**********</p>
<p><!-- <font color="black"> -->2. Is the manuscript technically sound, and do the data support the conclusions?</p>
<p>The manuscript must describe a technically sound piece of scientific research with data that supports the conclusions. Experiments must have been conducted rigorously, with appropriate controls, replication, and sample sizes. The conclusions must be drawn appropriately based on the data presented. <!-- </font> --></p>
<p>Reviewer #1: Yes</p>
<p>**********</p>
<p><!-- <font color="black"> -->3. Has the statistical analysis been performed appropriately and rigorously? <!-- </font> --></p>
<p>Reviewer #1: Yes</p>
<p>**********</p>
<p><!-- <font color="black"> -->4. Have the authors made all data underlying the findings in their manuscript fully available?</p>
<p>The <ext-link ext-link-type="uri" xlink:href="http://www.plosone.org/static/policies.action#sharing" xlink:type="simple">PLOS Data policy</ext-link> requires authors to make all data underlying the findings described in their manuscript fully available without restriction, with rare exception (please refer to the Data Availability Statement in the manuscript PDF file). The data should be provided as part of the manuscript or its supporting information, or deposited to a public repository. For example, in addition to summary statistics, the data points behind means, medians and variance measures should be available. If there are restrictions on publicly sharing data—e.g. participant privacy or use of data from a third party—those must be specified.<!-- </font> --></p>
<p>Reviewer #1: Yes</p>
<p>**********</p>
<p><!-- <font color="black"> -->5. Is the manuscript presented in an intelligible fashion and written in standard English?</p>
<p>PLOS ONE does not copyedit accepted manuscripts, so the language in submitted articles must be clear, correct, and unambiguous. Any typographical or grammatical errors should be corrected at revision, so please note any specific errors here.<!-- </font> --></p>
<p>Reviewer #1: Yes</p>
<p>**********</p>
<p><!-- <font color="black"> -->6. Review Comments to the Author</p>
<p>Please use the space provided to explain your answers to the questions above. You may also include additional comments for the author, including concerns about dual publication, research ethics, or publication ethics. (Please upload your review as an attachment if it exceeds 20,000 characters)<!-- </font> --></p>
<p>Reviewer #1: The authors have addressed the concerns. The authors can include some references to time aware LSTM for future work (eg DOI: 10.1145/3097983.3097997).</p>
<p>**********</p>
<p><!-- <font color="black"> -->7. PLOS authors have the option to publish the peer review history of their article (<ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/plosone/s/editorial-and-peer-review-process#loc-peer-review-history" xlink:type="simple">what does this mean?</ext-link>). If published, this will include your full peer review and any attached files.</p>
<p>If you choose “no”, your identity will remain anonymous but your review may still be made public.</p>
<p><bold>Do you want your identity to be public for this peer review?</bold> For information about this choice, including consent withdrawal, please see our <ext-link ext-link-type="uri" xlink:href="https://www.plos.org/privacy-policy" xlink:type="simple">Privacy Policy</ext-link>.<!-- </font> --></p>
<p>Reviewer #1: No</p>
<p>[NOTE: If reviewer comments were submitted as an attachment file, they will be attached to this email and accessible via the submission site. Please log into your account, locate the manuscript record, and check for the action link "View Attachments". If this link does not appear, there are no attachment files.]</p>
<p>While revising your submission, please upload your figure files to the Preflight Analysis and Conversion Engine (PACE) digital diagnostic tool, <ext-link ext-link-type="uri" xlink:href="https://pacev2.apexcovantage.com/" xlink:type="simple">https://pacev2.apexcovantage.com/</ext-link>. PACE helps ensure that figures meet PLOS requirements. To use PACE, you must first register as a user. Registration is free. Then, login and navigate to the UPLOAD tab, where you will find detailed instructions on how to use the tool. If you encounter any issues or have any questions when using PACE, please email PLOS at <email xlink:type="simple">figures@plos.org</email>. Please note that Supporting Information files do not need this step.</p>
</body>
</sub-article>
<sub-article article-type="author-comment" id="pone.0239007.r004">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pone.0239007.r004</article-id>
<title-group>
<article-title>Author response to Decision Letter 1</article-title>
</title-group>
<related-object document-id="10.1371/journal.pone.0239007" document-id-type="doi" document-type="peer-reviewed-article" id="rel-obj004" link-type="rebutted-decision-letter" object-id="10.1371/journal.pone.0239007.r003" object-id-type="doi" object-type="decision-letter"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>2</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="author-response-date">21 Oct 2020</named-content>
</p>
<p>Response to reviewer comments</p>
<p>We would like to thank the reviewers for their informed, thoughtful, and helpful comments. Please find our responses to the reviews below in italics. We believe that the manuscript has been significantly improved by our responses to the reviewers and hope that you will find it suitable for publication in PLOS ONE.</p>
<p>Reviewers' comments:</p>
<p>Reviewer's Responses to Questions</p>
<p>Comments to the Author</p>
<p>1. If the authors have adequately addressed your comments raised in a previous round of review and you feel that this manuscript is now acceptable for publication, you may indicate that here to bypass the “Comments to the Author” section, enter your conflict of interest statement in the “Confidential to Editor” section, and submit your "Accept" recommendation.</p>
<p>Reviewer #1: All comments have been addressed</p>
<p>2. Is the manuscript technically sound, and do the data support the conclusions?</p>
<p>The manuscript must describe a technically sound piece of scientific research with data that supports the conclusions. Experiments must have been conducted rigorously, with appropriate controls, replication, and sample sizes. The conclusions must be drawn appropriately based on the data presented. </p>
<p>Reviewer #1: Yes</p>
<p>3. Has the statistical analysis been performed appropriately and rigorously? </p>
<p>Reviewer #1: Yes</p>
<p>4. Have the authors made all data underlying the findings in their manuscript fully available?</p>
<p>The PLOS Data policy requires authors to make all data underlying the findings described in their manuscript fully available without restriction, with rare exception (please refer to the Data Availability Statement in the manuscript PDF file). The data should be provided as part of the manuscript or its supporting information, or deposited to a public repository. For example, in addition to summary statistics, the data points behind means, medians and variance measures should be available. If there are restrictions on publicly sharing data—e.g. participant privacy or use of data from a third party—those must be specified.</p>
<p>Reviewer #1: Yes</p>
<p>5. Is the manuscript presented in an intelligible fashion and written in standard English?</p>
<p>PLOS ONE does not copyedit accepted manuscripts, so the language in submitted articles must be clear, correct, and unambiguous. Any typographical or grammatical errors should be corrected at revision, so please note any specific errors here.</p>
<p>Reviewer #1: Yes</p>
<p>6. Review Comments to the Author</p>
<p>Please use the space provided to explain your answers to the questions above. You may also include additional comments for the author, including concerns about dual publication, research ethics, or publication ethics. (Please upload your review as an attachment if it exceeds 20,000 characters)</p>
<p>Reviewer #1: </p>
<p>The authors have addressed the concerns. The authors can include some references to time aware LSTM for future work (eg DOI: 10.1145/3097983.3097997).</p>
<p>Thank you. Our apologies for not adding these references in the original manuscript. We have added the following references and also discussed them in the Discussion section as future work.</p>
<p>1. Baytas IM, Xiao C, Zhang X, Wang F, Jain AK, Zhou J. Patient subtyping via time-aware LSTM networks. In: Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery and Data Mining. 2017.</p>
<p>2. Zhang Y, Yang X, Ivy J, Chi M. Attain: Attention-based time-aware LSTM networks for disease progression modeling. IJCAI International Joint Conference on Artificial Intelligence. 2019. doi:10.24963/ijcai.2019/607.</p>
<p>Dear reviewer,</p>
<p>We made an error in data pre-processing during the last revision process. We have since found the error, corrected it, and reproduced the related results (specifically, Figures 4 &amp; 5, and Tables 4 &amp; 5). All of the original conclusions and patterns still hold and the values (Figures 4 &amp; 5) of the results are similar to the initial submission, prior to the submitted revision. Tables 4 &amp; 5 were newly added during the last revision at the suggestion of the reviewers.</p>
<p>To provide additional context, we now include the final values of the results (Figures 4 &amp; 5, and Tables 4 &amp; 5) in the manuscript for your kind consideration.</p>
<p>We apologize for any inconvenience this may cause and are grateful for your time and expertise in assessing our manuscript. We hope you find the revised manuscript suitable for publication; in the meantime, please let us know if you have any questions or concerns.</p>
<p>Sincerely,</p>
<p>Randi Foraker, PhD, MA, FAHA</p>
<p>Associate Professor of Medicine</p>
<p>Washington University in St. Louis</p>
<supplementary-material id="pone.0239007.s002" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" position="float" xlink:href="info:doi/10.1371/journal.pone.0239007.s002" xlink:type="simple">
<label>Attachment</label>
<caption>
<p>Submitted filename: <named-content content-type="submitted-filename">Response to reviewers.docx</named-content></p>
</caption>
</supplementary-material>
</body>
</sub-article>
<sub-article article-type="editor-report" id="pone.0239007.r005" specific-use="decision-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pone.0239007.r005</article-id>
<title-group>
<article-title>Decision Letter 2</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Ramagopalan</surname>
<given-names>Sreeram V.</given-names>
</name>
<role>Academic Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2021</copyright-year>
<copyright-holder>Sreeram V. Ramagopalan</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pone.0239007" document-id-type="doi" document-type="article" id="rel-obj005" link-type="peer-reviewed-article"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>2</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">6 Nov 2020</named-content>
</p>
<p>Application of a time-series deep learning model to predict cardiac dysrhythmias in electronic health records</p>
<p>PONE-D-20-10776R2</p>
<p>Dear Dr. Foraker,</p>
<p>We’re pleased to inform you that your manuscript has been judged scientifically suitable for publication and will be formally accepted for publication once it meets all outstanding technical requirements.</p>
<p>Within one week, you’ll receive an e-mail detailing the required amendments. When these have been addressed, you’ll receive a formal acceptance letter and your manuscript will be scheduled for publication.</p>
<p>An invoice for payment will follow shortly after the formal acceptance. To ensure an efficient process, please log into Editorial Manager at <ext-link ext-link-type="uri" xlink:href="http://www.editorialmanager.com/pone/" xlink:type="simple">http://www.editorialmanager.com/pone/</ext-link>, click the 'Update My Information' link at the top of the page, and double check that your user information is up-to-date. If you have any billing related questions, please contact our Author Billing department directly at <email xlink:type="simple">authorbilling@plos.org</email>.</p>
<p>If your institution or institutions have a press office, please notify them about your upcoming paper to help maximize its impact. If they’ll be preparing press materials, please inform our press team as soon as possible -- no later than 48 hours after receiving the formal acceptance. Your manuscript will remain under strict press embargo until 2 pm Eastern Time on the date of publication. For more information, please contact <email xlink:type="simple">onepress@plos.org</email>.</p>
<p>Kind regards,</p>
<p>Sreeram V. Ramagopalan</p>
<p>Academic Editor</p>
<p>PLOS ONE</p>
<p>Additional Editor Comments (optional):</p>
<p>Reviewers' comments:</p>
</body>
</sub-article>
<sub-article article-type="editor-report" id="pone.0239007.r006" specific-use="acceptance-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pone.0239007.r006</article-id>
<title-group>
<article-title>Acceptance letter</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Ramagopalan</surname>
<given-names>Sreeram V.</given-names>
</name>
<role>Academic Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2021</copyright-year>
<copyright-holder>Sreeram V. Ramagopalan</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pone.0239007" document-id-type="doi" document-type="article" id="rel-obj006" link-type="peer-reviewed-article"/>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">20 Nov 2020</named-content>
</p>
<p>PONE-D-20-10776R2</p>
<p>Application of a time-series deep learning model to predict cardiac dysrhythmias in electronic health records</p>
<p>Dear Dr. Foraker:</p>
<p>I'm pleased to inform you that your manuscript has been deemed suitable for publication in PLOS ONE. Congratulations! Your manuscript is now with our production department.</p>
<p>If your institution or institutions have a press office, please let them know about your upcoming paper now to help maximize its impact. If they'll be preparing press materials, please inform our press team within the next 48 hours. Your manuscript will remain under strict press embargo until 2 pm Eastern Time on the date of publication. For more information, please contact <email xlink:type="simple">onepress@plos.org</email>.</p>
<p>If we can help with anything else, please email us at <email xlink:type="simple">plosone@plos.org</email>.</p>
<p>Thank you for submitting your work to PLOS ONE and supporting open access.</p>
<p>Kind regards,</p>
<p>PLOS ONE Editorial Office Staff</p>
<p>on behalf of</p>
<p>Dr. Sreeram V. Ramagopalan</p>
<p>Academic Editor</p>
<p>PLOS ONE</p>
</body>
</sub-article>
</article>