<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1d3 20150301//EN" "http://jats.nlm.nih.gov/publishing/1.1d3/JATS-journalpublishing1.dtd">
<article article-type="research-article" dtd-version="1.1d3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">PLoS Negl Trop Dis</journal-id>
<journal-id journal-id-type="publisher-id">plos</journal-id>
<journal-id journal-id-type="pmc">plosntds</journal-id>
<journal-title-group>
<journal-title>PLOS Neglected Tropical Diseases</journal-title>
</journal-title-group>
<issn pub-type="epub">1935-2735</issn>
<publisher>
<publisher-name>Public Library of Science</publisher-name>
<publisher-loc>San Francisco, CA USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">PNTD-D-15-01964</article-id>
<article-id pub-id-type="doi">10.1371/journal.pntd.0004761</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Research Article</subject>
</subj-group>
<subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Mathematical and statistical techniques</subject><subj-group><subject>Statistical methods</subject><subj-group><subject>Forecasting</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Physical sciences</subject><subj-group><subject>Mathematics</subject><subj-group><subject>Statistics</subject><subj-group><subject>Statistical methods</subject><subj-group><subject>Forecasting</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Public and occupational health</subject></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Medical conditions</subject><subj-group><subject>Tropical diseases</subject><subj-group><subject>Neglected tropical diseases</subject><subj-group><subject>Dengue fever</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Medical conditions</subject><subj-group><subject>Infectious diseases</subject><subj-group><subject>Viral diseases</subject><subj-group><subject>Dengue fever</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>People and places</subject><subj-group><subject>Geographical locations</subject><subj-group><subject>Asia</subject><subj-group><subject>Thailand</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Medical conditions</subject><subj-group><subject>Infectious diseases</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Epidemiology</subject></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Epidemiology</subject><subj-group><subject>Infectious disease epidemiology</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Medical conditions</subject><subj-group><subject>Infectious diseases</subject><subj-group><subject>Infectious disease epidemiology</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>People and places</subject><subj-group><subject>Population groupings</subject><subj-group><subject>Ethnicities</subject><subj-group><subject>Asian people</subject><subj-group><subject>Thai people</subject></subj-group></subj-group></subj-group></subj-group></subj-group></article-categories>
<title-group>
<article-title>Challenges in Real-Time Prediction of Infectious Disease: A Case Study of Dengue in Thailand</article-title>
<alt-title alt-title-type="running-head">Real-Time Prediction of Dengue in Thailand</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0003-3503-9899</contrib-id>
<name name-style="western">
<surname>Reich</surname> <given-names>Nicholas G.</given-names></name>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="corresp" rid="cor001">*</xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Lauer</surname> <given-names>Stephen A.</given-names></name>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Sakrejda</surname> <given-names>Krzysztof</given-names></name>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Iamsirithaworn</surname> <given-names>Sopon</given-names></name>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Hinjoy</surname> <given-names>Soawapak</given-names></name>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Suangtho</surname> <given-names>Paphanij</given-names></name>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Suthachana</surname> <given-names>Suthanun</given-names></name>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Clapham</surname> <given-names>Hannah E.</given-names></name>
<xref ref-type="aff" rid="aff004"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Salje</surname> <given-names>Henrik</given-names></name>
<xref ref-type="aff" rid="aff004"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Cummings</surname> <given-names>Derek A. T.</given-names></name>
<xref ref-type="aff" rid="aff004"><sup>4</sup></xref>
<xref ref-type="aff" rid="aff005"><sup>5</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Lessler</surname> <given-names>Justin</given-names></name>
<xref ref-type="aff" rid="aff004"><sup>4</sup></xref>
</contrib>
</contrib-group>
<aff id="aff001">
<label>1</label>
<addr-line>Department of Biostatistics and Epidemiology, School of Public Health and Health Sciences, University of Massachusetts—Amherst, Amherst, Massachusetts, United States of America</addr-line>
</aff>
<aff id="aff002">
<label>2</label>
<addr-line>Department of Disease Control, Ministry of Public Health, Bangkok, Thailand</addr-line>
</aff>
<aff id="aff003">
<label>3</label>
<addr-line>Bureau of Epidemiology, Department of Disease Control, Ministry of Public Health, Bangkok, Thailand</addr-line>
</aff>
<aff id="aff004">
<label>4</label>
<addr-line>Department of Epidemiology, Johns Hopkins Bloomberg School of Public Health, Baltimore, Maryland, United States of America</addr-line>
</aff>
<aff id="aff005">
<label>5</label>
<addr-line>Emerging Pathogens Institute, Department of Biology, University of Florida, Gainesville, Florida, United States of America</addr-line>
</aff>
<contrib-group>
<contrib contrib-type="editor" xlink:type="simple">
<name name-style="western">
<surname>Scarpino</surname> <given-names>Samuel V.</given-names></name>
<role>Editor</role>
<xref ref-type="aff" rid="edit1"/>
</contrib>
</contrib-group>
<aff id="edit1">
<addr-line>Santa Fe Institute, UNITED STATES</addr-line>
</aff>
<author-notes>
<fn fn-type="conflict" id="coi001">
<p>The authors have declared that no competing interests exist.</p>
</fn>
<fn fn-type="con" id="contrib001">
<p>Conceived and designed the experiments: NGR SAL KS SI HEC HS DATC JL. Performed the experiments: NGR SAL KS JL. Analyzed the data: NGR SAL KS. Contributed reagents/materials/analysis tools: SI SH PS SS. Wrote the paper: NGR SAL KS JL. Supervised the surveillance activities and data management in Thailand: SI SH. Prepared and managed the data: KS PS SS.</p>
</fn>
<corresp id="cor001">* E-mail: <email xlink:type="simple">nick@umass.edu</email></corresp>
</author-notes>
<pub-date pub-type="collection">
<month>6</month>
<year>2016</year>
</pub-date>
<pub-date pub-type="epub">
<day>15</day>
<month>6</month>
<year>2016</year>
</pub-date>
<volume>10</volume>
<issue>6</issue>
<elocation-id>e0004761</elocation-id>
<history>
<date date-type="received">
<day>23</day>
<month>11</month>
<year>2015</year>
</date>
<date date-type="accepted">
<day>14</day>
<month>5</month>
<year>2016</year>
</date>
</history>
<permissions>
<copyright-year>2016</copyright-year>
<copyright-holder>Reich et al</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="info:doi/10.1371/journal.pntd.0004761"/>
<abstract>
<p>Epidemics of communicable diseases place a huge burden on public health infrastructures across the world. Producing accurate and actionable forecasts of infectious disease incidence at short and long time scales will improve public health response to outbreaks. However, scientists and public health officials face many obstacles in trying to create such real-time forecasts of infectious disease incidence. Dengue is a mosquito-borne virus that annually infects over 400 million people worldwide. We developed a real-time forecasting model for dengue hemorrhagic fever in the 77 provinces of Thailand. We created a practical computational infrastructure that generated multi-step predictions of dengue incidence in Thai provinces every two weeks throughout 2014. These predictions show mixed performance across provinces, out-performing seasonal baseline models in over half of provinces at a 2 month horizon. Additionally, to assess the degree to which delays in case reporting make long-range prediction a challenging task, we compared the performance of our real-time predictions with predictions made with fully reported data. This paper provides valuable lessons for the implementation of real-time predictions in the context of public health decision making.</p>
</abstract>
<abstract abstract-type="summary">
<title>Author Summary</title>
<p>Predicting the course of infectious disease outbreaks in real-time is a challenging task. It requires knowledge of the particular disease system as well as a pipeline that can turn raw data from a public health surveillance system into calibrated predictions of disease incidence. Dengue is a mosquito-borne infectious disease that places an immense public health and economic burden upon countries around the world, especially in tropical areas. In 2014 our research team, a collaboration of the Ministry of Public Health of Thailand and academic researchers from the United States, implemented a system for generating real-time forecasts of dengue hemorrhagic fever based on the disease surveillance reports from Thailand. We compared predictions from several different statistical models, identifying locations and times where our predictions were accurate. We also quantified the extent to which delayed reporting of cases in real-time impacted our predictions. Broadly speaking, improving real-time predictions can enable more targeted, timely interventions and risk communication, both of which have a measurable impact on disease spread in epidemic and pandemic scenarios. It is vital that we continue to build knowledge about the best ways to make these forecasts and integrate them into public health decision making.</p>
</abstract>
<funding-group>
<award-group id="award001">
<funding-source>
<institution-wrap>
<institution-id institution-id-type="funder-id">http://dx.doi.org/10.13039/100000060</institution-id>
<institution>National Institute of Allergy and Infectious Diseases</institution>
</institution-wrap>
</funding-source>
<award-id>R21AI115173</award-id>
<principal-award-recipient>
<name name-style="western">
<surname>Reich</surname> <given-names>Nicholas G.</given-names></name>
</principal-award-recipient>
</award-group>
<award-group id="award002">
<funding-source>
<institution-wrap>
<institution-id institution-id-type="funder-id">http://dx.doi.org/10.13039/100000060</institution-id>
<institution>National Institute of Allergy and Infectious Diseases</institution>
</institution-wrap>
</funding-source>
<award-id>R01AI102939</award-id>
<principal-award-recipient>
<name name-style="western">
<surname>Lessler</surname> <given-names>Justin</given-names></name>
</principal-award-recipient>
</award-group>
<funding-statement>This work was supported by the National Institute of Allergy and Infectious Diseases at the National Institutes of Health (grants R21AI115173 to NGR and DATC and R01AI102939). The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</funding-statement>
</funding-group>
<counts>
<fig-count count="6"/>
<table-count count="3"/>
<page-count count="17"/>
</counts>
<custom-meta-group>
<custom-meta id="data-availability">
<meta-name>Data Availability</meta-name>
<meta-value>Model predictions of dengue fever data used and produced by the analyses in this manuscript are available online at <ext-link ext-link-type="uri" xlink:href="https://github.com/reichlab/dengue-thailand-2014-forecasts" xlink:type="simple">https://github.com/reichlab/dengue-thailand-2014-forecasts</ext-link>. Dengue surveillance data are owned by the Thailand Ministry of Public Health and were made available to the US-based researchers via a data transfer agreement. Requests for data access can be addressed to Dr. Sopon Iamsirithaworn: <email xlink:type="simple">sopon@ddc.mail.go.th</email> or Dr. Yongjua Laosiritaworn: <email xlink:type="simple">yongjua@gmail.com</email>.</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="sec001" sec-type="intro">
<title>Introduction</title>
<p>Producing accurate and actionable forecasts of infectious disease incidence at short and long time scales will improve public health response to outbreaks. Real-time forecasts of infectious disease outbreaks can facilitate targeted intervention and prevention strategies, such as increased healthcare staffing or vector control measures. However, we currently have a limited understanding of the best ways to integrate forecasts into real-time public health decision-making.</p>
<p>Dengue is a mosquito-borne infectious disease that places an immense public health and economic burden upon countries around the world, especially in tropical areas. A severe form of the disease, dengue hemorrhagic fever (DHF), may lead to debilitating pain, organ shock, and even death [<xref ref-type="bibr" rid="pntd.0004761.ref001">1</xref>]. Currently over 2.5 billion individuals worldwide are at risk of infection with dengue, a mosquito-borne RNA virus [<xref ref-type="bibr" rid="pntd.0004761.ref002">2</xref>]. Global incidence of dengue has increased significantly over the past few decades, with an estimated global incidence of about 400 million infections each year [<xref ref-type="bibr" rid="pntd.0004761.ref003">3</xref>].</p>
<p>Dengue is endemic in Thailand, which has 77 provinces including one large municipality (Bangkok). National annual incidence rates of reported dengue in Thailand range between 30 cases per 100,000 population and 224 cases per 100,000 population [<xref ref-type="bibr" rid="pntd.0004761.ref004">4</xref>]. Some estimates suggest that between 50% and 80% of cases are inapparent and hence are difficult to detect clinically and often go unreported [<xref ref-type="bibr" rid="pntd.0004761.ref005">5</xref>–<xref ref-type="bibr" rid="pntd.0004761.ref007">7</xref>]. Annual outbreaks show dynamic temporal and spatial patterns, with great year-to-year and across-province variation, making public health planning and resource allocation an ongoing challenge [<xref ref-type="bibr" rid="pntd.0004761.ref008">8</xref>, <xref ref-type="bibr" rid="pntd.0004761.ref009">9</xref>].</p>
<p>With the maturation of disease surveillance and reporting systems in recent years, real-time disease forecasting has become a realistic goal in some settings. Recognizing the importance of this emerging field, several governmental agencies have established disease prediction contests in recent years, with the goal of having contestants produce accurate forecasts: e.g. a 2013 CDC influenza prediction challenge [<xref ref-type="bibr" rid="pntd.0004761.ref010">10</xref>], a 2014 DARPA chikungunya prediction challenge [<xref ref-type="bibr" rid="pntd.0004761.ref011">11</xref>], and a 2015 National Science and Technology Council interagency Working Group dengue prediction challenge [<xref ref-type="bibr" rid="pntd.0004761.ref012">12</xref>]. However, researchers and practitioners are still working to understand and establish a set of best practices for implementing real-time prediction algorithms in practice.</p>
<p>Creating predictions in real-time poses logistical, computational, and statistical challenges. Logistically, raw data must be made available in a standard format for processing into analysis datasets. Historical data is also needed to allow for training of the prediction model(s). To enable transparent evaluations, predictions should be formally registered and archived in a publicly available database. Computational infrastructure is needed to transform and/or merge raw data into the analysis dataset and to run the models themselves. Analytical challenges include appropriate model training, selection, and validation, considering adjustments for delayed or incomplete case reporting. Depending on the methods used, additional statistical work may be necessary to accurately report uncertainty in the reported predictions. Below, we describe our approaches to dealing with these challenges.</p>
<p>In this manuscript we present the results from the first year of forecasting DHF across the 77 provinces in Thailand. In 2014 our research team, a collaboration of the Ministry of Public Health of Thailand and researchers from multiple academic institutions, implemented a system for generating real-time forecasts of DHF based on current disease surveillance reports from Thailand. This paper illustrates several key components of a rigorous real-time prediction framework, including:</p>
<list list-type="bullet">
<list-item>
<p>a reliable pipeline for data transfer, cleaning, and analysis, with a data storage architecture that can recreate datasets that were available at a particular time (Section 2),</p>
</list-item>
<list-item>
<p>a statistical model of disease transmission used to generate real-time predictions of infectious disease incidence (Section 3),</p>
</list-item>
<list-item>
<p>an appropriate and rigorous model validation framework, including aggregating evaluations across location, calendar time, and prediction horizon (Section 4), and</p>
</list-item>
<list-item>
<p>an assessment of the impact of case reporting delays on the accuracy of predictions (Sections 3.3 and 4.2).</p>
</list-item>
</list>
<p>Valuable efforts have been made to create, validate, and operationalize real-time influenza predictions for the US [<xref ref-type="bibr" rid="pntd.0004761.ref013">13</xref>], although these efforts have not faced the same challenges of systematic delays in reported data. The infrastructure that we present in this manuscript provides valuable lessons for other collaborative prediction efforts between public health agencies and academic partners.</p>
</sec>
<sec id="sec002" sec-type="materials|methods">
<title>Methods</title>
<sec id="sec003">
<title>Data overview</title>
<p>The data presented here come from the national surveillance system run by the Ministry of Public Health in Thailand. Monthly dengue hemorrhagic fever (DHF) case counts for each province are available from January 1968 through December 2005. Individual case reports (hereafter referred to as “line-list” data) were available for dengue fever (DF), DHF, and dengue shock syndrome from January 1, 1999 through December 31, 2014. The line-list data contains information on each case, including date of symptom onset, home address-code of the case (similar to a U.S. zip code), disease diagnosis code, and demographic information (sex, marital status, age, etc.). In years where we had overlapping sources for case data, the line-list data were used. A summary of province-level characteristics for all provinces in Thailand is provided in Table C in <xref ref-type="supplementary-material" rid="pntd.0004761.s001">S1 Appendix</xref>. Since 1968, several provinces have split into multiple provinces. Details on how we accommodate these province separations are available in Table D in <xref ref-type="supplementary-material" rid="pntd.0004761.s001">S1 Appendix</xref>. In one instance, the counts associated with a province (Bueng Kan) that split from another (Nong Khai) in 2011 have continued to be counted with the original province since we do not yet have enough data to predict for the new province.</p>
<p>Theoretical work demonstrates that by choosing the generation time as the discrete time interval for case reporting, the case reports may more easily be used to model the reproductive rate of the disease [<xref ref-type="bibr" rid="pntd.0004761.ref014">14</xref>]. The generation time for dengue is two weeks, hence we aggregated the line-list data into biweekly intervals and interpolated the monthly counts into biweekly counts. (We used a standardized definition of biweeks based on calendar dates; see Table A in <xref ref-type="supplementary-material" rid="pntd.0004761.s001">S1 Appendix</xref>.) Interpolation was performed by fitting a monotonically increasing smooth spline to the cumulative case counts in each province, and then taking the differences between the estimated cumulative counts at each interval as the number of incident cases in a given interval.</p>
<p>We chose to use only DHF cases because: (1) DHF is more consistently reported across the 47 years of data collection, (2) DHF is less likely than DF to be misdiagnosed or to be differentially detected over time, and (3) from a public health perspective, DHF is a more relevant outcome, as it is a life-threatening condition and requires medical attention.</p>
<p>The analysis was conducted using the R language [<xref ref-type="bibr" rid="pntd.0004761.ref015">15</xref>]. Data and code related to the analysis are available at <ext-link ext-link-type="uri" xlink:href="https://github.com/reichlab/dengue-thailand-2014-forecasts" xlink:type="simple">https://github.com/reichlab/dengue-thailand-2014-forecasts</ext-link>.</p>
</sec>
<sec id="sec004">
<title>Ethics statement</title>
<p>The research aspects of this study were approved by the Johns Hopkins Bloomberg School of Public Health and University of Massachusetts Amherst institutional review boards. Patient data was analyzed anonymously.</p>
</sec>
<sec id="sec005">
<title>Real-time data management</title>
<p>We established a secure data transfer process to transmit data from the Thai disease surveillance system to U.S. researchers. Throughout the 2014 calendar year, Thai public health officials transmitted data approximately every two weeks to a secure server based in Baltimore, Maryland (Table B in <xref ref-type="supplementary-material" rid="pntd.0004761.s001">S1 Appendix</xref>). These data were then loaded into a PostgreSQL database containing all of the data, including monthly case counts and a table with all line-list data received to date. The final report containing a cleaned and finalized record of all cases for the 2014 season was delivered in April 2015. As of that time, this database held records of 2,503,631 unique cases of dengue in Thailand for the years 1968 through 2014, including records of 2,029,326 DHF cases (<xref ref-type="fig" rid="pntd.0004761.g001">Fig 1</xref>).</p>
<fig id="pntd.0004761.g001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pntd.0004761.g001</object-id>
<label>Fig 1</label>
<caption>
<title>Raw dengue hemorrhagic fever case counts for 77 provinces of Thailand across 47 years (1968–2014).</title>
<p>Provinces are ordered by population (larger populations on the top). Gray regions indicate periods of time when a province was not in existence.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pntd.0004761.g001" xlink:type="simple"/>
</fig>
<p>When forecasting, we will only ever have the cases recorded prior to the time the predictions are made. So that we could compare the expected real-time performance of models as if they had been applied in real-time, all data were archived in the database with a time-stamp on arrival. This enabled researchers to “turn back the clock”, i.e. to query data that was available at a particular point in time. We refer to an “analysis date” as the date at which a multi-step forecast was made, using available data. Throughout this manuscript, we use the term “nowcasts” to refer to predictions made for timepoints on or prior to the analysis date and “forecasts” to refer to predictions made for timepoints at or after the analysis date.</p>
</sec>
<sec id="sec006">
<title>Accounting for delays in case reporting</title>
<p>A key property of a surveillance system is the reporting delay, defined for our purposes as the duration of time between symptom onset and the case being available for analysis. During 2014 reporting delays for dengue ranged from 1 to 50 weeks. This was due to the process of reporting cases. Case reports typically follow a path of reporting from hospitals to district surveillance centers and then to provincial health offices before arriving at the national surveillance center. In all provinces, 50% of cases were reported within 5 weeks and 75% of cases were reported within 6 weeks. However, a small fraction of cases took quite a bit longer. To account for reporting delays, our models specified a reporting lag <italic>l</italic>, in biweeks. Data with onset dates within the last <italic>l</italic> biweeks were considered to be not fully reported and left out from the analysis. We present results from the models with a lag of 6 biweeks (about 3 months), as these produced stable predictions across the entire country. More sophisticated adjustments for reporting delays are the subject of our team’s ongoing research.</p>
</sec>
<sec id="sec007">
<title>Timing of predictions</title>
<p>While the predictions presented in this manuscript were made retrospectively, in 2015 when complete data were available, they were constructed to mimic real-time predictions by using only the data available at each analysis date in 2014. During the 2014 calendar year, predictions from a similar model were generated in real-time and disseminated to the Thai Ministry of Public Health. We chose the set of analysis dates as the first day of each biweek for which data had been delivered in the previous biweek (Table B in <xref ref-type="supplementary-material" rid="pntd.0004761.s001">S1 Appendix</xref>). For each analysis date in 2014, we used the candidate model to generate “real-time” province-level biweekly predictions for the subsequent 10 biweeks (5 months).</p>
</sec>
<sec id="sec008">
<title>Disease model: Features and estimation</title>
<sec id="sec009">
<title>Statistical model</title>
<p>We assumed the biweekly province-level reported cases follow a Poisson distribution, where the previous biweek’s reported cases serve as an offset term. Let the number of cases with onset occurring within time interval <italic>t</italic> in province <italic>i</italic> be represented as a random variable <italic>Y</italic><sub><italic>t</italic>, <italic>i</italic></sub>, then
<disp-formula id="pntd.0004761.e001"><alternatives><graphic id="pntd.0004761.e001g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pntd.0004761.e001" xlink:type="simple"/><mml:math display="block" id="M1"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:msub><mml:mi>Y</mml:mi> <mml:mrow><mml:mi>t</mml:mi> <mml:mo>,</mml:mo> <mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mtd> <mml:mtd><mml:mo>∼</mml:mo></mml:mtd> <mml:mtd columnalign="left"><mml:mrow><mml:mi>P</mml:mi> <mml:mi>o</mml:mi> <mml:mi>i</mml:mi> <mml:mi>s</mml:mi> <mml:mi>s</mml:mi> <mml:mi>o</mml:mi> <mml:mi>n</mml:mi> <mml:mo>(</mml:mo> <mml:msub><mml:mi>λ</mml:mi> <mml:mrow><mml:mi>t</mml:mi> <mml:mo>,</mml:mo> <mml:mi>i</mml:mi></mml:mrow></mml:msub> <mml:mo>·</mml:mo> <mml:msub><mml:mi>y</mml:mi> <mml:mrow><mml:mi>t</mml:mi> <mml:mo>-</mml:mo> <mml:mn>1</mml:mn> <mml:mo>,</mml:mo> <mml:mi>i</mml:mi></mml:mrow></mml:msub> <mml:mo>)</mml:mo></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives></disp-formula>
where the lag-1 term <italic>y</italic><sub><italic>t</italic> − 1, <italic>i</italic></sub> is used as an offset in this model. We adopt the convention of using lower-case <italic>y</italic><sub><italic>t</italic>, <italic>i</italic></sub> to indicate previously observed case counts that are treated as fixed inputs in our model. We explicitly model the rate <italic>λ</italic> as
<disp-formula id="pntd.0004761.e002"><alternatives><graphic id="pntd.0004761.e002g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pntd.0004761.e002" xlink:type="simple"/><mml:math display="block" id="M2"><mml:mtable displaystyle="true"><mml:mtr><mml:mtd columnalign="right"><mml:mrow><mml:mo form="prefix">log</mml:mo> <mml:msub><mml:mi>λ</mml:mi> <mml:mrow><mml:mi>t</mml:mi> <mml:mo>,</mml:mo> <mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:mtd> <mml:mtd><mml:mo>=</mml:mo></mml:mtd> <mml:mtd columnalign="left"><mml:mrow><mml:msub><mml:mi>f</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mrow><mml:mo>(</mml:mo> <mml:mi>b</mml:mi> <mml:mrow><mml:mo>(</mml:mo> <mml:mi>t</mml:mi> <mml:mo>)</mml:mo></mml:mrow> <mml:mo>)</mml:mo></mml:mrow> <mml:mo>+</mml:mo> <mml:msub><mml:mi>g</mml:mi> <mml:mi>i</mml:mi></mml:msub> <mml:mrow><mml:mo>(</mml:mo> <mml:mi>t</mml:mi> <mml:mo>)</mml:mo></mml:mrow> <mml:mo>+</mml:mo> <mml:munder><mml:mo>∑</mml:mo> <mml:mrow><mml:mi>j</mml:mi> <mml:mo>∈</mml:mo> <mml:mi mathvariant="script">C</mml:mi></mml:mrow></mml:munder> <mml:munder><mml:mo>∑</mml:mo> <mml:mrow><mml:mi>k</mml:mi> <mml:mo>∈</mml:mo> <mml:mi mathvariant="script">L</mml:mi></mml:mrow></mml:munder> <mml:msub><mml:mi>α</mml:mi> <mml:mrow><mml:mi>j</mml:mi> <mml:mo>,</mml:mo> <mml:mi>k</mml:mi></mml:mrow></mml:msub> <mml:mo form="prefix">log</mml:mo> <mml:mfrac><mml:mrow><mml:msub><mml:mi>y</mml:mi> <mml:mrow><mml:mi>t</mml:mi> <mml:mo>-</mml:mo> <mml:mi>k</mml:mi> <mml:mo>,</mml:mo> <mml:mi>j</mml:mi></mml:mrow></mml:msub> <mml:mo>+</mml:mo> <mml:mn>1</mml:mn></mml:mrow> <mml:mrow><mml:msub><mml:mi>y</mml:mi> <mml:mrow><mml:mi>t</mml:mi> <mml:mo>-</mml:mo> <mml:mi>k</mml:mi> <mml:mo>-</mml:mo> <mml:mn>1</mml:mn> <mml:mo>,</mml:mo> <mml:mi>j</mml:mi></mml:mrow></mml:msub> <mml:mo>+</mml:mo> <mml:mn>1</mml:mn></mml:mrow></mml:mfrac></mml:mrow></mml:mtd></mml:mtr></mml:mtable></mml:math></alternatives> <label>(1)</label></disp-formula>
where <inline-formula id="pntd.0004761.e003"><alternatives><graphic id="pntd.0004761.e003g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pntd.0004761.e003" xlink:type="simple"/><mml:math display="inline" id="M3"><mml:mi mathvariant="script">C</mml:mi></mml:math></alternatives></inline-formula> is the set of <italic>J</italic> most-correlated provinces with province <italic>i</italic> and <inline-formula id="pntd.0004761.e004"><alternatives><graphic id="pntd.0004761.e004g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pntd.0004761.e004" xlink:type="simple"/><mml:math display="inline" id="M4"><mml:mi mathvariant="script">L</mml:mi></mml:math></alternatives></inline-formula> is the set of lag times used in the model; <italic>b</italic>(<italic>t</italic>) is the biweek of time <italic>t</italic>; <italic>f</italic><sub><italic>i</italic></sub>(<italic>b</italic>(<italic>t</italic>)) is assumed to be a province-specific cyclical cubic spline with period of one year (i.e. 26 biweeks); and <italic>g</italic><sub><italic>i</italic></sub>(<italic>t</italic>) is a province-specific smooth spline to capture secular trends over time. Adding 1 to the numerator and denominator of the correlated province covariates ensures that the quantities are defined when no case counts are observed at a particular province-biweek. This method of adjusting for zero counts has been interpreted as an “immigration rate” added to each observation [<xref ref-type="bibr" rid="pntd.0004761.ref016">16</xref>].</p>
<p>We note that the model can be expressed as
<disp-formula id="pntd.0004761.e005"><alternatives><graphic id="pntd.0004761.e005g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pntd.0004761.e005" xlink:type="simple"/><mml:math display="block" id="M5"><mml:mrow><mml:msub><mml:mi>λ</mml:mi> <mml:mrow><mml:mi>t</mml:mi> <mml:mo>,</mml:mo> <mml:mi>i</mml:mi></mml:mrow></mml:msub> <mml:mo>=</mml:mo> <mml:mi mathvariant="double-struck">E</mml:mi> <mml:mfenced close="]" open="[" separators=""><mml:mfrac><mml:msub><mml:mi>Y</mml:mi> <mml:mrow><mml:mi>t</mml:mi> <mml:mo>,</mml:mo> <mml:mi>i</mml:mi></mml:mrow></mml:msub> <mml:msub><mml:mi>y</mml:mi> <mml:mrow><mml:mi>t</mml:mi> <mml:mo>-</mml:mo> <mml:mn>1</mml:mn> <mml:mo>,</mml:mo> <mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mfrac> <mml:mrow><mml:mo>|</mml:mo></mml:mrow> <mml:msub><mml:mi>y</mml:mi> <mml:mrow><mml:mi>t</mml:mi> <mml:mo>-</mml:mo> <mml:mn>1</mml:mn> <mml:mo>,</mml:mo> <mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mfenced> <mml:mo>≈</mml:mo> <mml:msub><mml:mi>R</mml:mi> <mml:mrow><mml:mi>t</mml:mi> <mml:mo>,</mml:mo> <mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></alternatives> <label>(2)</label></disp-formula>
which shows that <italic>λ</italic><sub><italic>t</italic>, <italic>i</italic></sub> can be interpreted as the expected reproductive rate at time <italic>t</italic> in location <italic>i</italic>, or <italic>R</italic><sub><italic>t</italic>, <italic>i</italic></sub> [<xref ref-type="bibr" rid="pntd.0004761.ref014">14</xref>].</p>
<p>These models were fit using the Generalized Additive Model (GAM) framework (i.e. as generalized linear models with smooth splines estimated by penalized maximum likelihood) [<xref ref-type="bibr" rid="pntd.0004761.ref017">17</xref>], using the mgcv package for R [<xref ref-type="bibr" rid="pntd.0004761.ref015">15</xref>, <xref ref-type="bibr" rid="pntd.0004761.ref018">18</xref>]. Each province’s time-series was subset to remove any cases from the previous <italic>l</italic> biweeks. The remaining data were smoothed before fitting the model and making predictions.</p>
<p>Seasonal patterns were modeled using a penalized cubic regression spline, constrained to have a cycle of one year with continuous second derivatives at the endpoints. Secular trends were modeled using penalized cubic splines with 5 equally spaced knots over 47 years (roughly one knot per decade).</p>
<p>Information on epidemic progression elsewhere in the country was taken into account by including reported case counts at 1 lagged timepoint for the 3 most correlated provinces with province <italic>i</italic> in the data used to fit the model. Details of this model selection are provided in <xref ref-type="supplementary-material" rid="pntd.0004761.s001">S1 Appendix</xref>.</p>
<p>We approximated the predictive distribution for all provinces using sequential stochastic simulations of the joint distribution of the case counts for each province. We created <italic>M</italic> independently evolving sequential chains of predictions by drawing, at each prediction time point, from the province-specific Poisson distribution with means given by <xref ref-type="disp-formula" rid="pntd.0004761.e002">eq (1)</xref>. For example, if data through time <italic>t</italic>* was used to fit the models for all locations, then a single simulated prediction consisted of a simulated Markov chain of dependent observations for timepoints <italic>t</italic>* + 1, <italic>t</italic>* + 2, …, <italic>t</italic>* + <italic>H</italic>, across all provinces, where <italic>H</italic> was the largest horizon considered. To make a prediction for province <italic>i</italic> at time <italic>t</italic>* + <italic>h</italic> in the <italic>m</italic><sup><italic>th</italic></sup> chain, we draw
<disp-formula id="pntd.0004761.e006"><alternatives><graphic id="pntd.0004761.e006g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pntd.0004761.e006" xlink:type="simple"/><mml:math display="block" id="M6"><mml:mrow><mml:msubsup><mml:mover accent="true"><mml:mi>y</mml:mi> <mml:mo>^</mml:mo></mml:mover> <mml:mrow><mml:msup><mml:mi>t</mml:mi> <mml:mo>*</mml:mo></mml:msup> <mml:mo>+</mml:mo> <mml:mi>h</mml:mi> <mml:mo>,</mml:mo> <mml:mi>i</mml:mi></mml:mrow> <mml:mi>m</mml:mi></mml:msubsup> <mml:mo>∼</mml:mo> <mml:mi>P</mml:mi> <mml:mi>o</mml:mi> <mml:mi>i</mml:mi> <mml:mi>s</mml:mi> <mml:mi>s</mml:mi> <mml:mi>o</mml:mi> <mml:mi>n</mml:mi> <mml:mrow><mml:mo>(</mml:mo> <mml:msubsup><mml:mover accent="true"><mml:mi>λ</mml:mi> <mml:mo>^</mml:mo></mml:mover> <mml:mrow><mml:msup><mml:mi>t</mml:mi> <mml:mo>*</mml:mo></mml:msup> <mml:mo>+</mml:mo> <mml:mi>h</mml:mi> <mml:mo>,</mml:mo> <mml:mi>i</mml:mi></mml:mrow> <mml:mi>m</mml:mi></mml:msubsup> <mml:mo>·</mml:mo> <mml:msubsup><mml:mover accent="true"><mml:mi>y</mml:mi> <mml:mo>^</mml:mo></mml:mover> <mml:mrow><mml:msup><mml:mi>t</mml:mi> <mml:mo>*</mml:mo></mml:msup> <mml:mo>+</mml:mo> <mml:mi>h</mml:mi> <mml:mo>-</mml:mo> <mml:mn>1</mml:mn> <mml:mo>,</mml:mo> <mml:mi>i</mml:mi></mml:mrow> <mml:mi>m</mml:mi></mml:msubsup> <mml:mo>)</mml:mo></mml:mrow></mml:mrow></mml:math></alternatives></disp-formula>
where <inline-formula id="pntd.0004761.e007"><alternatives><graphic id="pntd.0004761.e007g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pntd.0004761.e007" xlink:type="simple"/><mml:math display="inline" id="M7"><mml:mrow><mml:msubsup><mml:mover accent="true"><mml:mi>λ</mml:mi> <mml:mo>^</mml:mo></mml:mover> <mml:mrow><mml:msup><mml:mi>t</mml:mi> <mml:mo>*</mml:mo></mml:msup> <mml:mo>+</mml:mo> <mml:mi>h</mml:mi> <mml:mo>,</mml:mo> <mml:mi>i</mml:mi></mml:mrow> <mml:mi>m</mml:mi></mml:msubsup></mml:mrow></mml:math></alternatives></inline-formula> is computed directly by plugging in the observed and predicted data prior to <italic>t</italic>* + <italic>h</italic> to the fitted model, and we use observed case data in the first step of prediction, i.e. <inline-formula id="pntd.0004761.e008"><alternatives><graphic id="pntd.0004761.e008g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pntd.0004761.e008" xlink:type="simple"/><mml:math display="inline" id="M8"><mml:mrow><mml:msubsup><mml:mover accent="true"><mml:mi>y</mml:mi> <mml:mo>^</mml:mo></mml:mover> <mml:mrow><mml:msup><mml:mi>t</mml:mi> <mml:mo>*</mml:mo></mml:msup> <mml:mo>,</mml:mo> <mml:mi>i</mml:mi></mml:mrow> <mml:mi>m</mml:mi></mml:msubsup> <mml:mo>=</mml:mo> <mml:msub><mml:mi>y</mml:mi> <mml:mrow><mml:msup><mml:mi>t</mml:mi> <mml:mo>*</mml:mo></mml:msup> <mml:mo>,</mml:mo> <mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow></mml:math></alternatives></inline-formula> for all <italic>m</italic>. Due to the assumed interrelations between the provinces, we simulated counts for all provinces at a single timepoint before moving on to the next timepoint. For a given prediction horizon <italic>h</italic>, this process generates an empirical posterior predictive distribution for each province by evaluating the <italic>M</italic> different predictions for <italic>y</italic><sub><italic>t</italic>*+<italic>h</italic>, <italic>i</italic></sub>. 
Prediction intervals are generated by taking quantiles (e.g., the 2.5% and 97.5%) of this distribution.</p>
</sec>
<sec id="sec010">
<title>Metrics for evaluating predictions</title>
<p>We used several different metrics for evaluating our predicted case counts. We calculated Spearman correlation coefficients to measure the agreement between predicted and observed values. We also calculated the mean absolute error (MAE) by aggregating across analysis times within a given province. We computed the relative mean absolute error (relative MAE) comparing the predictions for a given province to predictions from a seasonal median baseline model. The seasonal baseline model for a given province is the median value of previously observed counts for the given biweek in that province over the past 10 years. The use of absolute error metrics over squared error metrics has been encouraged to enhance interpretability [<xref ref-type="bibr" rid="pntd.0004761.ref019">19</xref>, <xref ref-type="bibr" rid="pntd.0004761.ref020">20</xref>]. Additionally, we calculated empirical 95% prediction interval coverage as the fraction of times the 95% prediction interval covered the true value.</p>
</sec>
<sec id="sec011">
<title>Real-time vs. full-data predictions</title>
<p>We evaluated the performance of our real-time forecasts against predictions that could have been made had a full dataset been available at the analysis dates. To make this comparison, we ran a set of multi-step forecasts for 2014 at each analysis date using the complete data for 2014 that was finalized in late April 2015. We designed this experiment to focus on two comparisons. First, we aimed to compare real-time and full-data predictions where the multi-step predictions began at the same timepoint (<xref ref-type="fig" rid="pntd.0004761.g002">Fig 2A vs. 2B</xref>). This analysis addressed the question of how much the real-time predictions were impacted by the delays in case reporting, even after beginning the predictions 3 months in the past. Second, we aimed to compare, by horizon, the real-time and full-data predictions where the origin of the multi-step full-data predictions was anchored at the analysis time but the origin of the real-time predictions was 6 biweeks earlier to account for delayed reporting of case data (<xref ref-type="fig" rid="pntd.0004761.g002">Fig 2A vs. 2C</xref>). This analysis addressed the question of how much better or worse our model would have performed if full data were available without any delays.</p>
<fig id="pntd.0004761.g002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pntd.0004761.g002</object-id>
<label>Fig 2</label>
<caption>
<title>This figure illustrates three different methods used to create forecasts.</title>
<p>Panel A shows predictions made using only data that was available at the analysis time, and ignoring the most recent six biweeks of reported cases. Panel B shows predictions that used fully observed data (including data that was not available at the analysis time) but still ignored cases from the six biweeks preceding the analysis time. Panel C shows predictions that could have been made at the analysis time if no reporting delays existed and all data that eventually was reported had been available in real-time.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pntd.0004761.g002" xlink:type="simple"/>
</fig>
</sec>
</sec>
</sec>
<sec id="sec012" sec-type="results">
<title>Results</title>
<sec id="sec013">
<title>Summary of province-level forecasts</title>
<p>In general, the model predictions showed good, if overconfident, performance at short horizons but less accuracy and high uncertainty at longer horizons. Across all provinces, the correlation between observed and predicted values was 0.89 at a horizon of 1 biweek (2 weeks) and 0.33 at a horizon of 10 biweeks, or approximately 5 months (see <xref ref-type="table" rid="pntd.0004761.t001">Table 1</xref>). Across all provinces, observed 95% prediction interval coverage was lower than expected at a horizon of 1 biweek (78%), showing that the models were overconfident in their short-term predictions. This prediction interval coverage increased to 97% at 4 and 5 biweeks (2-2.5 month) prediction horizon, and was near 95% for longer horizons. This indicates that our models often had an abundance of uncertainty at mid-term horizons. <xref ref-type="fig" rid="pntd.0004761.g003">Fig 3</xref> shows case counts and predictions aggregated across all provinces at horizons of 1, 2, and 3 biweeks (2, 4, and 6 weeks).</p>
<table-wrap id="pntd.0004761.t001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pntd.0004761.t001</object-id>
<label>Table 1</label>
<caption>
<title>Summary of real-time prediction accuracy, by prediction horizon.</title>
<p>These results are aggregated across all provinces. The <italic>R</italic><sup>2</sup> and 95% PI coverage columns present the overall correlation coefficient and prediction interval coverage. The relative MAE columns show five quantiles of the distribution of province-level relative MAEs comparing the real-time model at the given horizon to a seasonal baseline model at the given horizon: <italic>Q</italic><sub>5</sub> (the 5th percentile), <italic>Q</italic><sub>25</sub> (25th percentile), <italic>Q</italic><sub>50</sub> (median), <italic>Q</italic><sub>75</sub> (75th percentile), and <italic>Q</italic><sub>95</sub> (the 95th percentile). The relative MAEs were calculated as the MAE from the real-time predictions divided by the MAE from the seasonal baseline predictions; therefore, values larger than 1 indicate that the real-time models showed more absolute error on average than the seasonal models.</p>
</caption>
<alternatives>
<graphic id="pntd.0004761.t001g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pntd.0004761.t001" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="justify" colspan="3"/>
<th align="center" colspan="5">relative MAE (real-time vs. seasonal baseline)</th>
</tr>
<tr>
<th align="center">horizon (h)</th>
<th align="center"><italic>R</italic><sup>2</sup></th>
<th align="center">95% PI coverage</th>
<th align="center"><italic>Q</italic><sub>5</sub></th>
<th align="center"><italic>Q</italic><sub>25</sub></th>
<th align="center"><italic>Q</italic><sub>50</sub> (median)</th>
<th align="center"><italic>Q</italic><sub>75</sub></th>
<th align="center"><italic>Q</italic><sub>95</sub></th>
</tr>
</thead>
<tbody>
<tr>
<td align="center">1</td>
<td align="char" char=".">0.89</td>
<td align="char" char=".">0.78</td>
<td align="char" char=".">0.13</td>
<td align="char" char=".">0.25</td>
<td align="char" char=".">0.38</td>
<td align="char" char=".">0.75</td>
<td align="char" char=".">1.26</td>
</tr>
<tr>
<td align="center">2</td>
<td align="char" char=".">0.83</td>
<td align="char" char=".">0.92</td>
<td align="char" char=".">0.19</td>
<td align="char" char=".">0.30</td>
<td align="char" char=".">0.47</td>
<td align="char" char=".">0.84</td>
<td align="char" char=".">1.68</td>
</tr>
<tr>
<td align="center">3</td>
<td align="char" char=".">0.73</td>
<td align="char" char=".">0.95</td>
<td align="char" char=".">0.27</td>
<td align="char" char=".">0.38</td>
<td align="char" char=".">0.65</td>
<td align="char" char=".">0.93</td>
<td align="char" char=".">1.79</td>
</tr>
<tr>
<td align="center">4</td>
<td align="char" char=".">0.59</td>
<td align="char" char=".">0.97</td>
<td align="char" char=".">0.33</td>
<td align="char" char=".">0.48</td>
<td align="char" char=".">0.78</td>
<td align="char" char=".">1.15</td>
<td align="char" char=".">1.91</td>
</tr>
<tr>
<td align="center">5</td>
<td align="char" char=".">0.48</td>
<td align="char" char=".">0.97</td>
<td align="char" char=".">0.37</td>
<td align="char" char=".">0.57</td>
<td align="char" char=".">0.98</td>
<td align="char" char=".">1.51</td>
<td align="char" char=".">2.14</td>
</tr>
<tr>
<td align="center">6</td>
<td align="char" char=".">0.41</td>
<td align="char" char=".">0.97</td>
<td align="char" char=".">0.44</td>
<td align="char" char=".">0.73</td>
<td align="char" char=".">1.15</td>
<td align="char" char=".">1.82</td>
<td align="char" char=".">2.69</td>
</tr>
<tr>
<td align="center">7</td>
<td align="char" char=".">0.36</td>
<td align="char" char=".">0.97</td>
<td align="char" char=".">0.53</td>
<td align="char" char=".">0.90</td>
<td align="char" char=".">1.32</td>
<td align="char" char=".">2.09</td>
<td align="char" char=".">3.24</td>
</tr>
<tr>
<td align="center">8</td>
<td align="char" char=".">0.34</td>
<td align="char" char=".">0.95</td>
<td align="char" char=".">0.55</td>
<td align="char" char=".">0.96</td>
<td align="char" char=".">1.55</td>
<td align="char" char=".">2.29</td>
<td align="char" char=".">4.07</td>
</tr>
<tr>
<td align="center">9</td>
<td align="char" char=".">0.33</td>
<td align="char" char=".">0.93</td>
<td align="char" char=".">0.66</td>
<td align="char" char=".">0.97</td>
<td align="char" char=".">1.77</td>
<td align="char" char=".">2.78</td>
<td align="char" char=".">4.88</td>
</tr>
<tr>
<td align="center">10</td>
<td align="char" char=".">0.33</td>
<td align="char" char=".">0.93</td>
<td align="char" char=".">0.70</td>
<td align="char" char=".">1.07</td>
<td align="char" char=".">1.95</td>
<td align="char" char=".">3.08</td>
<td align="char" char=".">5.45</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<fig id="pntd.0004761.g003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pntd.0004761.g003</object-id>
<label>Fig 3</label>
<caption>
<title>Country-wide real-time predictions for incident dengue hemorrhagic fever.</title>
<p>Red lines show predicted case counts, black bars show cases reported by the end of the 2014 reporting period. The three figures show (top to bottom) one-, two-, and three-biweek ahead predictions. So, for example, every dot on the top graph is a one-biweek ahead real-time prediction made from all available data at the time of analysis.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pntd.0004761.g003" xlink:type="simple"/>
</fig>
<p>
<xref ref-type="fig" rid="pntd.0004761.g004">Fig 4</xref> shows examples of multi-step predictions from two analysis dates in 2014. We show the results from nine distinct provinces, representing the best three provinces, the middle three provinces, and the worst three provinces in terms of relative MAE when compared to a seasonal baseline model. The increasing uncertainty is visible in many cases, even when the point-predictions remain close to the true values. The explosive forecasts tended to occur more frequently in the early- and mid-season, when the historical seasonal trend rises and when the observed case counts tend to be increasing from one biweek to the next.</p>
<fig id="pntd.0004761.g004" position="float">
<object-id pub-id-type="doi">10.1371/journal.pntd.0004761.g004</object-id>
<label>Fig 4</label>
<caption>
<title>Ten-step forward predictions made with available data at two time-points in 2014 (each time indicated by a vertical dashed line).</title>
<p>Results for nine provinces are shown, representing (from top to bottom) the best three provinces, the middle three, and the worst three performing provinces in terms of relative mean absolute error when compared to a seasonal baseline model.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pntd.0004761.g004" xlink:type="simple"/>
</fig>
<p>There was substantial variation in predictive performance across provinces. Mean absolute error (MAE) for predictions tended to be larger in provinces with higher populations (<xref ref-type="fig" rid="pntd.0004761.g005">Fig 5</xref>), and also tended to increase with the forecast horizon. The observed MAE was less than 10 cases in over 90% of provinces at one time step and in over 50% of provinces at up to 6 time steps. <xref ref-type="fig" rid="pntd.0004761.g006">Fig 6</xref> shows the relative MAE of model predictions compared to a seasonal baseline model at prediction horizons of 1 through 10 biweeks (2 weeks through 5 months). We note that predictions during the first three months are nowcasts, as the most recent 6 biweeks of data are ignored in the fitting process and predictions are made starting from the point at which full data was assumed.</p>
<fig id="pntd.0004761.g005" position="float">
<object-id pub-id-type="doi">10.1371/journal.pntd.0004761.g005</object-id>
<label>Fig 5</label>
<caption>
<title>Mean absolute error (MAE) of our prediction model by province and step forward (in biweeks).</title>
<p>Provinces are sorted by population, with the most populous at the top of the figure.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pntd.0004761.g005" xlink:type="simple"/>
</fig>
<fig id="pntd.0004761.g006" position="float">
<object-id pub-id-type="doi">10.1371/journal.pntd.0004761.g006</object-id>
<label>Fig 6</label>
<caption>
<title>Relative mean absolute error (MAE) comparing our prediction model vs. a model that predicts a seasonal median, by province and step forward (in biweeks).</title>
<p>Results to the left of the dotted line signify more accurate predictions from our models when compared to the seasonal model, and results to the right indicate less accurate predictions.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pntd.0004761.g006" xlink:type="simple"/>
</fig>
<p>To compare predictive performance of our model between provinces, we used the relative MAE with a simple seasonal model as a baseline. <xref ref-type="table" rid="pntd.0004761.t001">Table 1</xref> summarizes relative MAEs by prediction horizon. The province-specific models for a majority of provinces made better predictions on average than the seasonal baseline model at 1, 2, 3 and 4 biweek (2, 4, 6 and 8 week) prediction horizons (i.e. up to 2 months from the starting point of the predictions). Up to about 5 months from the origin of the multi-step predictions (and two months from the analysis time), over 15% of province-specific models made predictions that were on average better than the seasonal baseline model. Some province-specific models showed substantially worse predictions when compared to a seasonal baseline at these longer prediction horizons. No single province feature (e.g. total average cases, strength of seasonal trends, population size, season-to-season variation) was able to explain the substantial variations in performance, highlighting the challenges of creating a unified modeling framework for a set of varied locations (see <xref ref-type="supplementary-material" rid="pntd.0004761.s001">S1 Appendix</xref>).</p>
</sec>
<sec id="sec014">
<title>Comparing real-time to full-data predictions</title>
<p>We compared real-time and full-data predictions that began at the same timepoint (<xref ref-type="fig" rid="pntd.0004761.g002">Fig 2A vs. 2B</xref>). This analysis can help answer the question of how much the real-time predictions that removed the most recent 3 months of data were impacted by the delays in case reporting. As shown in <xref ref-type="table" rid="pntd.0004761.t002">Table 2</xref>, these analyses demonstrated that once we went back 3 months to begin the nowcasting, more than 50% of the provinces had more accurate real-time forecasts than full-data forecasts at all prediction horizons up to 1.5 months. This suggests that inaccuracies in the real-time predictions once those recent 3 months are discarded are driven less by the reporting delays than they are by model misspecification and other background noise in the data.</p>
<table-wrap id="pntd.0004761.t002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pntd.0004761.t002</object-id>
<label>Table 2</label>
<caption>
<title>Comparison of province-level prediction accuracy between full-data and real-time predictions, by prediction horizon.</title>
<p>These calculations assume that both the full-data and real-time multi-step predictions began at the same time. The table shows the 5th percentile (<italic>Q</italic><sub>5</sub>), 25th percentile (<italic>Q</italic><sub>25</sub>), median (<italic>Q</italic><sub>50</sub>), 75th percentile (<italic>Q</italic><sub>75</sub>), and 95th percentile (<italic>Q</italic><sub>95</sub>) value of the relative MAE from each province at the given horizon. The relative MAEs were calculated as the MAE from the real-time predictions divided by the MAE from the full-data predictions, i.e. values larger than 1 indicate that the real-time models showed more absolute error on average than the full-data models.</p>
</caption>
<alternatives>
<graphic id="pntd.0004761.t002g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pntd.0004761.t002" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="center" rowspan="2">horizon</th>
<th align="center" colspan="5">Relative MAE (real-time vs. full-data)</th>
</tr>
<tr>
<th align="center"><italic>Q</italic><sub>5</sub></th>
<th align="center"><italic>Q</italic><sub>25</sub></th>
<th align="center"><italic>Q</italic><sub>50</sub> (median)</th>
<th align="center"><italic>Q</italic><sub>75</sub></th>
<th align="center"><italic>Q</italic><sub>95</sub></th>
</tr>
</thead>
<tbody>
<tr>
<td align="center">1</td>
<td align="char" char=".">0.78</td>
<td align="char" char=".">0.90</td>
<td align="char" char=".">1.00</td>
<td align="char" char=".">1.06</td>
<td align="char" char=".">1.20</td>
</tr>
<tr>
<td align="center">2</td>
<td align="char" char=".">0.74</td>
<td align="char" char=".">0.87</td>
<td align="char" char=".">0.95</td>
<td align="char" char=".">1.02</td>
<td align="char" char=".">1.17</td>
</tr>
<tr>
<td align="center">3</td>
<td align="char" char=".">0.77</td>
<td align="char" char=".">0.90</td>
<td align="char" char=".">0.97</td>
<td align="char" char=".">1.03</td>
<td align="char" char=".">1.11</td>
</tr>
<tr>
<td align="center">4</td>
<td align="char" char=".">0.82</td>
<td align="char" char=".">0.95</td>
<td align="char" char=".">1.02</td>
<td align="char" char=".">1.08</td>
<td align="char" char=".">1.19</td>
</tr>
<tr>
<td align="center">5</td>
<td align="char" char=".">0.83</td>
<td align="char" char=".">0.96</td>
<td align="char" char=".">1.04</td>
<td align="char" char=".">1.13</td>
<td align="char" char=".">1.29</td>
</tr>
<tr>
<td align="center">6</td>
<td align="char" char=".">0.87</td>
<td align="char" char=".">1.02</td>
<td align="char" char=".">1.08</td>
<td align="char" char=".">1.21</td>
<td align="char" char=".">1.45</td>
</tr>
<tr>
<td align="center">7</td>
<td align="char" char=".">0.88</td>
<td align="char" char=".">1.01</td>
<td align="char" char=".">1.13</td>
<td align="char" char=".">1.28</td>
<td align="char" char=".">1.71</td>
</tr>
<tr>
<td align="center">8</td>
<td align="char" char=".">0.88</td>
<td align="char" char=".">1.00</td>
<td align="char" char=".">1.15</td>
<td align="char" char=".">1.35</td>
<td align="char" char=".">1.97</td>
</tr>
<tr>
<td align="center">9</td>
<td align="char" char=".">0.90</td>
<td align="char" char=".">1.01</td>
<td align="char" char=".">1.16</td>
<td align="char" char=".">1.38</td>
<td align="char" char=".">2.22</td>
</tr>
<tr>
<td align="center">10</td>
<td align="char" char=".">0.88</td>
<td align="char" char=".">1.01</td>
<td align="char" char=".">1.19</td>
<td align="char" char=".">1.42</td>
<td align="char" char=".">2.45</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<p>A second analysis compared real-time predictions with a horizon of 7 biweeks with full-data predictions at 1 biweek (<xref ref-type="fig" rid="pntd.0004761.g002">Fig 2A vs. 2C</xref>). This analysis can tell us how much better or worse our model would have done if we did not need to adjust for delays in case reporting by dropping the past 3 months, i.e. if all of our data were available at the time of analysis. We refer to this realignment of horizons as the absolute horizon, to suggest that a real-time prediction that removes 6 biweeks of data and then projects 7 steps forward (<xref ref-type="fig" rid="pntd.0004761.g002">Fig 2A</xref>) is predicting the same timestep as a full-data prediction that does not remove any data and just projects 1 biweek forward (<xref ref-type="fig" rid="pntd.0004761.g002">Fig 2C</xref>). Results from this analysis are shown in <xref ref-type="table" rid="pntd.0004761.t003">Table 3</xref> for absolute horizons of 1 through 4 biweeks. Overall, 74 of the 76 provinces (97%) showed better average performance in the full-data forecasts at 1 step ahead than the real-time forecasts at 7 steps ahead (i.e. had a relative MAE of greater than 1). In over 90% of the provinces at each absolute horizon the full-data forecasts were on average closer to the true value than the real-time forecasts. However, across all the absolute horizons, for between 2 and 7 provinces the full-data predictions had more error than the real-time predictions. Full-data predictions under-performed real-time predictions in a small number of provinces, reflecting the challenges of making predictions in such a noisy system. A sample of predictions by province and analysis date is provided in <xref ref-type="supplementary-material" rid="pntd.0004761.s001">S1 Appendix</xref> to illustrate this challenge.</p>
<table-wrap id="pntd.0004761.t003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pntd.0004761.t003</object-id>
<label>Table 3</label>
<caption>
<title>Comparison of province-level prediction accuracy between full-data and real-time predictions, by prediction horizon.</title>
<p>These results were computed comparing predictions as if the full data was available at the analysis time with the real-time predictions that build in a 6-biweek (approximately 3 month) buffer to account for delayed case data. The table shows the 5th percentile (<italic>Q</italic><sub>5</sub>), 25th percentile (<italic>Q</italic><sub>25</sub>), median (<italic>Q</italic><sub>50</sub>), 75th percentile (<italic>Q</italic><sub>75</sub>), and 95th percentile (<italic>Q</italic><sub>95</sub>) value of the relative MAE from each province at the given horizon. The relative MAEs were calculated as the MAE from the real-time predictions divided by the MAE from the full-data predictions, i.e. values larger than 1 indicate that the real-time models showed more absolute error on average than the full-data models.</p>
</caption>
<alternatives>
<graphic id="pntd.0004761.t003g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pntd.0004761.t003" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="center" rowspan="2">Absolute horizon</th>
<th align="center" colspan="5">Relative MAE (real-time vs. full-data scores)</th>
</tr>
<tr>
<th align="center"><italic>Q</italic><sub>5</sub></th>
<th align="center"><italic>Q</italic><sub>25</sub></th>
<th align="center"><italic>Q</italic><sub>50</sub> (median)</th>
<th align="center"><italic>Q</italic><sub>75</sub></th>
<th align="center"><italic>Q</italic><sub>95</sub></th>
</tr>
</thead>
<tbody>
<tr>
<td align="center">1</td>
<td align="char" char=".">1.37</td>
<td align="char" char=".">1.77</td>
<td align="char" char=".">2.81</td>
<td align="char" char=".">5.55</td>
<td align="char" char=".">11.26</td>
</tr>
<tr>
<td align="center">2</td>
<td align="char" char=".">1.01</td>
<td align="char" char=".">1.61</td>
<td align="char" char=".">2.89</td>
<td align="char" char=".">4.88</td>
<td align="char" char=".">12.23</td>
</tr>
<tr>
<td align="center">3</td>
<td align="char" char=".">0.98</td>
<td align="char" char=".">1.68</td>
<td align="char" char=".">2.94</td>
<td align="char" char=".">4.25</td>
<td align="char" char=".">9.55</td>
</tr>
<tr>
<td align="center">4</td>
<td align="char" char=".">0.92</td>
<td align="char" char=".">1.83</td>
<td align="char" char=".">2.86</td>
<td align="char" char=".">4.14</td>
<td align="char" char=".">8.74</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
</sec>
</sec>
<sec id="sec015" sec-type="conclusions">
<title>Discussion</title>
<p>We present the prediction results from our real-time prediction infrastructure established for dengue hemorrhagic fever in Thailand. This infrastructure addresses several key practical features of real-time predictions, including real-time data management, the impact of reporting delays, and incorporating a disease transmission model that takes into account spatial and temporal trends.</p>
<p>The infectious disease prediction literature has a rich and varied selection of prediction algorithms but has not historically focused on the challenges of generating predictions in real-time. Continued development and refinement of such prediction pipelines, such as that presented here, will enable existing prediction methods to reach their full potential in making an impact on public health decision-making and planning.</p>
<p>The infrastructure that we have developed for integrating real-time data into predictions for the Thai Ministry of Public Health (MOPH) is the result of a long-standing governmental/academic partnership between the MOPH and U.S.-based researchers. This collaboration has enabled the creation of a single, unified authoritative source of almost all governmental dengue surveillance ever collected in Thailand, dating back nearly 50 years [<xref ref-type="bibr" rid="pntd.0004761.ref004">4</xref>]. Additionally, by enabling the transmission of surveillance data in near real-time (every two weeks from October 2013 and continuing through the writing of this manuscript in 2016), this effort has created a valuable dataset that has catalogued the reporting delays in a live surveillance system. The predictions described in this manuscript were made available to the MOPH typically within two weeks of the data being delivered to the U.S. research team via a PDF report and a private, interactive web application. The MOPH has disseminated these results to provincial, regional, and national decision-makers for use in planning for and monitoring outbreaks. Moving forward, to maximize the use of these predictions, the forecasts will be presented at the monthly high-level meetings of MOPH authorities. Decision makers at the province or health region level will use these forecasts to inform decisions about launching new interventions. Designing studies to evaluate different methods of incorporating these forecasts into real-time decision-making is an area of ongoing research for our team.</p>
<p>Formal data archiving protocols should be followed when making real-time predictions. Real-time predictions should be (1) generated prior to having the final data available and (2) formally registered or time-stamped in an independent data repository. Taking these steps ensures that no bias (intentional or not) enters the scientific process of evaluating the predictions.</p>
<p>While we are actively developing and validating other prediction models for this data, we chose to report the results from the prediction model that we used during 2014 to provide draft predictions to Thai public health officials. We intentionally did not perform extensive <italic>post hoc</italic> model validation or evaluation to minimize the risk of overfitting our model to this particular dataset.</p>
<p>Our 2014 real-time predictions varied substantially by province in quality and public health utility. In over half of the Thai provinces, our model out-performed a seasonal baseline model predicting two months in advance. As the horizon moves forward, the seasonal baseline model makes better predictions in more provinces: at a 5 month horizon, just over 15% of provinces are predicted better by our model than the seasonal model.</p>
<p>Our ability to make effective predictions into the future in a majority of provinces is made difficult by delayed case reporting. Our analyses show that if there were no reporting delays, our model would make substantially more accurate predictions in nearly all of the Thai provinces (<xref ref-type="table" rid="pntd.0004761.t003">Table 3</xref>). In ongoing work, we are focusing efforts on building models that can create accurate “now-casts” of data, using other more readily available data to increase the accuracy of forecasts, an approach that has been implemented by other forecasting efforts [<xref ref-type="bibr" rid="pntd.0004761.ref021">21</xref>].</p>
<p>While we have conducted extensive evaluation of the performance of our real-time predictions in 2014, this may not represent the performance of the model in other years. There is substantial year-to-year variation in annual province-level incidence in Thailand. The annual total number of cases observed in 2014 was in the lower half of previously observed annual incidence in 62 of 76 provinces. A complete characterization of our real-time model’s predictive performance will require evaluation across multiple years of data that is arriving in real-time, or with historical complete data with synthetically created reporting delays.</p>
<p>The simplicity of the statistical prediction models that we present in this manuscript is both a strength and a weakness. This type of phenomenological time-series model tends to show good predictive performance in the short term but has known deficiencies when making long-term predictions. Additionally, forecasting forward from auto-regressive models can lead to instabilities and explosive forecasts, as was observed in the predictions from some of the provinces. Also contributing to the instability of our models in a prediction context is that we use a smoothed value of the <italic>y</italic><sub><italic>i</italic>, <italic>t</italic> − 1</sub> offset term and do not incorporate uncertainty in it.</p>
<p>The model that we present here has been shown to perform well in contexts where there are no reporting delays (results not shown). The auto-regressive model used in this work is based on standard statistical auto-regressive integrated moving average (ARIMA) models. In fact, the reformulation of the ARIMA model in a disease transmission model context—making explicit the connection between modeling auto-regressive counts and the reproductive number, as shown in <xref ref-type="disp-formula" rid="pntd.0004761.e005">eq 2</xref>—is an important link between commonly used models in different fields. Model improvements under consideration include verifying the utility of spatial features for all provinces, adding spatially smooth seasonal effects, choosing the correlated provinces serially through partial correlations, and incorporating overdispersion of case counts.</p>
<p>The past decade of biomedical research has borne witness to rapid growth in digital surveillance data. A pressing challenge for the professional and academic epidemiological and biostatistical communities is to learn how to turn this deluge of data into evidence that informs decision making about improving health and preventing illness at the individual and population levels. Improving real-time forecasts of infectious disease outbreaks is an important technical achievement; however, continued research and collaboration in this area is needed to develop a better understanding of how to communicate these results to public health decision makers and integrate infectious disease predictions into public health practice. The collaborative effort described by this manuscript provides a template for generating real-time predictions in practice and describes specific results from this effort to integrate modern tools of data science with public health decision making.</p>
</sec>
<sec id="sec016">
<title>Supporting Information</title>
<supplementary-material id="pntd.0004761.s001" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pntd.0004761.s001" xlink:type="simple">
<label>S1 Appendix</label>
<caption>
<title>Methodological details, supplemental figures and results.</title>
<p>(PDF)</p>
</caption>
</supplementary-material>
</sec>
</body>
<back>
<ref-list>
<title>References</title>
<ref id="pntd.0004761.ref001">
<label>1</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Gubler</surname> <given-names>DJ</given-names></name>. <article-title>Dengue and dengue hemorrhagic fever</article-title>. <source>Clinical microbiology reviews</source>. <year>1998</year>;<volume>11</volume>(<issue>3</issue>):<fpage>480</fpage>–<lpage>496</lpage>. <object-id pub-id-type="pmid">9665979</object-id></mixed-citation>
</ref>
<ref id="pntd.0004761.ref002">
<label>2</label>
<mixed-citation publication-type="other" xlink:type="simple">World Health Organization. Dengue and severe dengue;. Available from: <ext-link ext-link-type="uri" xlink:href="http://www.who.int/mediacentre/factsheets/fs117/en/" xlink:type="simple">http://www.who.int/mediacentre/factsheets/fs117/en/</ext-link>.</mixed-citation>
</ref>
<ref id="pntd.0004761.ref003">
<label>3</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Bhatt</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Gething</surname> <given-names>PW</given-names></name>, <name name-style="western"><surname>Brady</surname> <given-names>OJ</given-names></name>, <name name-style="western"><surname>Messina</surname> <given-names>JP</given-names></name>, <name name-style="western"><surname>Farlow</surname> <given-names>AW</given-names></name>, <name name-style="western"><surname>Moyes</surname> <given-names>CL</given-names></name>, <etal>et al</etal>. <article-title>The global distribution and burden of dengue</article-title>. <source>Nature</source>. <year>2013</year>;<volume>496</volume>(<issue>7446</issue>):<fpage>504</fpage>–<lpage>507</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1038/nature12060" xlink:type="simple">10.1038/nature12060</ext-link></comment> <object-id pub-id-type="pmid">23563266</object-id></mixed-citation>
</ref>
<ref id="pntd.0004761.ref004">
<label>4</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Limkittikul</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Brett</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>L’Azou</surname> <given-names>M</given-names></name>. <article-title>Epidemiological trends of dengue disease in Thailand (2000–2011): a systematic literature review</article-title>. <source>PLOS Neglected Tropical Diseases</source>. <year>2014</year>;<volume>8</volume>(<issue>11</issue>):<fpage>e3241</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1371/journal.pntd.0003241" xlink:type="simple">10.1371/journal.pntd.0003241</ext-link></comment> <object-id pub-id-type="pmid">25375766</object-id></mixed-citation>
</ref>
<ref id="pntd.0004761.ref005">
<label>5</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Endy</surname> <given-names>TP</given-names></name>, <name name-style="western"><surname>Anderson</surname> <given-names>KB</given-names></name>, <name name-style="western"><surname>Nisalak</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Yoon</surname> <given-names>IK</given-names></name>, <name name-style="western"><surname>Green</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Rothman</surname> <given-names>AL</given-names></name>, <etal>et al</etal>. <article-title>Determinants of inapparent and symptomatic dengue infection in a prospective study of primary school children in Kamphaeng Phet, Thailand</article-title>. <source>PLOS Neglected Tropical Diseases</source>. <year>2011</year>;<volume>5</volume>(<issue>3</issue>):<fpage>e975</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1371/journal.pntd.0000975" xlink:type="simple">10.1371/journal.pntd.0000975</ext-link></comment> <object-id pub-id-type="pmid">21390158</object-id></mixed-citation>
</ref>
<ref id="pntd.0004761.ref006">
<label>6</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Endy</surname> <given-names>TP</given-names></name>, <name name-style="western"><surname>Chunsuttiwat</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Nisalak</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Libraty</surname> <given-names>DH</given-names></name>, <name name-style="western"><surname>Green</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Rothman</surname> <given-names>AL</given-names></name>, <etal>et al</etal>. <article-title>Epidemiology of inapparent and symptomatic acute dengue virus infection: a prospective study of primary school children in Kamphaeng Phet, Thailand</article-title>. <source>American journal of epidemiology</source>. <year>2002</year>;<volume>156</volume>(<issue>1</issue>):<fpage>40</fpage>–<lpage>51</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1093/aje/kwf005" xlink:type="simple">10.1093/aje/kwf005</ext-link></comment> <object-id pub-id-type="pmid">12076887</object-id></mixed-citation>
</ref>
<ref id="pntd.0004761.ref007">
<label>7</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Endy</surname> <given-names>TP</given-names></name>, <name name-style="western"><surname>Yoon</surname> <given-names>IK</given-names></name>, <name name-style="western"><surname>Mammen</surname> <given-names>MP</given-names></name>. <article-title>Prospective cohort studies of dengue viral transmission and severity of disease</article-title>. <source>Current topics in microbiology and immunology</source>. <year>2010</year>;<volume>338</volume>:<fpage>1</fpage>–<lpage>13</lpage>. <object-id pub-id-type="pmid">19802574</object-id></mixed-citation>
</ref>
<ref id="pntd.0004761.ref008">
<label>8</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Cazelles</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Chavez</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>McMichael</surname> <given-names>AJ</given-names></name>, <name name-style="western"><surname>Hales</surname> <given-names>S</given-names></name>. <article-title>Nonstationary influence of El Niño on the synchronous dengue epidemics in Thailand</article-title>. <source>PLOS Medicine</source>. <year>2005</year>;<volume>2</volume>(<issue>4</issue>):<fpage>e106</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1371/journal.pmed.0020106" xlink:type="simple">10.1371/journal.pmed.0020106</ext-link></comment> <object-id pub-id-type="pmid">15839751</object-id></mixed-citation>
</ref>
<ref id="pntd.0004761.ref009">
<label>9</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Cummings</surname> <given-names>DAT</given-names></name>, <name name-style="western"><surname>Irizarry</surname> <given-names>RA</given-names></name>, <name name-style="western"><surname>Huang</surname> <given-names>NE</given-names></name>, <name name-style="western"><surname>Endy</surname> <given-names>TP</given-names></name>, <name name-style="western"><surname>Nisalak</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Ungchusak</surname> <given-names>K</given-names></name>, <etal>et al</etal>. <article-title>Travelling waves in the occurrence of dengue haemorrhagic fever in Thailand</article-title>. <source>Nature</source>. <year>2004</year>;<volume>427</volume>(<issue>6972</issue>):<fpage>344</fpage>–<lpage>347</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1038/nature02225" xlink:type="simple">10.1038/nature02225</ext-link></comment> <object-id pub-id-type="pmid">14737166</object-id></mixed-citation>
</ref>
<ref id="pntd.0004761.ref010">
<label>10</label>
<mixed-citation publication-type="other" xlink:type="simple">Centers for Disease Control and Prevention. Announcement of Requirements and Registration for the Predict the Influenza Season Challenge; 2013. Available from: <ext-link ext-link-type="uri" xlink:href="https://www.federalregister.gov/articles/2013/11/25/2013-28198/announcement-of-requirements-and-registration-for-the-predict-the-influenza-season-challenge" xlink:type="simple">https://www.federalregister.gov/articles/2013/11/25/2013-28198/announcement-of-requirements-and-registration-for-the-predict-the-influenza-season-challenge</ext-link>.</mixed-citation>
</ref>
<ref id="pntd.0004761.ref011">
<label>11</label>
<mixed-citation publication-type="other" xlink:type="simple">Defense Advanced Research Projects Agency. DARPA Forecasting Chikungunya Challenge; 2014. Available from: <ext-link ext-link-type="uri" xlink:href="https://www.innocentive.com/ar/challenge/9933617?" xlink:type="simple">https://www.innocentive.com/ar/challenge/9933617?</ext-link></mixed-citation>
</ref>
<ref id="pntd.0004761.ref012">
<label>12</label>
<mixed-citation publication-type="other" xlink:type="simple">Pandemic Prediction and Forecasting Science and Technology Interagency Working Group. Dengue Forecasting; 2015. Available from: <ext-link ext-link-type="uri" xlink:href="http://dengueforecasting.noaa.gov/" xlink:type="simple">http://dengueforecasting.noaa.gov/</ext-link>.</mixed-citation>
</ref>
<ref id="pntd.0004761.ref013">
<label>13</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Shaman</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Karspeck</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Yang</surname> <given-names>W</given-names></name>, <name name-style="western"><surname>Tamerius</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Lipsitch</surname> <given-names>M</given-names></name>. <article-title>Real-time influenza forecasts during the 2012–2013 season</article-title>. <source>Nature Communications</source>. <year>2013</year>;<volume>4</volume>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1038/ncomms3837" xlink:type="simple">10.1038/ncomms3837</ext-link></comment> <object-id pub-id-type="pmid">24302074</object-id></mixed-citation>
</ref>
<ref id="pntd.0004761.ref014">
<label>14</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Nishiura</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Chowell</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Heesterbeek</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Wallinga</surname> <given-names>J</given-names></name>. <article-title>The ideal reporting interval for an epidemic to objectively interpret the epidemiological time course</article-title>. <source>Journal of The Royal Society Interface</source>. <year>2010</year>;<volume>7</volume>(<issue>43</issue>):<fpage>297</fpage>–<lpage>307</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1098/rsif.2009.0153" xlink:type="simple">10.1098/rsif.2009.0153</ext-link></comment></mixed-citation>
</ref>
<ref id="pntd.0004761.ref015">
<label>15</label>
<mixed-citation publication-type="other" xlink:type="simple">R Core Team. R: A Language and Environment for Statistical Computing; 2015.</mixed-citation>
</ref>
<ref id="pntd.0004761.ref016">
<label>16</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Zeger</surname> <given-names>SL</given-names></name>, <name name-style="western"><surname>Qaqish</surname> <given-names>B</given-names></name>. <article-title>Markov regression models for time series: a quasi-likelihood approach</article-title>. <source>Biometrics</source>. <year>1988</year>;<volume>44</volume>(<issue>4</issue>):<fpage>1019</fpage>–<lpage>1031</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.2307/2531732" xlink:type="simple">10.2307/2531732</ext-link></comment> <object-id pub-id-type="pmid">3148334</object-id></mixed-citation>
</ref>
<ref id="pntd.0004761.ref017">
<label>17</label>
<mixed-citation publication-type="book" xlink:type="simple">
<name name-style="western"><surname>Hastie</surname> <given-names>TJ</given-names></name>, <name name-style="western"><surname>Tibshirani</surname> <given-names>RJ</given-names></name>. <source>Generalized Additive Models</source>. <publisher-name>CRC Press</publisher-name>; <year>1990</year>.</mixed-citation>
</ref>
<ref id="pntd.0004761.ref018">
<label>18</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Wood</surname> <given-names>SN</given-names></name>. <article-title>Fast stable restricted maximum likelihood and marginal likelihood estimation of semiparametric generalized linear models</article-title>. <source>Journal of the Royal Statistical Society Series B-Statistical Methodology</source>. <year>2011</year>;<volume>73</volume>(<issue>1</issue>):<fpage>3</fpage>–<lpage>36</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1111/j.1467-9868.2010.00749.x" xlink:type="simple">10.1111/j.1467-9868.2010.00749.x</ext-link></comment></mixed-citation>
</ref>
<ref id="pntd.0004761.ref019">
<label>19</label>
<mixed-citation publication-type="journal" xlink:type="simple">
<name name-style="western"><surname>Hyndman</surname> <given-names>RJ</given-names></name>, <name name-style="western"><surname>Koehler</surname> <given-names>AB</given-names></name>. <article-title>Another look at measures of forecast accuracy</article-title>. <source>International Journal of Forecasting</source>. <year>2006</year>;<volume>22</volume>(<issue>4</issue>):<fpage>679</fpage>–<lpage>688</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1016/j.ijforecast.2006.03.001" xlink:type="simple">10.1016/j.ijforecast.2006.03.001</ext-link></comment></mixed-citation>
</ref>
<ref id="pntd.0004761.ref020">
<label>20</label>
<mixed-citation publication-type="other" xlink:type="simple">Reich NG, Lessler J, Sakrejda K, Lauer SA, Iamsirithaworn S, Cummings DAT. Case studies in evaluating time series prediction models using the relative mean absolute error. SelectedWorks. 2015;.</mixed-citation>
</ref>
<ref id="pntd.0004761.ref021">
<label>21</label>
<mixed-citation publication-type="other" xlink:type="simple">Chakraborty P, Khadivi P, Lewis B, Mahendiran A, Chen J, Butler P, et al. Forecasting a Moving Target: Ensemble Models for ILI Case Count Predictions. Proceedings of the 2014 SIAM International Conference on Data Mining. 2014; p. 262–270.</mixed-citation>
</ref>
</ref-list>
</back>
</article>