<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1d3 20150301//EN" "http://jats.nlm.nih.gov/publishing/1.1d3/JATS-journalpublishing1.dtd">
<article article-type="research-article" dtd-version="1.1d3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">PLoS ONE</journal-id>
<journal-id journal-id-type="publisher-id">plos</journal-id>
<journal-id journal-id-type="pmc">plosone</journal-id>
<journal-title-group>
<journal-title>PLOS ONE</journal-title>
</journal-title-group>
<issn pub-type="epub">1932-6203</issn>
<publisher>
<publisher-name>Public Library of Science</publisher-name>
<publisher-loc>San Francisco, CA USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.1371/journal.pone.0314005</article-id>
<article-id pub-id-type="publisher-id">PONE-D-24-22455</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Research Article</subject>
</subj-group>
<subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Plant science</subject><subj-group><subject>Plant anatomy</subject><subj-group><subject>Pollen</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Organisms</subject><subj-group><subject>Eukaryota</subject><subj-group><subject>Plants</subject><subj-group><subject>Poaceae</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Physical sciences</subject><subj-group><subject>Mathematics</subject><subj-group><subject>Numerical analysis</subject><subj-group><subject>Interpolation</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Physical sciences</subject><subj-group><subject>Mathematics</subject><subj-group><subject>Algebra</subject><subj-group><subject>Linear algebra</subject><subj-group><subject>Singular value decomposition</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Earth sciences</subject><subj-group><subject>Seasons</subject></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Simulation and modeling</subject></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Physical sciences</subject><subj-group><subject>Mathematics</subject><subj-group><subject>Applied mathematics</subject><subj-group><subject>Algorithms</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Simulation and modeling</subject><subj-group><subject>Algorithms</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Mathematical and statistical techniques</subject><subj-group><subject>Mathematical functions</subject></subj-group></subj-group></subj-group></article-categories>
<title-group>
<article-title>A new method based on physical patterns to impute aerobiological datasets</article-title>
<alt-title alt-title-type="running-head">A new method based on physical patterns to impute aerobiological datasets</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0002-9602-970X</contrib-id>
<name name-style="western">
<surname>Tagliaferro</surname>
<given-names>Sofia</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role content-type="http://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="http://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-original-draft/">Writing – original draft</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Corrochano</surname>
<given-names>Adrián</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role content-type="http://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="http://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/software/">Software</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Marchetti</surname>
<given-names>Pierpaolo</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role content-type="http://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes" equal-contrib="yes" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0002-2778-658X</contrib-id>
<name name-style="western">
<surname>Marcon</surname>
<given-names>Alessandro</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role content-type="http://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role content-type="http://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role content-type="http://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="corresp" rid="cor001">*</xref>
</contrib>
<contrib contrib-type="author" equal-contrib="yes" xlink:type="simple">
<name name-style="western">
<surname>Le Clainche</surname>
<given-names>Soledad</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role content-type="http://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
</contrib-group>
<aff id="aff001"><label>1</label> <addr-line>Unit of Epidemiology and Medical Statistics, Department of Diagnostics and Public Health, University of Verona, Verona, Italy</addr-line></aff>
<aff id="aff002"><label>2</label> <addr-line>School of Aerospace Engineering, Universidad Politécnica de Madrid, Madrid, Spain</addr-line></aff>
<contrib-group>
<contrib contrib-type="editor" xlink:type="simple">
<name name-style="western">
<surname>Singh</surname>
<given-names>Rajeev</given-names>
</name>
<role>Editor</role>
<xref ref-type="aff" rid="edit1"/>
</contrib>
</contrib-group>
<aff id="edit1"><addr-line>Satyawati College, University of Delhi, INDIA</addr-line></aff>
<author-notes>
<fn fn-type="conflict" id="coi001">
<p>The authors have declared that no competing interests exist.</p>
</fn>
<corresp id="cor001">* E-mail: <email xlink:type="simple">alessandro.marcon@univr.it</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>19</day>
<month>11</month>
<year>2024</year>
</pub-date>
<pub-date pub-type="collection">
<year>2024</year>
</pub-date>
<volume>19</volume>
<issue>11</issue>
<elocation-id>e0314005</elocation-id>
<history>
<date date-type="received">
<day>3</day>
<month>6</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>4</day>
<month>11</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-year>2024</copyright-year>
<copyright-holder>Tagliaferro et al</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="info:doi/10.1371/journal.pone.0314005"/>
<abstract>
<p>Limited research has assessed the accuracy of imputation methods in aerobiological datasets. We conducted a simulation study to evaluate, for the first time, the effectiveness of Gappy Singular Value Decomposition (GSVD), a data-driven approach, comparing it with the moving mean interpolation, a statistical approach. Utilizing complete pollen data from two monitoring stations in northeastern Italy for 2022, we randomly generated missing data considering the combination of various proportions (5%, 10%, 25%) and gap lengths (3, 5, 7, 10 days). We imputed 4800 time series using the GSVD algorithm, specifically implemented for this study, and the moving mean algorithm of the “AeRobiology” R package. We assessed imputation accuracy by calculating the Root Mean Square Error and employed multiple linear regression models to identify factors independently affecting the error (e.g. pollen variability, simulation settings). The results showed that the GSVD was as good as the well-established moving mean method and demonstrated its strong generalization capabilities across different data types. However, the imputation error was primarily influenced by pollen characteristics and location, regardless of the imputation method used. High variability in pollen concentrations and the distribution of missing data negatively affected imputation accuracy. In conclusion, we introduced and tested a novel imputation method, demonstrating comparable performance to the statistical approach in aerobiological data reconstruction. These findings contribute to advancing aerobiological data analysis, highlighting the need for improving imputation methods.</p>
</abstract>
<funding-group>
<award-group id="award001">
<funding-source>
<institution>ESF REACT-EU Green and Innovation</institution>
</funding-source>
<principal-award-recipient>
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0002-2778-658X</contrib-id>
<name name-style="western">
<surname>Marcon</surname>
<given-names>Alessandro</given-names>
</name>
</principal-award-recipient>
</award-group>
<award-group id="award002">
<funding-source>
<institution>NextGenerationEu</institution>
</funding-source>
<principal-award-recipient>
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0002-2778-658X</contrib-id>
<name name-style="western">
<surname>Marcon</surname>
<given-names>Alessandro</given-names>
</name>
</principal-award-recipient>
</award-group>
<award-group id="award003">
<funding-source>
<institution-wrap>
<institution-id institution-id-type="funder-id">http://dx.doi.org/10.13039/501100004837</institution-id>
<institution>Ministerio de Ciencia e Innovación</institution>
</institution-wrap>
</funding-source>
<award-id>PID2023-147790OB-I00</award-id>
<principal-award-recipient>
<name name-style="western">
<surname>Le Clainche</surname>
<given-names>Soledad</given-names>
</name>
</principal-award-recipient>
</award-group>
<award-group id="award004">
<funding-source>
<institution-wrap>
<institution-id institution-id-type="funder-id">http://dx.doi.org/10.13039/501100004837</institution-id>
<institution>Ministerio de Ciencia e Innovación</institution>
</institution-wrap>
</funding-source>
<award-id>TED2021-129774B-C21</award-id>
<principal-award-recipient>
<name name-style="western">
<surname>Le Clainche</surname>
<given-names>Soledad</given-names>
</name>
</principal-award-recipient>
</award-group>
<award-group id="award005">
<funding-source>
<institution-wrap>
<institution-id institution-id-type="funder-id">http://dx.doi.org/10.13039/501100004837</institution-id>
<institution>Ministerio de Ciencia e Innovación</institution>
</institution-wrap>
</funding-source>
<award-id>PLEC2022-009235</award-id>
<principal-award-recipient>
<name name-style="western">
<surname>Le Clainche</surname>
<given-names>Soledad</given-names>
</name>
</principal-award-recipient>
</award-group>
<award-group id="award006">
<funding-source>
<institution>NextGenerationEU/PRTR</institution>
</funding-source>
<principal-award-recipient>
<name name-style="western">
<surname>Le Clainche</surname>
<given-names>Soledad</given-names>
</name>
</principal-award-recipient>
</award-group>
<award-group id="award007">
<funding-source>
<institution>Horizon Europe research and innovation programme</institution>
</funding-source>
<award-id>101072559</award-id>
<principal-award-recipient>
<name name-style="western">
<surname>Le Clainche</surname>
<given-names>Soledad</given-names>
</name>
</principal-award-recipient>
</award-group>
<award-group id="award008">
<funding-source>
<institution-wrap>
<institution-id institution-id-type="funder-id">http://dx.doi.org/10.13039/100018694</institution-id>
<institution>HORIZON EUROPE Marie Sklodowska-Curie Actions</institution>
</institution-wrap>
</funding-source>
<award-id>101072779</award-id>
<principal-award-recipient>
<name name-style="western">
<surname>Le Clainche</surname>
<given-names>Soledad</given-names>
</name>
</principal-award-recipient>
</award-group>
<award-group id="award009">
<funding-source>
<institution-wrap>
<institution-id institution-id-type="funder-id">http://dx.doi.org/10.13039/501100007052</institution-id>
<institution>Università degli Studi di Verona</institution>
</institution-wrap>
</funding-source>
<principal-award-recipient>
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0002-2778-658X</contrib-id>
<name name-style="western">
<surname>Marcon</surname>
<given-names>Alessandro</given-names>
</name>
</principal-award-recipient>
</award-group>
<award-group id="award010">
<funding-source>
<institution-wrap>
<institution-id institution-id-type="funder-id">http://dx.doi.org/10.13039/501100003759</institution-id>
<institution>Universidad Politécnica de Madrid</institution>
</institution-wrap>
</funding-source>
<principal-award-recipient>
<name name-style="western">
<surname>Corrochano</surname>
<given-names>Adrián</given-names>
</name>
</principal-award-recipient>
</award-group>
<funding-statement>A.M. received grants to conduct the MEETOUT study from the European Union through the Italian Ministry of University and Research under the ESF REACT-EU Green and Innovation funding programme (Ministerial Decree 1061/2021) and the NextGenerationEu funding programme (Ministerial Decree 737/2021). Article processing charges were supported by the special fund at the University of Verona dedicated to Open Access publications. S.L.C. and A.C. acknowledge the grants PID2023-147790OB-I00, TED2021-129774B-C21 and PLEC2022-009235 funded by MCIN/AEI/10.13039/501100011033 and by the European Union “NextGenerationEU”/PRTR. The authors acknowledge the MODELAIR and ENCODING projects that have received funding from the European Union’s Horizon Europe research and innovation programme under the Marie Sklodowska-Curie grant agreement No. 101072559 and 101072779, respectively. The results of this publication reflect only the authors’ view and do not necessarily reflect those of the European Union. The European Union cannot be held responsible for them. A.C. acknowledges the support of Universidad Politécnica de Madrid, under the program ‘Programa Propio’. The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript. There was no additional external funding received for this study.</funding-statement>
</funding-group>
<counts>
<fig-count count="5"/>
<table-count count="3"/>
<page-count count="14"/>
</counts>
<custom-meta-group>
<custom-meta id="data-availability">
<meta-name>Data Availability</meta-name>
<meta-value>All the materials underlying the results presented in the study are available from the Modelflows-app website (<ext-link ext-link-type="uri" xlink:href="https://modelflows.github.io/modelflowsapp/airpollution/" xlink:type="simple">https://modelflows.github.io/modelflowsapp/airpollution/</ext-link>). These include the original pollen datasets downloaded from POLLnet (<ext-link ext-link-type="uri" xlink:href="https://pollnet.isprambiente.it/" xlink:type="simple">https://pollnet.isprambiente.it/</ext-link>), the R and Python codes to generate and impute missing data, sample gappy datasets, a brief overview of the paper, and a video explanation of the methodology.</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="sec001" sec-type="intro">
<title>Introduction</title>
<p>Aerobiology is a recent discipline focusing on atmospheric bioaerosols, such as pollen and spores [<xref ref-type="bibr" rid="pone.0314005.ref001">1</xref>, <xref ref-type="bibr" rid="pone.0314005.ref002">2</xref>]. Its interdisciplinary approach allows for the examination of the impacts of climate change and the development of innovative methodologies aimed at managing allergic diseases [<xref ref-type="bibr" rid="pone.0314005.ref003">3</xref>]. Aerobiological data are typically measured on a daily basis and are provided by local/national monitoring networks. Despite the existence of automatic sampling devices, current monitoring practices primarily rely on manual samplers, introducing the possibility of systematic errors [<xref ref-type="bibr" rid="pone.0314005.ref004">4</xref>–<xref ref-type="bibr" rid="pone.0314005.ref006">6</xref>].</p>
<p>Pollen time series are frequently incomplete due to malfunctions and maintenance of the monitoring stations [<xref ref-type="bibr" rid="pone.0314005.ref005">5</xref>], as well as voluntary interruptions in periods considered irrelevant for the measures. Consequently, the presence of missing data in aerobiological datasets is common, prompting the need for imputation methods. Traditional methods, such as omitting to assign values to missing data, may lead to underestimation errors [<xref ref-type="bibr" rid="pone.0314005.ref001">1</xref>, <xref ref-type="bibr" rid="pone.0314005.ref007">7</xref>, <xref ref-type="bibr" rid="pone.0314005.ref008">8</xref>].</p>
<p>Statistical and artificial intelligence methodologies have been implemented for data imputation. Statistical approaches such as linear interpolation [<xref ref-type="bibr" rid="pone.0314005.ref009">9</xref>–<xref ref-type="bibr" rid="pone.0314005.ref012">12</xref>], cubic spline interpolation [<xref ref-type="bibr" rid="pone.0314005.ref012">12</xref>], the Gaussian method [<xref ref-type="bibr" rid="pone.0314005.ref013">13</xref>] or averaging values from other years for each day with missing data [<xref ref-type="bibr" rid="pone.0314005.ref009">9</xref>], are commonly used in aerobiological studies. The availability of pre-set statistical software packages facilitates the application of the most common methodologies used in data imputation [<xref ref-type="bibr" rid="pone.0314005.ref001">1</xref>]. Recently, the “AeRobiology” R package was developed specifically to manage and visualize aerobiological data, as well as to impute missing data [<xref ref-type="bibr" rid="pone.0314005.ref006">6</xref>]. In this package different interpolation methods are implemented, including linear, moving mean, spline, time series analysis, and nearby locations interpolation. The moving mean method is a statistical univariate approach. It consists of filling in missing values by averaging nearby data within a symmetrical interval that is twice the length of the gap [<xref ref-type="bibr" rid="pone.0314005.ref006">6</xref>].</p>
<p>In recent years, computational intelligence techniques have gained popularity in pollen time series analysis [<xref ref-type="bibr" rid="pone.0314005.ref005">5</xref>], but their application in missing data imputation is less explored. Convolutional Neural Networks [<xref ref-type="bibr" rid="pone.0314005.ref005">5</xref>], Denoising Convolutional Auto-encoder [<xref ref-type="bibr" rid="pone.0314005.ref013">13</xref>], and k-Nearest Neighbours algorithm [<xref ref-type="bibr" rid="pone.0314005.ref014">14</xref>] are among the approaches used. Natural systems are physical (spatio-temporal) systems characterised by dominant non-linear structures that evolve over time (such as seasonality or climate variations) that are unknown. Identifying data tendencies connected to physics enables generalization for application across various fields [<xref ref-type="bibr" rid="pone.0314005.ref015">15</xref>–<xref ref-type="bibr" rid="pone.0314005.ref017">17</xref>]. Machine learning tools could be useful to repair corrupted or incomplete datasets, using the relevant spatio-temporal information directly from the data. Of these, the Singular Value Decomposition (SVD) is a data-driven multivariate method, useful for post-processing and handling data. The SVD, based on simple linear algebra, is the primary technique behind many dimensionality reduction methods, such as the Principal Component Analysis. The SVD method is able to recognise and extract the relevant spatio-temporal information directly from the data, removing noise and filtering out spatial redundancies, thus leading to dimensionality reduction. To address missing data, the Gappy SVD (GSVD) has been implemented, utilizing SVD properties to iteratively repair and reconstruct datasets.
This algorithm has already been successfully applied to reconstruct fluid flow [<xref ref-type="bibr" rid="pone.0314005.ref016">16</xref>, <xref ref-type="bibr" rid="pone.0314005.ref018">18</xref>] or oceanographic datasets [<xref ref-type="bibr" rid="pone.0314005.ref019">19</xref>], but it has never been tested on aerobiological datasets.</p>
<p>To assess imputation accuracy, simulation studies are conducted by generating missing data scenarios in complete datasets and comparing simulated and observed values. While simulation studies on environmental datasets have been widely explored (e.g. meteorological, hydrological, and air pollution data) [<xref ref-type="bibr" rid="pone.0314005.ref020">20</xref>–<xref ref-type="bibr" rid="pone.0314005.ref023">23</xref>], the challenges of imputation in aerobiological data remain less studied. Picornell et al. tested the ability of the different interpolation methods implemented in the “AeRobiology” R package, simulating random missing data in patterns of 3, 5, 7, and 10 consecutive days in different pollen seasonal periods (pre-season, pre-peak, peak, post-peak, and post-season) [<xref ref-type="bibr" rid="pone.0314005.ref001">1</xref>]. Navares et al. evaluated the performance of geographical imputation via Convolutional Neural Networks, generating 10, 20, and 30% of missing values in all periods, peak and off-peak season [<xref ref-type="bibr" rid="pone.0314005.ref005">5</xref>].</p>
<p>This paper introduces a novel implementation of the Gappy Singular Value Decomposition (GSVD) algorithm, a data-driven method, specifically tailored for the application to aerobiological datasets in this study. The imputation accuracy of this method was compared to a well-known statistical method, the moving mean algorithm.</p>
</sec>
<sec id="sec002" sec-type="materials|methods">
<title>Materials and methods</title>
<sec id="sec003">
<title>Aerobiological data</title>
<p>POLLnet is the aerobiological monitoring network of the National System for Environmental Protection (SNPA) of Italy, which aggregates aerobiological monitoring data measured by regions and provinces into a nationwide database (<ext-link ext-link-type="uri" xlink:href="https://pollnet.isprambiente.it/" xlink:type="simple">https://pollnet.isprambiente.it/</ext-link>). The network’s monitoring follows the European Standard UNI EN 16868 2019, using Hirst-type volumetric samplers with a calibrated pump aspirating 10 l/min of air in 24 hours. Airborne particles are captured on a rotating metallic drum with an adhesive tape. The sampling drum is extracted every seven days, and the tape is cut into fragments corresponding to each monitoring day. These fragments are then examined under a microscope at 400× magnification by a specialized technician, and daily pollen grains are counted based on their morphological characteristics. The count is recorded as the number of pollen grains per cubic meter of air (p/m<sup>3</sup>) [<xref ref-type="bibr" rid="pone.0314005.ref024">24</xref>, <xref ref-type="bibr" rid="pone.0314005.ref025">25</xref>].</p>
<p>We constructed the aerobiological datasets using RStudio version 4.2.2 [<xref ref-type="bibr" rid="pone.0314005.ref026">26</xref>]. For the study purposes, we selected two monitoring stations representing different environments in the Northeast of Italy (<xref ref-type="fig" rid="pone.0314005.g001">Fig 1</xref>): VI1 in Vicenza, lowlands with continental climate, and BZ2 in Bolzano, mountains with alpine climate.</p>
<fig id="pone.0314005.g001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0314005.g001</object-id>
<label>Fig 1</label>
<caption>
<title>Location of the selected monitoring stations in the Northeast of Italy and the respective pollen time series for the year 2022.</title>
<p>The map was produced using the QuickMapServices plugin (NextGIS, 2019) in QGIS software version 3.34.9 (QGIS Development Team. QGIS Geographic Information System. Open-Source Geospatial Foundation Project. <ext-link ext-link-type="uri" xlink:href="http://qgis.org/" xlink:type="simple">http://qgis.org</ext-link>). The basemap used is ESRI Terrain (ESRI, Redlands, CA, USA). BZ2: Bolzano; VI1: Vicenza; p/m<sup>3</sup>: pollen/cubic meter.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0314005.g001" xlink:type="simple"/>
</fig>
<p>We downloaded daily pollen concentrations for the period 2018–2022 using the “pollnet” R package (<ext-link ext-link-type="uri" xlink:href="https://rpubs.com/gbonafe/pollnet-data-extraction" xlink:type="simple">https://rpubs.com/gbonafe/pollnet-data-extraction</ext-link>). The dataset is available at <ext-link ext-link-type="uri" xlink:href="https://modelflows.github.io/modelflowsapp/airpollution/" xlink:type="simple">https://modelflows.github.io/modelflowsapp/airpollution/</ext-link>. <italic>Alnus</italic> and Poaceae pollens were considered for the analysis due to their different seasonality, temporal distribution, and load characteristics (as shown from the 2022 time series in <xref ref-type="fig" rid="pone.0314005.g001">Fig 1</xref>).</p>
<p>We computed the start and end dates of the season for each pollen time series using the 95-percentage method (start: 2.5%; end: 97.5%) from the “AeRobiology” R package [<xref ref-type="bibr" rid="pone.0314005.ref006">6</xref>, <xref ref-type="bibr" rid="pone.0314005.ref027">27</xref>]. This method was solely used to define convenient periods within the pollen seasons to generate random missing data. Then, we examined missing data to identify the year with the most complete data coverage during the seasonal pollen period. The years from 2020 to 2022 showed no missing data at station VI1, whereas station BZ2 had complete data throughout the period (2018–2022). We chose to simulate the pollen season of the year 2022 to ensure a complete data series for the preceding years, thus guaranteeing the applicability of the data-driven method. <xref ref-type="fig" rid="pone.0314005.g002">Fig 2</xref> shows the structure of the original dataset and the time series extracted for the simulation study.</p>
<fig id="pone.0314005.g002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0314005.g002</object-id>
<label>Fig 2</label>
<caption>
<title>Scheme depicting the original dataset of daily pollen concentrations for the period 2018–2022 and time series extracted for the simulation study.</title>
<p>BZ2: Bolzano; VI1: Vicenza; NAs: missing data. Each season was obtained from the earliest start and latest end day of the observed pollen seasons across the 2 monitoring stations: Season 2018 (start: 31/01/2018, <italic>Alnus</italic> BZ2; end: 17/09/2018, Poaceae VI1); Season 2019 (start: 11/02/2019, <italic>Alnus</italic> VI1; end: 17/09/2019, Poaceae VI1); Season 2020 (start: 30/01/2020, <italic>Alnus</italic> BZ2; end: 05/09/2020, Poaceae VI1); Season 2021 (start: 08/02/2021, <italic>Alnus</italic> VI1; end: 14/09/2021, Poaceae VI1); Season 2022 (start: 26/01/2022, <italic>Alnus</italic> BZ2; end: 08/10/2022, Poaceae VI1).</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0314005.g002" xlink:type="simple"/>
</fig>
<p>Descriptive statistics of pollen concentrations at each station in the pollen season 2022 were calculated: mean ± Standard Deviation (SD), quartiles, coefficient of variation (CV = (SD/mean) × 100) (%), and duration of the pollen season.</p>
<p>As the start/end dates varied depending on the monitoring station, for each pollen, we considered a common seasonal period in the year 2022 by extending the season to the first day of the month on which the minimum start date occurred and to the last day of the month on which the maximum end date occurred. As a result, the period considered for imputation was 01/01/2022 to 31/05/2022 for <italic>Alnus</italic> and 01/04/2022 to 31/10/2022 for Poaceae.</p>
</sec>
<sec id="sec004">
<title>Methods of imputation investigated</title>
<p>We utilized the moving mean method of the “AeRobiology” R package as it was specifically developed for aerobiological datasets. The GSVD method used in this study was the algorithm originating from the ModelFLOWs-app (code available at <ext-link ext-link-type="uri" xlink:href="https://modelflows.github.io/modelflowsapp/airpollution/" xlink:type="simple">https://modelflows.github.io/modelflowsapp/airpollution/</ext-link>), a novel software implementing modal decomposition methods and hybrid machine learning tools to solve problems in complex nonlinear dynamical systems with applications to pattern identification, data reconstruction, and data forecasting [<xref ref-type="bibr" rid="pone.0314005.ref016">16</xref>].</p>
<p>We initialised the GSVD algorithm by assigning an initial value to the missing data. In this paper, the mean value of the time series (hereafter GSVD mean) and a linear interpolation between values of the time series (hereafter GSVD interp) were used for the initialisation. Then, SVD was applied to the initial dataset, as <italic>X</italic> = <italic>UΣV</italic><sup><italic>T</italic></sup>, where the matrices <italic>U</italic> and <italic>V</italic> contain the modes (i.e. the spatio-temporal data decomposed by the Proper Orthogonal Decomposition mathematical approach) and the temporal coefficients, ()<sup><italic>T</italic></sup> denotes the matrix transpose, and <italic>Σ</italic> is the diagonal matrix containing the singular values of the matrix <italic>X</italic>. The first modes contain the physical modes related to the problem, while the rest are related to noise, spatial redundancies, or the fitting of the initial guess. Retaining the first <italic>N</italic> modes, where <italic>N</italic> can be tuned, one can approximate the database as <italic>X</italic>* = <italic>U</italic>*Σ*<italic>V</italic><sup><italic>T</italic>*</sup>. The gaps of the original dataset were updated using the values of this approximation. Afterwards, SVD was applied again iteratively until the Mean Square Error (calculated as the ratio between the squared difference of the original and the reconstructed dataset and the total number of samples) of the gaps between two iterations was lower than a tolerance, set as 10<sup>−6</sup>. More information about the algorithm and the implementation can be found in Díaz-Morales et al. (2024) and Hetherington et al. (2023, 2024) [<xref ref-type="bibr" rid="pone.0314005.ref015">15</xref>–<xref ref-type="bibr" rid="pone.0314005.ref017">17</xref>].</p>
</sec>
<sec id="sec005">
<title>Simulation study</title>
<p>For each pollen type and station, we generated 12 simulation scenarios by combining 3 missing data proportions (5%, 10%, 25%) and 4 gap lengths (number of consecutive missing days: 3, 5, 7, 10 days). For each simulation scenario we obtained 100 simulated datasets. We randomly removed daily observed data from the complete pollen seasonal time series according to the following procedure (see <xref ref-type="table" rid="pone.0314005.t001">Table 1</xref>):</p>
<list list-type="roman-lower">
<list-item><p>calculation of the number of days within the pollen season corresponding to the total proportions of NAs of 5%, 10%, and 25%;</p></list-item>
<list-item><p>calculation of the number of gaps for each gap length pattern (3, 5, 7, and 10 days) to approximate the total number of days with NAs from step i;</p></list-item>
<list-item><p>implementation of the algorithm to randomly remove data iteratively 100 times in RStudio, setting the number of consecutive days and the number of gaps from steps i and ii without overlapping gaps.</p></list-item>
</list>
<table-wrap id="pone.0314005.t001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0314005.t001</object-id>
<label>Table 1</label> <caption><title>Settings of simulation scenarios and the resulting percentages of NAs obtained from simulations.</title></caption>
<alternatives>
<graphic id="pone.0314005.t001g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0314005.t001" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left" rowspan="2">Pollen</th>
<th align="left" rowspan="2">Season duration (days)</th>
<th align="left" colspan="4">NAs simulation settings</th>
<th align="center" colspan="2">Resulting NAs</th>
</tr>
<tr>
<th align="left">%</th>
<th align="left">Total days</th>
<th align="left">Gap length (consecutive days)</th>
<th align="left">Number of gaps</th>
<th align="left">Total days</th>
<th align="left">%</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left" rowspan="12"><italic>Alnus</italic></td>
<td align="left" rowspan="11">151 (01 Jan–31 May)</td>
<td align="left" rowspan="4">5</td>
<td align="left" rowspan="4">7.55</td>
<td align="left">3</td>
<td align="left">3</td>
<td align="left">9</td>
<td align="left">6</td>
</tr>
<tr>
<td align="left">5</td>
<td align="left">2</td>
<td align="left">10</td>
<td align="left">6.6</td>
</tr>
<tr>
<td align="left">7</td>
<td align="left">1</td>
<td align="left">7</td>
<td align="left">4.6</td>
</tr>
<tr>
<td align="left">10</td>
<td align="left">1</td>
<td align="left">10</td>
<td align="left">6.6</td>
</tr>
<tr>
<td align="left" rowspan="4">10</td>
<td align="left" rowspan="4">15.1</td>
<td align="left">3</td>
<td align="left">5</td>
<td align="left">15</td>
<td align="left">9.9</td>
</tr>
<tr>
<td align="left">5</td>
<td align="left">3</td>
<td align="left">15</td>
<td align="left">9.9</td>
</tr>
<tr>
<td align="left">7</td>
<td align="left">2</td>
<td align="left">14</td>
<td align="left">9.3</td>
</tr>
<tr>
<td align="left">10</td>
<td align="left">2</td>
<td align="left">20</td>
<td align="left">13.2</td>
</tr>
<tr>
<td align="left" rowspan="4">25</td>
<td align="left" rowspan="4">37.75</td>
<td align="left">3</td>
<td align="left">13</td>
<td align="left">39</td>
<td align="left">25.8</td>
</tr>
<tr>
<td align="left">5</td>
<td align="left">8</td>
<td align="left">40</td>
<td align="left">26.5</td>
</tr>
<tr>
<td align="left">7</td>
<td align="left">5</td>
<td align="left">35</td>
<td align="left">23.2</td>
</tr>
<tr>
<td align="left"/>
<td align="left">10</td>
<td align="left">4</td>
<td align="left">40</td>
<td align="left">26.5</td>
</tr>
<tr>
<td align="left" rowspan="12">Poaceae</td>
<td align="left" rowspan="12">214 (01 Apr–31 Oct)</td>
<td align="left" rowspan="4">5</td>
<td align="left" rowspan="4">10.7</td>
<td align="left">3</td>
<td align="left">4</td>
<td align="left">12</td>
<td align="left">5.6</td>
</tr>
<tr>
<td align="left">5</td>
<td align="left">2</td>
<td align="left">10</td>
<td align="left">4.7</td>
</tr>
<tr>
<td align="left">7</td>
<td align="left">2</td>
<td align="left">14</td>
<td align="left">6.5</td>
</tr>
<tr>
<td align="left">10</td>
<td align="left">1</td>
<td align="left">10</td>
<td align="left">4.7</td>
</tr>
<tr>
<td align="left" rowspan="4">10</td>
<td align="left" rowspan="4">21.4</td>
<td align="left">3</td>
<td align="left">7</td>
<td align="left">21</td>
<td align="left">9.8</td>
</tr>
<tr>
<td align="left">5</td>
<td align="left">4</td>
<td align="left">20</td>
<td align="left">9.3</td>
</tr>
<tr>
<td align="left">7</td>
<td align="left">3</td>
<td align="left">21</td>
<td align="left">9.8</td>
</tr>
<tr>
<td align="left">10</td>
<td align="left">2</td>
<td align="left">20</td>
<td align="left">9.3</td>
</tr>
<tr>
<td align="left" rowspan="4">25</td>
<td align="left" rowspan="4">53.5</td>
<td align="left">3</td>
<td align="left">18</td>
<td align="left">54</td>
<td align="left">25.2</td>
</tr>
<tr>
<td align="left">5</td>
<td align="left">11</td>
<td align="left">55</td>
<td align="left">25.7</td>
</tr>
<tr>
<td align="left">7</td>
<td align="left">8</td>
<td align="left">56</td>
<td align="left">26.2</td>
</tr>
<tr>
<td align="left">10</td>
<td align="left">5</td>
<td align="left">50</td>
<td align="left">23.4</td>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t001fn001"><p>NAs: Missing data. The simulations are in total 12 for each pollen and station.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>As a result, we obtained a total of 48 simulations (12 scenarios × 2 stations × 2 pollens), each with 100 time series for imputation. An example of the NAs generation process and resulting dataset is reported in <xref ref-type="fig" rid="pone.0314005.g003">Fig 3</xref>.</p>
<fig id="pone.0314005.g003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0314005.g003</object-id>
<label>Fig 3</label>
<caption>
<title>Example of generation of missing values (NAs) and resulting dataset for <italic>Alnus</italic> BZ2.</title>
<p>BZ2: Bolzano.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0314005.g003" xlink:type="simple"/>
</fig>
<p>The RStudio code for the generation of missing data and examples of gappy datasets are available at <ext-link ext-link-type="uri" xlink:href="https://modelflows.github.io/modelflowsapp/airpollution/" xlink:type="simple">https://modelflows.github.io/modelflowsapp/airpollution/</ext-link>.</p>
</sec>
<sec id="sec006">
<title>Imputation and accuracy evaluation</title>
<p>To assess the accuracy of the imputation methods, we compared the reconstructed datasets to the observed time series, calculating the Root Mean Square Error (RMSE), i.e. the square root of the sum of the squared differences between the predicted and observed values divided by the total number of observations (N) (Eq 1).</p>
<disp-formula id="pone.0314005.e001">
<alternatives>
<graphic id="pone.0314005.e001g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pone.0314005.e001" xlink:type="simple"/>
<mml:math display="block" id="M1">
<mml:mrow><mml:mi mathvariant="normal">RMSE</mml:mi><mml:mo>=</mml:mo></mml:mrow><mml:msqrt><mml:mfrac><mml:mrow><mml:mrow><mml:munderover><mml:mo stretchy="false">∑</mml:mo><mml:mrow><mml:mi>i</mml:mi><mml:mo>=</mml:mo><mml:mn>1</mml:mn></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:munderover><mml:mrow><mml:msup><mml:mrow><mml:mo>(</mml:mo><mml:mrow><mml:msub><mml:mrow><mml:mi>P</mml:mi><mml:mi>r</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi><mml:mi>i</mml:mi><mml:mi>c</mml:mi><mml:mi>t</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub><mml:mo>−</mml:mo><mml:msub><mml:mrow><mml:mi>O</mml:mi><mml:mi>b</mml:mi><mml:mi>s</mml:mi><mml:mi>e</mml:mi><mml:mi>r</mml:mi><mml:mi>v</mml:mi><mml:mi>e</mml:mi><mml:mi>d</mml:mi></mml:mrow><mml:mrow><mml:mi>i</mml:mi></mml:mrow></mml:msub></mml:mrow><mml:mo>)</mml:mo></mml:mrow><mml:mrow><mml:mn>2</mml:mn></mml:mrow></mml:msup></mml:mrow></mml:mrow></mml:mrow><mml:mrow><mml:mi>N</mml:mi></mml:mrow></mml:mfrac></mml:msqrt>
</mml:math>
</alternatives>
<label>(1)</label>
</disp-formula>
<p>As part of the “AeRobiology” R package, we executed the moving mean method using RStudio. We developed an algorithm to iteratively apply the “interpollen” function with “movingmean” method to each column of individual datasets (pollen/station). After that, we merged the imputed dataset with the original corresponding pollen time series, and we implemented a function to iteratively calculate the RMSE between real data and the 100 replications of the simulated data. The final dataset contained the RMSE from all 100 simulations.</p>
<p>We implemented the GSVD algorithm in Python and ran it with Visual Studio Code version 1.86. As data-driven methods rely on extensive datasets to effectively capture data variability [<xref ref-type="bibr" rid="pone.0314005.ref028">28</xref>], we incorporated the 100 incomplete time series from each pollen, station, and simulation scenario into the original dataset including monitoring data spanning from 2018 to 2022. We studied different settings, changing the initialisation of the gap values and the number of modes, and evaluated the reconstruction performance on the gaps. Two imputation cases are shown in this paper for the sake of clarity, although other combinations showed similar results: the GSVD mean with 5 modes (GSVD mean 5m) and the GSVD interp with 10 modes (GSVD interp 10m). At the end of each imputation, the algorithm calculated the RMSE for each repetition and for each pollen time series and extracted the results as a dataset.</p>
<p>Finally, we merged the RMSE from the different imputations, and then we calculated the median RMSE for each imputation method and each combination of NAs.</p>
<p>In addition, we checked whether the natural variability of the pollen may affect the imputation process, as reported by Picornell et al. [<xref ref-type="bibr" rid="pone.0314005.ref001">1</xref>]. Indeed, pollen distribution, load, and seasonality differ according to the environment, climate, and phenology of the plant. All these factors may impact the imputation accuracy. Therefore, for each pollen time series, we calculated the Variation Index (VIn), an indicator of variability in pollen concentrations between consecutive days, based on Picornell et al. (2021) [<xref ref-type="bibr" rid="pone.0314005.ref001">1</xref>]:</p>
<list list-type="roman-lower">
<list-item><p>the moving mean and SD on consecutive 2 days within the pollen season;</p></list-item>
<list-item><p>the moving coefficients of variation (CV), as the ratio of the moving SD and moving mean;</p></list-item>
<list-item><p>the VIn, defined as the average of the moving CV over the pollen season.</p></list-item>
</list>
<p>Then, we related the median RMSE and the VIn using boxplots to explore the relation between imputation accuracy and pollen variability. Moreover, we employed multiple linear regression models stratified by pollen and monitoring station (M1: <italic>Alnus</italic> and BZ2 station; M2: <italic>Alnus</italic> and VI1 station; M3: Poaceae and BZ2 station; M4: Poaceae and VI1 station) to further explore this relation. The dependent variable was the RMSE from the 100 replications by all the simulations (total of 4800 time series), which we log-transformed to satisfy the normality assumption in linear regression. In addition, we applied a robust estimator of standard errors to relax the homoskedasticity assumption. Model covariates included the imputation method, proportion of NAs, and gap length. We exponentiated the regression coefficient β (Exp(β)) to provide an estimate of the relative change in RMSE.</p>
</sec>
</sec>
<sec id="sec007" sec-type="results">
<title>Results</title>
<sec id="sec008">
<title>Pollen data description</title>
<p><xref ref-type="table" rid="pone.0314005.t002">Table 2</xref> reports descriptive statistics of pollen observations in the pollen season 2022.</p>
<table-wrap id="pone.0314005.t002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0314005.t002</object-id>
<label>Table 2</label> <caption><title>Descriptive statistics of pollen data in the pollen season 2022.</title></caption>
<alternatives>
<graphic id="pone.0314005.t002g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0314005.t002" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left"/>
<th align="left">Monitoring station</th>
<th align="left">Mean ± SD (p/m<sup>3</sup>)</th>
<th align="left">CV (%)</th>
<th align="left">VIn (%)</th>
<th align="left">1<sup>st</sup> quartile (p/m<sup>3</sup>)</th>
<th align="left">Median (p/m<sup>3</sup>)</th>
<th align="left">3<sup>rd</sup> quartile (p/m<sup>3</sup>)</th>
<th align="left">Maximum (p/m<sup>3</sup>)</th>
<th align="left">Duration of the season (days)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="center" rowspan="2"><italic>Alnus</italic></td>
<td align="left">BZ2</td>
<td align="left">12.1±24.1</td>
<td align="left">198.8</td>
<td align="left">12.1</td>
<td align="left">0.5</td>
<td align="left">2.0</td>
<td align="left">10.4</td>
<td align="left">132.4</td>
<td align="left">122</td>
</tr>
<tr>
<td align="left">VI1</td>
<td align="left">50.1±129.5</td>
<td align="left">258.7</td>
<td align="left">50.1</td>
<td align="left">0.0</td>
<td align="left">1.5</td>
<td align="left">23.6</td>
<td align="left">852.6</td>
<td align="left">47</td>
</tr>
<tr>
<td align="center" rowspan="2">Poaceae</td>
<td align="left">BZ2</td>
<td align="left">14.5±22.1</td>
<td align="left">152.9</td>
<td align="left">14.5</td>
<td align="left">1.0</td>
<td align="left">4.7</td>
<td align="left">20.2</td>
<td align="left">135.4</td>
<td align="left">150</td>
</tr>
<tr>
<td align="left">VI1</td>
<td align="left">47.2±71.3</td>
<td align="left">150.9</td>
<td align="left">47.2</td>
<td align="left">10.3</td>
<td align="left">21.4</td>
<td align="left">51.8</td>
<td align="left">564.4</td>
<td align="left">180</td>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t002fn001"><p>SD: Standard Deviation; CV: Coefficient of Variation; VIn: Variation Index; p/m<sup>3</sup>: pollen/cubic meter; BZ2: Bolzano; VI1: Vicenza.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>For both pollen types, the mean and SD were higher at the VI1 monitoring station than at BZ2. For <italic>Alnus</italic>, the pollen season was shorter in VI1 than in BZ2, but pollen variability was higher in VI1. Poaceae, instead, showed a shorter pollen season in BZ2 than in VI1, with a higher variability in VI1 in terms of VIn.</p>
</sec>
<sec id="sec009">
<title>Performance analysis</title>
<p>No specific pattern resulted in the distribution of median RMSE values for pollen and station by imputation methods (<xref ref-type="fig" rid="pone.0314005.g004">Fig 4</xref>).</p>
<fig id="pone.0314005.g004" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0314005.g004</object-id>
<label>Fig 4</label>
<caption>
<title>Distribution of the median Root Mean Square Error (RMSE) values for pollen/station by imputation method.</title>
<p>BZ2: Bolzano; VI1: Vicenza; GSVD: Gappy Singular Value Decomposition; p/m<sup>3</sup>: pollen/cubic meter. Each box represents the distribution of the median RMSE from the 12 simulations.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0314005.g004" xlink:type="simple"/>
</fig>
<p>The variability in the distribution of median RMSE was lower at the BZ2 monitoring station (<italic>Alnus</italic>: from 0.6 to 9.5 p/m<sup>3</sup>; Poaceae: from 0.9 to 8.2 p/m<sup>3</sup>) and higher at the VI1 monitoring station (<italic>Alnus</italic>: from 1.5 to 56.5 p/m<sup>3</sup>; Poaceae: from 4.1 to 27.6 p/m<sup>3</sup>).</p>
<p>When examining the relationship between the median RMSE values and VIn (<xref ref-type="fig" rid="pone.0314005.g005">Fig 5</xref>), a trend of increasing RMSE with higher VIn values emerged. Moreover, the variability in the distribution of median RMSE values increased with higher VIn values.</p>
<fig id="pone.0314005.g005" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0314005.g005</object-id>
<label>Fig 5</label>
<caption>
<title>Distribution of median Root Mean Square Error (RMSE) for the Variation Index by pollen/station.</title>
<p>BZ2: Bolzano; VI1: Vicenza; p/m<sup>3</sup>: pollen/cubic meter. Each box represents the distribution of the median RMSE from the 12 simulations imputed with the 3 methods.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0314005.g005" xlink:type="simple"/>
</fig>
<p>Based on the results of the multiple linear regression models, there was large variability in imputation accuracy across the methods investigated, and none of them outperformed the others when adjusting for the simulation scenario (<xref ref-type="table" rid="pone.0314005.t003">Table 3</xref>).</p>
<table-wrap id="pone.0314005.t003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0314005.t003</object-id>
<label>Table 3</label> <caption><title>Association estimates (Exp(β) representing ratios of geometric means) with 95%CI between the Root Mean Square Error (RMSE) and covariates (imputation method, % of NAs, and gap length).</title></caption>
<alternatives>
<graphic id="pone.0314005.t003g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0314005.t003" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left"/>
<th align="left">M1 (<italic>Alnus</italic>, BZ2)</th>
<th align="left">M2 (<italic>Alnus</italic>, VI1)</th>
<th align="left">M3 (Poaceae, BZ2)</th>
<th align="left">M4 (Poaceae, VI1)</th>
</tr>
<tr>
<th align="left"/>
<th align="left">Exp(β) (95%CI)</th>
<th align="left">Exp(β) (95%CI)</th>
<th align="left">Exp(β) (95%CI)</th>
<th align="left">Exp(β) (95%CI)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">Imputation method:</td>
<td align="left"/>
<td align="left"/>
<td align="left"/>
<td align="left"/>
</tr>
<tr>
<td align="left">Moving mean</td>
<td align="left">Ref.</td>
<td align="left">Ref.</td>
<td align="left">Ref.</td>
<td align="left">Ref.</td>
</tr>
<tr>
<td align="left">GSVD interp 10m</td>
<td align="left">1.20 (1.05–1.38)</td>
<td align="left">1.00 (0.81–1.24)</td>
<td align="left">0.88 (0.80–0.97)</td>
<td align="left">0.94 (0.85–1.04)</td>
</tr>
<tr>
<td align="left">GSVD mean 5m</td>
<td align="left">1.03 (0.89–1.18)</td>
<td align="left">1.13 (0.93–1.38)</td>
<td align="left">1.10 (1.00–1.21)</td>
<td align="left">0.92 (0.83–1.00)</td>
</tr>
<tr>
<td align="left">% of NAs:</td>
<td align="left"/>
<td align="left"/>
<td align="left"/>
<td align="left"/>
</tr>
<tr>
<td align="left">5</td>
<td align="left">Ref.</td>
<td align="left">Ref.</td>
<td align="left">Ref.</td>
<td align="left">Ref.</td>
</tr>
<tr>
<td align="left">10</td>
<td align="left">2.12 (1.80–2.49)</td>
<td align="left">3.76 (2.93–4.84)</td>
<td align="left">1.94 (1.72–2.19)</td>
<td align="left">1.80 (1.60–2.01)</td>
</tr>
<tr>
<td align="left">25</td>
<td align="left">5.85 (5.11–6.69)</td>
<td align="left">9.86 (8.03–12.10)</td>
<td align="left">3.84 (3.49–4.22)</td>
<td align="left">3.74 (3.39–4.13)</td>
</tr>
<tr>
<td align="left">Gap length (days):</td>
<td align="left"/>
<td align="left"/>
<td align="left"/>
<td align="left"/>
</tr>
<tr>
<td align="left">3</td>
<td align="left">Ref.</td>
<td align="left">Ref.</td>
<td align="left">Ref.</td>
<td align="left">Ref.</td>
</tr>
<tr>
<td align="left">5</td>
<td align="left">0.92 (0.79–1.08)</td>
<td align="left">1.14 (0.88–1.46)</td>
<td align="left">0.84 (0.75–0.94)</td>
<td align="left">0.90 (0.80–1.02)</td>
</tr>
<tr>
<td align="left">7</td>
<td align="left">0.65 (0.56–0.76)</td>
<td align="left">0.49 (0.39–0.63)</td>
<td align="left">0.94 (0.85–1.03)</td>
<td align="left">0.91 (0.82–1.01)</td>
</tr>
<tr>
<td align="left">10</td>
<td align="left">0.76 (0.66–0.88)</td>
<td align="left">0.74 (0.60–0.92)</td>
<td align="left">0.77 (0.69–0.85)</td>
<td align="left">0.78 (0.71–0.87)</td>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t003fn001"><p>BZ2: Bolzano; VI1: Vicenza; CI: Confidence Interval; GSVD: Gappy Singular Value Decomposition; NAs: missing values; Ref.: reference category.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>Moreover, no consistency was found within the GSVD imputation method, even showing contrasting results as in model M3. There was instead a consistent association between the simulation settings and imputation accuracy. In fact, the RMSE increased with an increasing proportion of NAs across all models. Notably, the RMSE was 4 to 10 times higher when NAs were set to 25%, compared to the reference of 5%. On the contrary, the RMSE decreased with gap length, showing minimum values at 7 days (M1 and M2) and 10 days (M3 and M4).</p>
</sec>
</sec>
<sec id="sec010">
<title>Discussion and conclusions</title>
<p>A simulation study was conducted to compare the imputation accuracy of two methodologies, applying and evaluating for the first time the GSVD method to aerobiological datasets. Promising results emerged, demonstrating a similar performance of GSVD in comparison to the well-established moving mean method of the “AeRobiology” R package. However, it was found that both the inherent variability in observed pollen concentrations and the pattern of missing data had a more substantial impact on imputation accuracy within aerobiological datasets than the interpolation method applied. These findings contribute to filling the gap of knowledge in this field, considering the limited number of simulation studies conducted on pollen time series [<xref ref-type="bibr" rid="pone.0314005.ref001">1</xref>, <xref ref-type="bibr" rid="pone.0314005.ref005">5</xref>, <xref ref-type="bibr" rid="pone.0314005.ref013">13</xref>].</p>
<p>We compared univariate and multivariate methods of interpolation, specifically focusing on aerobiological datasets. Previous simulation studies on other types of environmental data (e.g. hydrological, meteorological, air quality) have favoured multivariate methods, leveraging information from other temporal series, over univariate methods, which rely solely on the data series itself [<xref ref-type="bibr" rid="pone.0314005.ref020">20</xref>–<xref ref-type="bibr" rid="pone.0314005.ref022">22</xref>]. On the one hand, we used the moving mean algorithm from the “AeRobiology” R package as the univariate method, which was specifically designed for aerobiological datasets. This algorithm was identified within the package as the interpolation method with the best performance, attributed to its reduced sensitivity to data availability, time series length, and fluctuations in pollen concentrations across consecutive days [<xref ref-type="bibr" rid="pone.0314005.ref001">1</xref>]. Its simplicity and increasing usage in aerobiological studies underscore its relevance and effectiveness in reconstructing time series. On the other hand, we used the GSVD algorithm as the multivariate method, evaluating its performance on aerobiological datasets for the first time. The potential of this method lies in its ability to reduce data dimensionality through data-driven decomposition, identifying the main data patterns related to physics without requiring any assumptions [<xref ref-type="bibr" rid="pone.0314005.ref015">15</xref>–<xref ref-type="bibr" rid="pone.0314005.ref017">17</xref>]. This makes it a promising tool for dataset reconstruction, as evidenced by its strong generalization capabilities across different types of data. However, more applications of the GSVD method on aerobiological data are needed to evaluate its effectiveness across diverse pollen types and environmental conditions. 
Indeed, the GSVD performance was similar to that of the statistical approach, with both methods exhibiting similarly unsatisfactory imputation accuracy in some settings. Moreover, the comparison of these two methods in our study revealed insights into the various factors influencing imputation performance. It suggested that the specific characteristics and requirements of the dataset may play a significant role in determining the most suitable interpolation approach.</p>
<p>The challenge of imputing missing data in aerobiological datasets is compounded by the complexity of plant phenology and pollen diffusion and advection mechanisms. Beyond their non-normal statistical distribution, each pollen type is influenced by local environmental and climatic conditions, resulting in differences in quantity, seasonality, and daily concentration patterns [<xref ref-type="bibr" rid="pone.0314005.ref001">1</xref>]. Meteorological factors, particularly temperature and precipitation, are widely acknowledged to have the greatest influence on pollen variability, affecting both phenological phases and pollen behaviour in the atmosphere [<xref ref-type="bibr" rid="pone.0314005.ref029">29</xref>, <xref ref-type="bibr" rid="pone.0314005.ref030">30</xref>]. Hence, the same pollen type may exhibit different distribution curves depending on the location characteristics [<xref ref-type="bibr" rid="pone.0314005.ref001">1</xref>]. Such variability has been related to decreased accuracy in imputation, as wider concentration ranges between consecutive days heighten the likelihood of errors during the imputation process [<xref ref-type="bibr" rid="pone.0314005.ref001">1</xref>]. This association has been observed in other environmental data as well [<xref ref-type="bibr" rid="pone.0314005.ref031">31</xref>]. Our findings align with Picornell et al. (2021) [<xref ref-type="bibr" rid="pone.0314005.ref001">1</xref>], indicating that higher variability in concentration (VIn) resulted in less accurate imputation results, both in terms of values and range of variability of the imputation error. Notably, measurements from the Vicenza station showed greater variability, likely attributable to the effect of continental climate characteristics of the lowlands on pollen, subjected to significant thermal fluctuations compared to alpine regions. 
Additionally, <italic>Alnus</italic> pollen generally displayed higher VIn values compared to Poaceae pollen, likely due to significant variability over a shorter season duration.</p>
<p>Besides the pollen type and location of the monitoring station, the pattern of missing data had the most substantial impact on imputation accuracy in our study. We generated missing data by introducing fixed consecutive-day gaps at various percentages in aerobiological datasets. The results showed a trend of increasing imputation error with higher percentages of NAs, regardless of the pollen/location. Our results align with the findings of Junger et al. (2015) concerning air pollution data, indicating that 5% of missing data yields satisfactory results, but accuracy decreases with more than 10% missing data [<xref ref-type="bibr" rid="pone.0314005.ref021">21</xref>]. In contrast, one study found opposing trends with increasing percentages of missing data for different meteorological variables [<xref ref-type="bibr" rid="pone.0314005.ref031">31</xref>], while another study observed no specific trend between missing data percentage and imputation error in aerobiological databases [<xref ref-type="bibr" rid="pone.0314005.ref005">5</xref>].</p>
<p>Regarding the gap length, our findings differ from those of Picornell et al. (2021) [<xref ref-type="bibr" rid="pone.0314005.ref001">1</xref>], as we observed that interpolation error decreases with longer gap lengths, depending on the pollen type. Specifically, the imputation error was minimum in datasets with gaps of 7 consecutive days for <italic>Alnus</italic>, and with gaps of 10 days for Poaceae, compared to gaps of 3 days. Despite the higher possibility of abrupt variations in longer gaps [<xref ref-type="bibr" rid="pone.0314005.ref001">1</xref>], the observed decrease in error with longer gaps can be attributed to the smoothing effect of interpolation. This effect leads to a reduction in the likelihood of generating peaks through interpolation, thereby minimizing errors. Notably, this effect appears to be more pronounced for pollens with longer season duration and less variability, as seen for Poaceae. The abundance of pollen-producing plants within this family, comprising over 120 genera in Italy, leads to high atmospheric pollen levels persisting over extended periods, thereby reducing day-to-day variability and smoothing peaks in pollen concentrations.</p>
<p>In conclusion, missing data resulting from manual measurement are common in aerobiological datasets [<xref ref-type="bibr" rid="pone.0314005.ref001">1</xref>, <xref ref-type="bibr" rid="pone.0314005.ref005">5</xref>, <xref ref-type="bibr" rid="pone.0314005.ref021">21</xref>]. Therefore, imputation remains the best solution for dealing with incomplete datasets and is useful for improving aerobiological analysis [<xref ref-type="bibr" rid="pone.0314005.ref001">1</xref>, <xref ref-type="bibr" rid="pone.0314005.ref032">32</xref>]. In fact, even small gaps can distort estimates in environmental epidemiology or climatological studies [<xref ref-type="bibr" rid="pone.0314005.ref013">13</xref>]. Omitting to address missing data can result in significant errors in analysing pollen time series, which in turn can affect the definition of pollen seasonality [<xref ref-type="bibr" rid="pone.0314005.ref001">1</xref>, <xref ref-type="bibr" rid="pone.0314005.ref007">7</xref>, <xref ref-type="bibr" rid="pone.0314005.ref008">8</xref>]. We introduced and tested a novel method for missing data imputation in aerobiological research, demonstrating comparable performance to the moving mean method in data reconstruction. Both methods yielded favourable results, with the moving mean method being the simpler option. However, the imputation error remained unacceptable for certain pollen types and missing data scenarios. Additional research is required to investigate the application of the GSVD method across diverse pollen types and environmental conditions to draw a definitive conclusion. Furthermore, incorporating meteorological data into pollen datasets should be considered to improve imputation accuracy. Finally, there is a need to improve current imputation methods and develop more reliable techniques specifically tailored to pollen data, aiming to minimize the impact of temporal variability in pollen concentrations on imputation error.</p>
</sec>
</body>
<back>
<ref-list>
<title>References</title>
<ref id="pone.0314005.ref001"><label>1</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Picornell</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Oteros</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Ruiz-Mata</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Recio</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Trigo</surname> <given-names>MM</given-names></name>, <name name-style="western"><surname>Martínez-Bracero</surname> <given-names>M</given-names></name>, <etal>et al</etal>. <article-title>Methods for interpolating missing data in aerobiological databases</article-title>. <source>Environmental Research</source>. <year>2021</year>;<volume>200</volume>: <fpage>111391</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.envres.2021.111391" xlink:type="simple">10.1016/j.envres.2021.111391</ext-link></comment> <object-id pub-id-type="pmid">34058184</object-id></mixed-citation></ref>
<ref id="pone.0314005.ref002"><label>2</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Vélez-Pereira</surname> <given-names>AM</given-names></name>, <name name-style="western"><surname>De Linares</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Belmonte</surname> <given-names>J</given-names></name>. <article-title>Aerobiological modeling I: A review of predictive models</article-title>. <source>Science of The Total Environment</source>. <year>2021</year>;<volume>795</volume>: <fpage>148783</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.scitotenv.2021.148783" xlink:type="simple">10.1016/j.scitotenv.2021.148783</ext-link></comment> <object-id pub-id-type="pmid">34243002</object-id></mixed-citation></ref>
<ref id="pone.0314005.ref003"><label>3</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Tagliaferro</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Adani</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Pepe</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Briganti</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>D’Isidoro</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Bonini</surname> <given-names>M</given-names></name>, <etal>et al</etal>. <article-title>The impact of the spatial resolution of vegetation cover on the prediction of airborne pollen concentrations over northern Italy.</article-title> <source>Agricultural and Forest Meteorology</source>. <year>2024</year>;<volume>355</volume>: <fpage>110153</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.agrformet.2024.110153" xlink:type="simple">10.1016/j.agrformet.2024.110153</ext-link></comment></mixed-citation></ref>
<ref id="pone.0314005.ref004"><label>4</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Matavulj</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Cristofori</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Cristofolini</surname> <given-names>F</given-names></name>, <name name-style="western"><surname>Gottardini</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Brdar</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Sikoparija</surname> <given-names>B</given-names></name>. <article-title>Integration of reference data from different Rapid-E devices supports automatic pollen detection in more locations</article-title>. <source>Science of The Total Environment</source>. <year>2022</year>;<volume>851</volume>: <fpage>158234</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.scitotenv.2022.158234" xlink:type="simple">10.1016/j.scitotenv.2022.158234</ext-link></comment> <object-id pub-id-type="pmid">36007635</object-id></mixed-citation></ref>
<ref id="pone.0314005.ref005"><label>5</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Navares</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Aznarte</surname> <given-names>JL</given-names></name>. <article-title>Geographical Imputation of Missing Poaceae Pollen Data via Convolutional Neural Networks.</article-title> <source>Atmosphere</source>. <year>2019</year>;<volume>10</volume>: <fpage>717</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3390/atmos10110717" xlink:type="simple">10.3390/atmos10110717</ext-link></comment></mixed-citation></ref>
<ref id="pone.0314005.ref006"><label>6</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Rojo</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Picornell</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Oteros</surname> <given-names>J</given-names></name>. <article-title>AeRobiology: The computational tool for biological data in the air.</article-title> <name name-style="western"><surname>Price</surname> <given-names>S</given-names></name>, editor. <source>Methods Ecol Evol</source>. <year>2019</year>;<volume>10</volume>: <fpage>1371</fpage>–<lpage>1376</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1111/2041-210X.13203" xlink:type="simple">10.1111/2041-210X.13203</ext-link></comment></mixed-citation></ref>
<ref id="pone.0314005.ref007"><label>7</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Smith</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Jäger</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Berger</surname> <given-names>U</given-names></name>, <name name-style="western"><surname>Šikoparija</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Hallsdottir</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Sauliene</surname> <given-names>I</given-names></name>, <etal>et al</etal>. <article-title>Geographic and temporal variations in pollen exposure across Europe</article-title>. <source>Allergy</source>. <year>2014</year>;<volume>69</volume>: <fpage>913</fpage>–<lpage>923</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1111/all.12419" xlink:type="simple">10.1111/all.12419</ext-link></comment> <object-id pub-id-type="pmid">24816084</object-id></mixed-citation></ref>
<ref id="pone.0314005.ref008"><label>8</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Valipour Shokouhi</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>De Hoogh</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Gehrig</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Eeftens</surname> <given-names>M</given-names></name>. <article-title>Estimation of historical daily airborne pollen concentrations across Switzerland using a spatio temporal random forest model</article-title>. <source>Science of The Total Environment</source>. <year>2024</year>;<volume>906</volume>: <fpage>167286</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.scitotenv.2023.167286" xlink:type="simple">10.1016/j.scitotenv.2023.167286</ext-link></comment> <object-id pub-id-type="pmid">37742957</object-id></mixed-citation></ref>
<ref id="pone.0314005.ref009"><label>9</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Damialis</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Halley</surname> <given-names>JM</given-names></name>, <name name-style="western"><surname>Gioulekas</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Vokou</surname> <given-names>D</given-names></name>. <article-title>Long-term trends in atmospheric pollen levels in the city of Thessaloniki, Greece.</article-title> <source>Atmospheric Environment</source>. <year>2007</year>;<volume>41</volume>: <fpage>7011</fpage>–<lpage>7021</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.atmosenv.2007.05.009" xlink:type="simple">10.1016/j.atmosenv.2007.05.009</ext-link></comment></mixed-citation></ref>
<ref id="pone.0314005.ref010"><label>10</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>González-Fernández</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Álvarez-López</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Garrido</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Fernández-González</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Rodríguez-Rajo</surname> <given-names>FJ</given-names></name>. <article-title>Data mining assessment of Poaceae pollen influencing factors and its environmental implications</article-title>. <source>Science of The Total Environment</source>. <year>2022</year>;<volume>815</volume>: <fpage>152874</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.scitotenv.2021.152874" xlink:type="simple">10.1016/j.scitotenv.2021.152874</ext-link></comment> <object-id pub-id-type="pmid">34999063</object-id></mixed-citation></ref>
<ref id="pone.0314005.ref011"><label>11</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Makra</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Matyasovszky</surname> <given-names>I</given-names></name>, <name name-style="western"><surname>Deák</surname> <given-names>ÁJ</given-names></name>. <article-title>Trends in the characteristics of allergenic pollen circulation in central Europe based on the example of Szeged, Hungary.</article-title> <source>Atmospheric Environment</source>. <year>2011</year>;<volume>45</volume>: <fpage>6010</fpage>–<lpage>6018</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.atmosenv.2011.07.051" xlink:type="simple">10.1016/j.atmosenv.2011.07.051</ext-link></comment></mixed-citation></ref>
<ref id="pone.0314005.ref012"><label>12</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Šikoparija</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Marko</surname> <given-names>O</given-names></name>, <name name-style="western"><surname>Panić</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Jakovetić</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Radišić</surname> <given-names>P</given-names></name>. <article-title>How to prepare a pollen calendar for forecasting daily pollen concentrations of Ambrosia, Betula and Poaceae?</article-title> <source>Aerobiologia</source>. <year>2018</year>;<volume>34</volume>: <fpage>203</fpage>–<lpage>217</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s10453-018-9507-9" xlink:type="simple">10.1007/s10453-018-9507-9</ext-link></comment></mixed-citation></ref>
<ref id="pone.0314005.ref013"><label>13</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Makra</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Matyasovszky</surname> <given-names>I</given-names></name>, <name name-style="western"><surname>Tusnády</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Ziska</surname> <given-names>LH</given-names></name>, <name name-style="western"><surname>Hess</surname> <given-names>JJ</given-names></name>, <name name-style="western"><surname>Nyúl</surname> <given-names>LG</given-names></name>, <etal>et al</etal>. <article-title>A temporally and spatially explicit, data-driven estimation of airborne ragweed pollen concentrations across Europe</article-title>. <source>Science of The Total Environment</source>. <year>2023</year>;<volume>905</volume>: <fpage>167095</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.scitotenv.2023.167095" xlink:type="simple">10.1016/j.scitotenv.2023.167095</ext-link></comment> <object-id pub-id-type="pmid">37748607</object-id></mixed-citation></ref>
<ref id="pone.0314005.ref014"><label>14</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Marchetti</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Pesce</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Villani</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Antonicelli</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Ariano</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Attena</surname> <given-names>F</given-names></name>, <etal>et al</etal>. <article-title>Pollen concentrations and prevalence of asthma and allergic rhinitis in Italy: Evidence from the GEIRD study</article-title>. <source>Science of The Total Environment</source>. <year>2017</year>;<volume>584–585</volume>: <fpage>1093</fpage>–<lpage>1099</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.scitotenv.2017.01.168" xlink:type="simple">10.1016/j.scitotenv.2017.01.168</ext-link></comment> <object-id pub-id-type="pmid">28169023</object-id></mixed-citation></ref>
<ref id="pone.0314005.ref015"><label>15</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Díaz-Morales</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Corrochano</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>López-Martín</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Le Clainche</surname> <given-names>S</given-names></name>. <article-title>Deep learning combined with singular value decomposition to reconstruct databases in fluid dynamics</article-title>. <source>Expert Systems with Applications</source>. <year>2024</year>;<volume>238</volume>: <fpage>121924</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.eswa.2023.121924" xlink:type="simple">10.1016/j.eswa.2023.121924</ext-link></comment></mixed-citation></ref>
<ref id="pone.0314005.ref016"><label>16</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hetherington</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Corrochano</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Abadía-Heredia</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Lazpita</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Muñoz</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Díaz</surname> <given-names>P</given-names></name>, <etal>et al</etal>. <article-title>ModelFLOWs-app: data-driven post-processing and reduced order modelling tools.</article-title> <source>arXiv</source>; <year>2023</year>. Available: <ext-link ext-link-type="uri" xlink:href="http://arxiv.org/abs/2305.17150" xlink:type="simple">http://arxiv.org/abs/2305.17150</ext-link>.</mixed-citation></ref>
<ref id="pone.0314005.ref017"><label>17</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hetherington</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Serfaty</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Corrochano</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Soria</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Le Clainche</surname> <given-names>S</given-names></name>. <source>Data repairing and resolution enhancement using data-driven modal decomposition and deep learning</source>. <year>2024</year> [cited 8 Apr 2024]. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.48550/ARXIV.2401.11286" xlink:type="simple">10.48550/ARXIV.2401.11286</ext-link></comment></mixed-citation></ref>
<ref id="pone.0314005.ref018"><label>18</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Venturi</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Karniadakis</surname> <given-names>GE</given-names></name>. <article-title>Gappy data and reconstruction procedures for flow past a cylinder</article-title>. <source>J Fluid Mech</source>. <year>2004</year>;<volume>519</volume>: <fpage>315</fpage>–<lpage>336</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1017/S0022112004001338" xlink:type="simple">10.1017/S0022112004001338</ext-link></comment></mixed-citation></ref>
<ref id="pone.0314005.ref019"><label>19</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Beckers</surname> <given-names>JM</given-names></name>, <name name-style="western"><surname>Rixen</surname> <given-names>M</given-names></name>. <article-title>EOF Calculations and Data Filling from Incomplete Oceanographic Datasets</article-title>. <source>J Atmos Oceanic Technol</source>. <year>2003</year>;<volume>20</volume>: <fpage>1839</fpage>–<lpage>1856</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1175/1520-0426%282003%29020%3C1839%3AECADFF%3E2.0.CO%3B2" xlink:type="simple">10.1175/1520-0426(2003)020&lt;1839:ECADFF&gt;2.0.CO;2</ext-link></comment></mixed-citation></ref>
<ref id="pone.0314005.ref020"><label>20</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Bleidorn</surname> <given-names>MT</given-names></name>, <name name-style="western"><surname>Pinto</surname> <given-names>WDP</given-names></name>, <name name-style="western"><surname>Schmidt</surname> <given-names>IM</given-names></name>, <name name-style="western"><surname>Mendonça</surname> <given-names>ASF</given-names></name>, <name name-style="western"><surname>Reis</surname> <given-names>JATD</given-names></name>. <article-title>Methodological approaches for imputing missing data into monthly flows series.</article-title> <source>Rev ambiente água.</source> <year>2022</year>;<volume>17</volume>: <fpage>1</fpage>–<lpage>27</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.4136/ambi-agua.2795" xlink:type="simple">10.4136/ambi-agua.2795</ext-link></comment></mixed-citation></ref>
<ref id="pone.0314005.ref021"><label>21</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Junger</surname> <given-names>WL</given-names></name>, <name name-style="western"><surname>Ponce De Leon</surname> <given-names>A</given-names></name>. <article-title>Imputation of missing data in time series for air pollutants</article-title>. <source>Atmospheric Environment</source>. <year>2015</year>;<volume>102</volume>: <fpage>96</fpage>–<lpage>104</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.atmosenv.2014.11.049" xlink:type="simple">10.1016/j.atmosenv.2014.11.049</ext-link></comment></mixed-citation></ref>
<ref id="pone.0314005.ref022"><label>22</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Nelsen</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Williams</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Williams</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Berrett</surname> <given-names>C</given-names></name>. <article-title>An Empirical Mode-Spatial Model for Environmental Data Imputation.</article-title> <source>Hydrology</source>. <year>2018</year>;<volume>5</volume>: <fpage>63</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3390/hydrology5040063" xlink:type="simple">10.3390/hydrology5040063</ext-link></comment></mixed-citation></ref>
<ref id="pone.0314005.ref023"><label>23</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Plaia</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Bondi</surname> <given-names>A</given-names></name>. <article-title>Single imputation method of missing values in environmental pollution data sets</article-title>. <source>Atmospheric Environment</source>. <year>2006</year>;<volume>40</volume>: <fpage>7316</fpage>–<lpage>7330</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.atmosenv.2006.06.040" xlink:type="simple">10.1016/j.atmosenv.2006.06.040</ext-link></comment></mixed-citation></ref>
<ref id="pone.0314005.ref024"><label>24</label><mixed-citation publication-type="other" xlink:type="simple">ARPAV. Il monitoraggio aerobiologico nel Veneto: i pollini allergenici. Padova: Agenzia Regionale per la Prevenzione e protezione Ambientale del Veneto; 2004. Available: <ext-link ext-link-type="uri" xlink:href="https://www.arpa.veneto.it/arpavinforma/pubblicazioni/il-monitoraggio-aerobiologico-nel-veneto-i-pollini-allergenici" xlink:type="simple">https://www.arpa.veneto.it/arpavinforma/pubblicazioni/il-monitoraggio-aerobiologico-nel-veneto-i-pollini-allergenici</ext-link>.</mixed-citation></ref>
<ref id="pone.0314005.ref025"><label>25</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Ogden</surname> <given-names>EC</given-names></name>, <collab>New York State Museum and Science Service</collab>, <collab>U.S. Atomic Energy Commission</collab>, editors. <source>Manual for sampling airborne pollen</source>. <publisher-loc>New York</publisher-loc>: <publisher-name>Hafner Press</publisher-name>; <year>1974</year>.</mixed-citation></ref>
<ref id="pone.0314005.ref026"><label>26</label><mixed-citation publication-type="book" xlink:type="simple"><collab>R Core Team</collab>. <source>R: A language and environment for statistical computing</source>. <publisher-loc>Vienna, Austria</publisher-loc>: <publisher-name>R Foundation for Statistical Computing</publisher-name>; <year>2023</year>. Available: <ext-link ext-link-type="uri" xlink:href="https://www.R-project.org/" xlink:type="simple">https://www.R-project.org/</ext-link>.</mixed-citation></ref>
<ref id="pone.0314005.ref027"><label>27</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Andersen</surname> <given-names>TB</given-names></name>. <article-title>A model to predict the beginning of the pollen season.</article-title> <source>Grana</source>. <year>1991</year>;<volume>30</volume>: <fpage>269</fpage>–<lpage>275</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1080/00173139109427810" xlink:type="simple">10.1080/00173139109427810</ext-link></comment></mixed-citation></ref>
<ref id="pone.0314005.ref028"><label>28</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Kasam</surname> <given-names>AA</given-names></name>, <name name-style="western"><surname>Lee</surname> <given-names>BD</given-names></name>, <name name-style="western"><surname>Paredis</surname> <given-names>CJJ</given-names></name>. <article-title>Statistical methods for interpolating missing meteorological data for use in building simulation.</article-title> <source>Build Simul</source>. <year>2014</year>;<volume>7</volume>: <fpage>455</fpage>–<lpage>465</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s12273-014-0174-7" xlink:type="simple">10.1007/s12273-014-0174-7</ext-link></comment></mixed-citation></ref>
<ref id="pone.0314005.ref029"><label>29</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Blanco-Alegre</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Castro</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Calvo</surname> <given-names>AI</given-names></name>, <name name-style="western"><surname>Oduber</surname> <given-names>F</given-names></name>, <name name-style="western"><surname>Fernández-González</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Valencia-Barrera</surname> <given-names>RM</given-names></name>, <etal>et al</etal>. <article-title>Towards a model of wet deposition of bioaerosols: The raindrop size role</article-title>. <source>Science of The Total Environment</source>. <year>2021</year>;<volume>767</volume>: <fpage>145426</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.scitotenv.2021.145426" xlink:type="simple">10.1016/j.scitotenv.2021.145426</ext-link></comment> <object-id pub-id-type="pmid">33550056</object-id></mixed-citation></ref>
<ref id="pone.0314005.ref030"><label>30</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Schramm</surname> <given-names>PJ</given-names></name>, <name name-style="western"><surname>Brown</surname> <given-names>CL</given-names></name>, <name name-style="western"><surname>Saha</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Conlon</surname> <given-names>KC</given-names></name>, <name name-style="western"><surname>Manangan</surname> <given-names>AP</given-names></name>, <name name-style="western"><surname>Bell</surname> <given-names>JE</given-names></name>, <etal>et al</etal>. <article-title>A systematic review of the effects of temperature and precipitation on pollen concentrations and season timing, and implications for human health</article-title>. <source>Int J Biometeorol</source>. <year>2021</year>;<volume>65</volume>: <fpage>1615</fpage>–<lpage>1628</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s00484-021-02128-7" xlink:type="simple">10.1007/s00484-021-02128-7</ext-link></comment> <object-id pub-id-type="pmid">33877430</object-id></mixed-citation></ref>
<ref id="pone.0314005.ref031"><label>31</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Yozgatligil</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Aslan</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Iyigun</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Batmaz</surname> <given-names>I</given-names></name>. <article-title>Comparison of missing value imputation methods in time series: the case of Turkish meteorological data.</article-title> <source>Theor Appl Climatol</source>. <year>2013</year>;<volume>112</volume>: <fpage>143</fpage>–<lpage>167</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s00704-012-0723-x" xlink:type="simple">10.1007/s00704-012-0723-x</ext-link></comment></mixed-citation></ref>
<ref id="pone.0314005.ref032"><label>32</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Gehrig</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Clot</surname> <given-names>B</given-names></name>. <article-title>50 Years of Pollen Monitoring in Basel (Switzerland) Demonstrate the Influence of Climate Change on Airborne Pollen.</article-title> <source>Front Allergy.</source> <year>2021</year>;<volume>2</volume>: <fpage>677159</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/falgy.2021.677159" xlink:type="simple">10.3389/falgy.2021.677159</ext-link></comment> <object-id pub-id-type="pmid">35387022</object-id></mixed-citation></ref>
</ref-list>
</back>
<sub-article article-type="aggregated-review-documents" id="pone.0314005.r001" specific-use="decision-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pone.0314005.r001</article-id>
<title-group>
<article-title>Decision Letter 0</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Cowley</surname>
<given-names>Hugh</given-names>
</name>
<role>Staff Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2024</copyright-year>
<copyright-holder>Hugh Cowley</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pone.0314005" document-id-type="doi" document-type="article" id="rel-obj001" link-type="peer-reviewed-article"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>0</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">9 Sep 2024</named-content>
</p>
<p>PONE-D-24-22455</p>
<p>A new method based on physical patterns to impute aerobiological datasets</p>
<p>PLOS ONE</p>
<p>Dear Dr. Marcon,</p>
<p>Thank you for submitting your manuscript to PLOS ONE. After careful consideration, we feel that it has merit but does not fully meet PLOS ONE’s publication criteria as it currently stands. Therefore, we invite you to submit a revised version of the manuscript that addresses the points raised during the review process.</p>
<p><bold>Your manuscript has been evaluated by three reviewers, and their comments are appended below and in the attached file.</bold><!-- </div> --><!-- <div> --> <!-- </div> --><!-- <div> --><bold>The reviewers have primarily requested clarification of some aspects of your study, however, please address in detail the comment from Reviewer 2 regarding calculation of the start and end dates of the pollen season for each pollen type. Please ensure you address each of the reviewers' comments when revising your manuscript.</bold><!-- </div> --><!-- <div> --> <!-- </div> --><!-- <div> --><bold>We note that one or more reviewers has recommended that you cite specific previously published works. As always, we recommend that you please review and evaluate the requested works to determine whether they are relevant and should be cited. It is not a requirement to cite these works. We appreciate your attention to this request.</bold></p>
<p>Please submit your revised manuscript by Oct 21 2024 11:59PM. If you will need more time than this to complete your revisions, please reply to this message or contact the journal office at <email xlink:type="simple">plosone@plos.org</email>. When you're ready to submit your revision, log on to <ext-link ext-link-type="uri" xlink:href="https://www.editorialmanager.com/pone/" xlink:type="simple">https://www.editorialmanager.com/pone/</ext-link> and select the 'Submissions Needing Revision' folder to locate your manuscript file.</p>
<p>Please include the following items when submitting your revised manuscript:<!-- </div> --><list list-type="bullet"><list-item><p>A rebuttal letter that responds to each point raised by the academic editor and reviewer(s). You should upload this letter as a separate file labeled 'Response to Reviewers'.</p></list-item><list-item><p>A marked-up copy of your manuscript that highlights changes made to the original version. You should upload this as a separate file labeled 'Revised Manuscript with Track Changes'.</p></list-item><list-item><p>An unmarked version of your revised paper without tracked changes. You should upload this as a separate file labeled 'Manuscript'.</p></list-item></list></p>
<p>If you would like to make changes to your financial disclosure, please include your updated statement in your cover letter. Guidelines for resubmitting your figure files are available below the reviewer comments at the end of this letter.</p>
<p>If applicable, we recommend that you deposit your laboratory protocols in protocols.io to enhance the reproducibility of your results. Protocols.io assigns your protocol its own identifier (DOI) so that it can be cited independently in the future. For instructions see: <ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/plosone/s/submission-guidelines#loc-laboratory-protocols" xlink:type="simple">https://journals.plos.org/plosone/s/submission-guidelines#loc-laboratory-protocols</ext-link>. Additionally, PLOS ONE offers an option for publishing peer-reviewed Lab Protocol articles, which describe protocols hosted on protocols.io. Read more information on sharing protocols at <ext-link ext-link-type="uri" xlink:href="https://plos.org/protocols?utm_medium=editorial-email&amp;utm_source=authorletters&amp;utm_campaign=protocols" xlink:type="simple">https://plos.org/protocols?utm_medium=editorial-email&amp;utm_source=authorletters&amp;utm_campaign=protocols</ext-link>.</p>
<p>We look forward to receiving your revised manuscript.</p>
<p>Kind regards,</p>
<p>Hugh Cowley</p>
<p>Staff Editor</p>
<p>PLOS ONE</p>
<p><bold>Journal Requirements:</bold></p>
<p>1. When submitting your revision, we need you to address these additional requirements.</p>
<p>Please ensure that your manuscript meets PLOS ONE's style requirements, including those for file naming. The PLOS ONE style templates can be found at <ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/plosone/s/file?id=wjVg/PLOSOne_formatting_sample_main_body.pdf" xlink:type="simple">https://journals.plos.org/plosone/s/file?id=wjVg/PLOSOne_formatting_sample_main_body.pdf</ext-link> and <ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/plosone/s/file?id=ba62/PLOSOne_formatting_sample_title_authors_affiliations.pdf" xlink:type="simple">https://journals.plos.org/plosone/s/file?id=ba62/PLOSOne_formatting_sample_title_authors_affiliations.pdf</ext-link>.</p>
<p>2. Please note that PLOS ONE has specific guidelines on code sharing for submissions in which author-generated code underpins the findings in the manuscript. In these cases, we expect all author-generated code to be made available without restrictions upon publication of the work. Please review our guidelines at <ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/plosone/s/materials-and-software-sharing#loc-sharing-code" xlink:type="simple">https://journals.plos.org/plosone/s/materials-and-software-sharing#loc-sharing-code</ext-link> and ensure that your code is shared in a way that follows best practice and facilitates reproducibility and reuse.</p>
<p>3. Thank you for stating in your Funding Statement: </p>
<p>This research has received grants from the European Union through the Italian Ministry of University and Research under the ESF REACT-EU Green and Innovation funding programme (Ministerial Decree 1061/2021) and the NextGenerationEu funding programme (Ministerial Decree 737/2021). Article processing charges were supported by the special fund at the University of Verona dedicated to Open Access publications. The funder had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</p>
<p>Please provide an amended statement that declares *all* the funding or sources of support (whether external or internal to your organization) received during this study, as detailed online in our guide for authors at <ext-link ext-link-type="uri" xlink:href="http://journals.plos.org/plosone/s/submit-now" xlink:type="simple">http://journals.plos.org/plosone/s/submit-now</ext-link>. Please also include the statement “There was no additional external funding received for this study.” in your updated Funding Statement.</p>
<p>Please include your amended Funding Statement within your cover letter. We will change the online submission form on your behalf.</p>
<p>[Note: HTML markup is below. Please do not edit.]</p>
<p>Reviewers' comments:</p>
<p>Reviewers' Responses to Questions</p>
<p><!-- <font color="black"> --><bold>Comments to the Author</bold></p>
<p>1. Is the manuscript technically sound, and do the data support the conclusions?</p>
<p>The manuscript must describe a technically sound piece of scientific research with data that supports the conclusions. Experiments must have been conducted rigorously, with appropriate controls, replication, and sample sizes. The conclusions must be drawn appropriately based on the data presented. <!-- </font> --></p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: Yes</p>
<p>Reviewer #3: Yes</p>
<p>**********</p>
<p><!-- <font color="black"> -->2. Has the statistical analysis been performed appropriately and rigorously? <!-- </font> --></p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: Yes</p>
<p>Reviewer #3: Yes</p>
<p>**********</p>
<p><!-- <font color="black"> -->3. Have the authors made all data underlying the findings in their manuscript fully available?</p>
<p>The <ext-link ext-link-type="uri" xlink:href="http://www.plosone.org/static/policies.action#sharing" xlink:type="simple">PLOS Data policy</ext-link> requires authors to make all data underlying the findings described in their manuscript fully available without restriction, with rare exception (please refer to the Data Availability Statement in the manuscript PDF file). The data should be provided as part of the manuscript or its supporting information, or deposited to a public repository. For example, in addition to summary statistics, the data points behind means, medians and variance measures should be available. If there are restrictions on publicly sharing data—e.g. participant privacy or use of data from a third party—those must be specified.<!-- </font> --></p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: Yes</p>
<p>Reviewer #3: Yes</p>
<p>**********</p>
<p><!-- <font color="black"> -->4. Is the manuscript presented in an intelligible fashion and written in standard English?</p>
<p>PLOS ONE does not copyedit accepted manuscripts, so the language in submitted articles must be clear, correct, and unambiguous. Any typographical or grammatical errors should be corrected at revision, so please note any specific errors here.<!-- </font> --></p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: Yes</p>
<p>Reviewer #3: Yes</p>
<p>**********</p>
<p><!-- <font color="black"> -->5. Review Comments to the Author</p>
<p>Please use the space provided to explain your answers to the questions above. You may also include additional comments for the author, including concerns about dual publication, research ethics, or publication ethics. (Please upload your review as an attachment if it exceeds 20,000 characters)<!-- </font> --></p>
<p><bold>Reviewer #1:</bold> This study (A new method based on physical patterns to impute aerobiological datasets) includes an analysis of the effectiveness of Gappy Singular Value Decomposition (GSVD) in aerobiological datasets. It addresses a clear and evident need in the field of aerobiology: the lack of data on pollen concentration on some days. I would like to congratulate the authors of the paper because it is very well written. Thus, I consider that this paper, after minor corrections, is suitable for publication in PLOS ONE. Some minor suggestions are listed below.</p>
<p>L177-179: Use roman numerals i and ii without italics.</p>
<p>L322: While reading this paragraph, as a researcher, I would opt for the "simpler" method. Perhaps add that more studies applying GSVD on different pollen types and environments are needed to reach a more generalized conclusion.</p>
<p>L327: Emphasize the importance of linking pollen data with meteorological data, as has been observed, for example, in this study <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.scitotenv.2021.145426" xlink:type="simple">https://doi.org/10.1016/j.scitotenv.2021.145426</ext-link></p>
<p><bold>Reviewer #2: </bold>Dear Authors,</p>
<p>My review is as follows.</p>
<p>In addition, it is attached as a PDF file to preserve its formatting.</p>
<p>Reviewer</p>
<p>Tagliaferro, S., Corrochano, A., Marchetti, P., Marcon, A., Le Clainche, S., 2024: A new method based on physical patterns to impute aerobiological datasets. PLOS ONE;</p>
<p>The authors conducted a novel simulation study to evaluate the effectiveness of Gappy Singular Value Decomposition (GSVD), as a data-driven approach, comparing it with the moving mean interpolation, as a statistical approach. However, high variability in pollen concentrations and the increasing number of missing data and the increasing lack of those around the sample mean negatively affected imputation accuracy.</p>
<p>My comments are as follows.</p>
<p>Comments:</p>
<p>• Line 123: You used the 95% method (start: 2.5th percentile; end: 97.5th percentile) when calculating the start and end dates of the pollen season for each pollen type.</p>
<p>However, if pollen concentrations and within-season distribution vary by year, such an approach is likely to contract or expand season duration independently from climate change. For example, if the API was 2000 for a given year, the start of the pollen season would be when the cumulative concentration reached 50; but if the API for the same location was 5000 for the following year, then the pollen season would not start until the cumulative concentration reached 125. Use of this system to mark the start and end of the pollen season would mask any climate-related or temperature-related changes associated with pollen season duration.</p>
<p>Therefore, rather than using start and end percentages to determine the pollen season, a widely used approach is to take the first (last) date on which at least 1 pollen grain m<sup>−3</sup> of air is recorded and at least 5 consecutive (preceding) days also show 1 or more pollen grains m<sup>−3</sup> (Makra et al., 2012).</p>
<p>Reference</p>
<p>Makra, L., Matyasovszky, I., Bálint, B., 2012: Association of allergic asthma emergency room visits with the main biological and chemical air pollutants. Science of the Total Environment, 432, 288-296. doi:10.1016/j.scitotenv.2012.05.088</p>
<p>• Line 136: Graminaceae is the former name of the grass family. I recommend using Poaceae everywhere in the manuscript instead.</p>
<p>• Line 247: correctly: “For both pollen types”, instead of “For both pollens”;</p>
<p>• Line 341: correctly: “with”, instead of “whit”;</p>
<p>Reviewer</p>
<p><bold>Reviewer #3: </bold>The manuscript entitled “A new method based on physical patterns to impute aerobiological datasets” by Tagliaferro et al. presents a novel method to handle missing values in the monitoring of bioaerosols, in comparison to the current method of managing missing data, while also analyzing which factors influence the accuracy of these methods. For this, the authors use pollen data from two different pollen species from two different monitoring stations. The topic of the paper is highly relevant in the context of worldwide increasing allergy prevalence and the influence of climate change on the distribution of respiratory allergens. The paper is overall well written, explaining the methods employed for the data comparison.</p>
<p>There are a few minor comments regarding the manuscript:</p>
<p>1. Could the authors add some information regarding what pollen genera are comprised within the examined Graminaceae pollen and discuss how this could influence the accuracy of the presented methods since they mention a lower variability in this pollen type and a longer pollen season?</p>
<p>2. Line 120: The authors mention analyzing Alnus and Poaceae pollen, whereas throughout the manuscript they use Graminaceae pollen</p>
<p>3. Line 341 – typo, probably “with” rather than “whit”</p>
<p>**********</p>
<p><!-- <font color="black"> -->6. PLOS authors have the option to publish the peer review history of their article (<ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/plosone/s/editorial-and-peer-review-process#loc-peer-review-history" xlink:type="simple">what does this mean?</ext-link>). If published, this will include your full peer review and any attached files.</p>
<p>If you choose “no”, your identity will remain anonymous but your review may still be made public.</p>
<p><bold>Do you want your identity to be public for this peer review?</bold> For information about this choice, including consent withdrawal, please see our <ext-link ext-link-type="uri" xlink:href="https://www.plos.org/privacy-policy" xlink:type="simple">Privacy Policy</ext-link>.<!-- </font> --></p>
<p>Reviewer #1: No</p>
<p>Reviewer #2: No</p>
<p>Reviewer #3: No</p>
<p>**********</p>
<p>[NOTE: If reviewer comments were submitted as an attachment file, they will be attached to this email and accessible via the submission site. Please log into your account, locate the manuscript record, and check for the action link "View Attachments". If this link does not appear, there are no attachment files.]</p>
<p>While revising your submission, please upload your figure files to the Preflight Analysis and Conversion Engine (PACE) digital diagnostic tool, <ext-link ext-link-type="uri" xlink:href="https://pacev2.apexcovantage.com/" xlink:type="simple">https://pacev2.apexcovantage.com/</ext-link>. PACE helps ensure that figures meet PLOS requirements. To use PACE, you must first register as a user. Registration is free. Then, login and navigate to the UPLOAD tab, where you will find detailed instructions on how to use the tool. If you encounter any issues or have any questions when using PACE, please email PLOS at <email xlink:type="simple">figures@plos.org</email>. Please note that Supporting Information files do not need this step.</p>
<supplementary-material id="pone.0314005.s001" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pone.0314005.s001" xlink:type="simple">
<label>Attachment</label>
<caption>
<p>Submitted filename: <named-content content-type="submitted-filename">Tagliaferro_rev_PONE-D-24-22455_reviewer.pdf</named-content></p>
</caption>
</supplementary-material>
</body>
</sub-article>
<sub-article article-type="author-comment" id="pone.0314005.r002">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pone.0314005.r002</article-id>
<title-group>
<article-title>Author response to Decision Letter 0</article-title>
</title-group>
<related-object document-id="10.1371/journal.pone.0314005" document-id-type="doi" document-type="peer-reviewed-article" id="rel-obj002" link-type="rebutted-decision-letter" object-id="10.1371/journal.pone.0314005.r001" object-id-type="doi" object-type="decision-letter"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>1</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="author-response-date">10 Oct 2024</named-content>
</p>
<p>The answers to Reviewers' questions are available in the attached file "Response to Reviewers".</p>
<supplementary-material id="pone.0314005.s002" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" position="float" xlink:href="info:doi/10.1371/journal.pone.0314005.s002" xlink:type="simple">
<label>Attachment</label>
<caption>
<p>Submitted filename: <named-content content-type="submitted-filename">Response to Reviewers.docx</named-content></p>
</caption>
</supplementary-material>
</body>
</sub-article>
<sub-article article-type="aggregated-review-documents" id="pone.0314005.r003" specific-use="decision-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pone.0314005.r003</article-id>
<title-group>
<article-title>Decision Letter 1</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Singh</surname>
<given-names>Rajeev</given-names>
</name>
<role>Academic Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2024</copyright-year>
<copyright-holder>Rajeev Singh</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pone.0314005" document-id-type="doi" document-type="article" id="rel-obj003" link-type="peer-reviewed-article"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>1</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">5 Nov 2024</named-content>
</p>
<p>A new method based on physical patterns to impute aerobiological datasets</p>
<p>PONE-D-24-22455R1</p>
<p>Dear Dr. Marcon,</p>
<p>We’re pleased to inform you that your manuscript has been judged scientifically suitable for publication and will be formally accepted for publication once it meets all outstanding technical requirements.</p>
<p>Within one week, you’ll receive an e-mail detailing the required amendments. When these have been addressed, you’ll receive a formal acceptance letter and your manuscript will be scheduled for publication.</p>
<p>An invoice will be generated when your article is formally accepted. Please note, if your institution has a publishing partnership with PLOS and your article meets the relevant criteria, all or part of your publication costs will be covered. Please make sure your user information is up-to-date by logging into Editorial Manager at <ext-link ext-link-type="uri" xlink:href="https://www.editorialmanager.com/pone/" xlink:type="simple">Editorial Manager®</ext-link> and clicking the ‘Update My Information’ link at the top of the page. If you have any questions relating to publication charges, please contact our Author Billing department directly at <email xlink:type="simple">authorbilling@plos.org</email>.</p>
<p>If your institution or institutions have a press office, please notify them about your upcoming paper to help maximize its impact. If they’ll be preparing press materials, please inform our press team as soon as possible -- no later than 48 hours after receiving the formal acceptance. Your manuscript will remain under strict press embargo until 2 pm Eastern Time on the date of publication. For more information, please contact <email xlink:type="simple">onepress@plos.org</email>.</p>
<p>Kind regards,</p>
<p>Rajeev Singh</p>
<p>Academic Editor</p>
<p>PLOS ONE</p>
<p>Additional Editor Comments (optional):</p>
<p>Reviewers' comments:</p>
<p>Reviewers' Responses to Questions</p>
<p><!-- <font color="black"> --><bold>Comments to the Author</bold></p>
<p>1. If the authors have adequately addressed your comments raised in a previous round of review and you feel that this manuscript is now acceptable for publication, you may indicate that here to bypass the “Comments to the Author” section, enter your conflict of interest statement in the “Confidential to Editor” section, and submit your "Accept" recommendation.<!-- </font> --></p>
<p>Reviewer #1: All comments have been addressed</p>
<p>Reviewer #2: All comments have been addressed</p>
<p>Reviewer #3: All comments have been addressed</p>
<p>**********</p>
<p><!-- <font color="black"> -->2. Is the manuscript technically sound, and do the data support the conclusions?</p>
<p>The manuscript must describe a technically sound piece of scientific research with data that supports the conclusions. Experiments must have been conducted rigorously, with appropriate controls, replication, and sample sizes. The conclusions must be drawn appropriately based on the data presented. <!-- </font> --></p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: Yes</p>
<p>Reviewer #3: Yes</p>
<p>**********</p>
<p><!-- <font color="black"> -->3. Has the statistical analysis been performed appropriately and rigorously? <!-- </font> --></p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: Yes</p>
<p>Reviewer #3: Yes</p>
<p>**********</p>
<p><!-- <font color="black"> -->4. Have the authors made all data underlying the findings in their manuscript fully available?</p>
<p>The <ext-link ext-link-type="uri" xlink:href="http://www.plosone.org/static/policies.action#sharing" xlink:type="simple">PLOS Data policy</ext-link> requires authors to make all data underlying the findings described in their manuscript fully available without restriction, with rare exception (please refer to the Data Availability Statement in the manuscript PDF file). The data should be provided as part of the manuscript or its supporting information, or deposited to a public repository. For example, in addition to summary statistics, the data points behind means, medians and variance measures should be available. If there are restrictions on publicly sharing data—e.g. participant privacy or use of data from a third party—those must be specified.<!-- </font> --></p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: Yes</p>
<p>Reviewer #3: Yes</p>
<p>**********</p>
<p><!-- <font color="black"> -->5. Is the manuscript presented in an intelligible fashion and written in standard English?</p>
<p>PLOS ONE does not copyedit accepted manuscripts, so the language in submitted articles must be clear, correct, and unambiguous. Any typographical or grammatical errors should be corrected at revision, so please note any specific errors here.<!-- </font> --></p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: Yes</p>
<p>Reviewer #3: Yes</p>
<p>**********</p>
<p><!-- <font color="black"> -->6. Review Comments to the Author</p>
<p>Please use the space provided to explain your answers to the questions above. You may also include additional comments for the author, including concerns about dual publication, research ethics, or publication ethics. (Please upload your review as an attachment if it exceeds 20,000 characters)<!-- </font> --></p>
<p>Reviewer #1: The content relating to the previous review has been updated and therefore this paper is suitable for publication in its current form.</p>
<p>Reviewer #2: Dear Authors,</p>
<p>I accept your answer to my comments and suggest publishing the revised version of your manuscript as it is.</p>
<p>Reviewer</p>
<p>Reviewer #3: Considering the authors' adequate and complete responses to all my comments, I recommend the article to be accepted for publication.</p>
<p>**********</p>
<p><!-- <font color="black"> -->7. PLOS authors have the option to publish the peer review history of their article (<ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/plosone/s/editorial-and-peer-review-process#loc-peer-review-history" xlink:type="simple">what does this mean?</ext-link>). If published, this will include your full peer review and any attached files.</p>
<p>If you choose “no”, your identity will remain anonymous but your review may still be made public.</p>
<p><bold>Do you want your identity to be public for this peer review?</bold> For information about this choice, including consent withdrawal, please see our <ext-link ext-link-type="uri" xlink:href="https://www.plos.org/privacy-policy" xlink:type="simple">Privacy Policy</ext-link>.<!-- </font> --></p>
<p>Reviewer #1: No</p>
<p>Reviewer #2: No</p>
<p>Reviewer #3: No</p>
<p>**********</p>
</body>
</sub-article>
<sub-article article-type="editor-report" id="pone.0314005.r004" specific-use="acceptance-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pone.0314005.r004</article-id>
<title-group>
<article-title>Acceptance letter</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Singh</surname>
<given-names>Rajeev</given-names>
</name>
<role>Academic Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2024</copyright-year>
<copyright-holder>Rajeev Singh</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pone.0314005" document-id-type="doi" document-type="article" id="rel-obj004" link-type="peer-reviewed-article"/>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">8 Nov 2024</named-content>
</p>
<p>PONE-D-24-22455R1 </p>
<p>PLOS ONE</p>
<p>Dear Dr. Marcon,</p>
<p>I'm pleased to inform you that your manuscript has been deemed suitable for publication in PLOS ONE. Congratulations! Your manuscript is now being handed over to our production team.</p>
<p>At this stage, our production department will prepare your paper for publication. This includes ensuring the following:</p>
<p>* All references, tables, and figures are properly cited</p>
<p>* All relevant supporting information is included in the manuscript submission</p>
<p>* There are no issues that prevent the paper from being properly typeset</p>
<p>If revisions are needed, the production department will contact you directly to resolve them. If no revisions are needed, you will receive an email when the publication date has been set. At this time, we do not offer pre-publication proofs to authors during production of the accepted work. Please keep in mind that we are working through a large volume of accepted articles, so please give us a few weeks to review your paper and let you know the next and final steps. </p>
<p>Lastly, if your institution or institutions have a press office, please let them know about your upcoming paper now to help maximize its impact. If they'll be preparing press materials, please inform our press team within the next 48 hours. Your manuscript will remain under strict press embargo until 2 pm Eastern Time on the date of publication. For more information, please contact <email xlink:type="simple">onepress@plos.org</email>.</p>
<p>If we can help with anything else, please email us at <email xlink:type="simple">customercare@plos.org</email>.</p>
<p>Thank you for submitting your work to PLOS ONE and supporting open access. </p>
<p>Kind regards, </p>
<p>PLOS ONE Editorial Office Staff</p>
<p>on behalf of</p>
<p>Dr. Rajeev Singh </p>
<p>Academic Editor</p>
<p>PLOS ONE</p>
</body>
</sub-article>
</article>