<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "http://jats.nlm.nih.gov/publishing/1.3/JATS-journalpublishing1-3.dtd">
<article article-type="research-article" dtd-version="1.3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<processing-meta>
<custom-meta-group content-type="composition">
<custom-meta specific-use="newgen" xlink:href="https://www.newgen.co/">
<meta-name>Composition Vendor</meta-name>
<meta-value>Newgen KnowledgeWorks (P) Ltd.</meta-value>
</custom-meta>
</custom-meta-group>
</processing-meta>
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">PLOS Digit Health</journal-id>
<journal-id journal-id-type="publisher-id">plos</journal-id>
<journal-id journal-id-type="pmc">plosdh</journal-id>
<journal-title-group>
<journal-title>PLOS Digital Health</journal-title>
</journal-title-group>
<issn pub-type="epub">2767-3170</issn>
<publisher>
<publisher-name>Public Library of Science</publisher-name>
<publisher-loc>San Francisco, CA USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.1371/journal.pdig.0001031</article-id>
<article-id pub-id-type="publisher-id">PDIG-D-25-00724</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Research Article</subject>
</subj-group>
<subj-group subj-group-type="Discipline-v3">
<subject>Computer and information sciences</subject><subj-group><subject>Artificial intelligence</subject><subj-group><subject>Machine learning</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Computer and information sciences</subject><subj-group><subject>Artificial intelligence</subject><subj-group><subject>Machine learning</subject><subj-group><subject>Support vector machines</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Public and occupational health</subject></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>People and places</subject><subj-group><subject>Population groupings</subject><subj-group><subject>Age groups</subject><subj-group><subject>Adults</subject><subj-group><subject>Young adults</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Mathematical and statistical techniques</subject><subj-group><subject>Statistical methods</subject><subj-group><subject>Forecasting</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Physical sciences</subject><subj-group><subject>Mathematics</subject><subj-group><subject>Statistics</subject><subj-group><subject>Statistical methods</subject><subj-group><subject>Forecasting</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Addiction</subject><subj-group><subject>Nicotine addiction</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Social sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Addiction</subject><subj-group><subject>Nicotine addiction</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Mental health and psychiatry</subject><subj-group><subject>Substance-related disorders</subject><subj-group><subject>Nicotine addiction</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Public and occupational health</subject><subj-group><subject>Substance-related disorders</subject><subj-group><subject>Nicotine addiction</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Behavior</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Social sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Behavior</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Medicine and health sciences</subject><subj-group><subject>Epidemiology</subject><subj-group><subject>Medical risk factors</subject></subj-group></subj-group></subj-group></article-categories>
<title-group>
<article-title>Identifying factors associated with vaping cessation in young adults: A machine learning and XAI approach</article-title>
<alt-title alt-title-type="running-head">AI-driven insights into youth vaping cessation</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Satheeshkumar</surname>
<given-names>Poolakkad S.</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="http://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/software/">Software</role>
<role content-type="http://credit.niso.org/contributor-roles/validation/">Validation</role>
<role content-type="http://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-original-draft/">Writing – original draft</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Lango</surname>
<given-names>Ian</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/software/">Software</role>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Zafo</surname>
<given-names>Swarnali</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/software/">Software</role>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Ebanks</surname>
<given-names>Mikaiel</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role content-type="http://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="http://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/validation/">Validation</role>
<role content-type="http://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-original-draft/">Writing – original draft</role>
<xref ref-type="aff" rid="aff004"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Das</surname>
<given-names>Rahul Kumar</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<xref ref-type="aff" rid="aff005"><sup>5</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Cheung</surname>
<given-names>Kit Wai</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<xref ref-type="aff" rid="aff005"><sup>5</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Pili</surname>
<given-names>Roberto</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role content-type="http://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0002-5298-6962</contrib-id>
<name name-style="western">
<surname>Mahajan</surname>
<given-names>Supriya D.</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role content-type="http://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/project-administration/">Project administration</role>
<role content-type="http://credit.niso.org/contributor-roles/resources/">Resources</role>
<role content-type="http://credit.niso.org/contributor-roles/supervision/">Supervision</role>
<role content-type="http://credit.niso.org/contributor-roles/validation/">Validation</role>
<role content-type="http://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-original-draft/">Writing – original draft</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff005"><sup>5</sup></xref>
<xref ref-type="corresp" rid="cor001">*</xref>
</contrib>
</contrib-group>
<aff id="aff001"><label>1</label> <addr-line>Department of Medicine, Division of Hematology and Oncology, Jacobs School of Medicine and Biomedical Sciences, University at Buffalo, Buffalo, New York, United States of America</addr-line></aff>
<aff id="aff002"><label>2</label> <addr-line>University at Buffalo, Buffalo, New York, United States of America</addr-line></aff>
<aff id="aff003"><label>3</label> <addr-line>Jacobs School of Medicine and Biomedical Sciences, Buffalo, New York, United States of America</addr-line></aff>
<aff id="aff004"><label>4</label> <addr-line>University at Buffalo, School of Public Health and Health Professions, Buffalo, New York, United States of America</addr-line></aff>
<aff id="aff005"><label>5</label> <addr-line>Department of Medicine, Division of Allergy, Immunology &amp; Rheumatology, Jacobs School of Medicine and Biomedical Sciences, University at Buffalo, Buffalo, New York, United States of America</addr-line></aff>
<contrib-group>
<contrib contrib-type="editor" xlink:type="simple">
<name name-style="western">
<surname>Sbaffi</surname>
<given-names>Laura</given-names>
</name>
<role>Editor</role>
<xref ref-type="aff" rid="edit1"/></contrib>
</contrib-group>
<aff id="edit1"><addr-line>The University of Sheffield, UNITED KINGDOM OF GREAT BRITAIN AND NORTHERN IRELAND</addr-line></aff>
<author-notes>
<fn fn-type="conflict" id="coi001">
<p>The authors have declared that no competing interests exist.</p>
</fn>
<corresp id="cor001">* E-mail: <email xlink:type="simple">smahajan@buffalo.edu</email></corresp>
</author-notes>
<pub-date pub-type="epub"><day>5</day><month>5</month><year>2026</year></pub-date>
<pub-date pub-type="collection"><month>5</month><year>2026</year></pub-date>
<volume>5</volume>
<issue>5</issue>
<elocation-id>e0001031</elocation-id>
<history>
<date date-type="received"><day>15</day><month>9</month><year>2025</year></date>
<date date-type="accepted"><day>9</day><month>4</month><year>2026</year></date>
</history>
<permissions>
<copyright-year>2026</copyright-year>
<copyright-holder>Satheeshkumar et al</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p></license>
</permissions>
<self-uri content-type="pdf" xlink:href="info:doi/10.1371/journal.pdig.0001031"/>
<abstract>
<p>The public health impact of vaping in the United States reflects a complex balance of potential benefits and emerging risks, as e‑cigarettes may reduce exposure to toxic combustion byproducts and support adult smoking cessation, yet growing evidence links vaping to respiratory and cardiovascular harm and youth uptake remains concerning, with 38.4% of adolescent users in 2024 reporting habitual use. To inform the optimal use of predictive technologies in cessation efforts, this study sought to characterize cessation‑related behaviors and attitudes among young adult vapers and evaluate machine learning and explainable AI methods for predicting quit attempts and cessation success. A social media–based survey captured behavioral, contextual, and demographic factors, and cessation was defined as self‑reported abstinence from all vaping products for at least 30 days. Predictors were identified using forward selection and backward elimination, and data were split into training and testing sets. Linear models (LASSO, ridge regression, elastic net) and nonlinear models (random forest, support vector machine) were trained and evaluated using AUC and Brier scores. Linear models demonstrated the strongest overall performance: LASSO achieved AUCs of 0.89 (training) and 0.91 (testing), ridge regression 0.88 and 0.93, and elastic net 0.91 for both sets. Nonlinear models showed signs of overfitting, with random forest achieving 0.99 in training but only 0.70 in testing, and SVM achieving 0.89 and 0.72. Key predictors included age, environmental triggers, vaping frequency, sex, and long‑term behavioral outlook. Individuals under 25 showed greater vulnerability to continued use; environmental cues, especially social exposure, were strongly associated with relapse; and erratic vaping patterns predicted lower cessation success.
While these models highlight behavioral and contextual factors that may influence cessation, findings should be interpreted as exploratory given the cross‑sectional design and sample characteristics. Larger, longitudinal studies are needed to validate these insights and clarify the potential of predictive modeling to inform targeted public health interventions.</p>
</abstract>
<abstract abstract-type="summary">
<title>Author summary</title>
<p>Vaping has become increasingly common among young adults in the United States, yet many users struggle to quit despite growing awareness of potential health risks. To better understand this challenge, we surveyed young adult vapers about their behaviors, motivations, and experiences with trying to stop. We then used several machine‑learning approaches to see whether these patterns could help predict who attempts to quit and who succeeds.</p>
<p>Our findings show that a combination of personal habits and environmental influences plays a major role in cessation. Younger adults, especially those under 25, were more likely to continue vaping, and social situations often triggered relapse. People who vaped frequently or in irregular patterns had a harder time quitting, while differences between men and women suggested that tailored support strategies may be helpful. Among the predictive tools we tested, simpler linear models performed the most reliably.</p>
<p>This study highlights how data‑driven methods can help identify factors linked to vaping cessation, but it also underscores the need for larger, long‑term research. Our results should be viewed as early insights that can guide future work aimed at reducing nicotine dependence and supporting young adults who want to quit.</p>
</abstract>
<funding-group>
<funding-statement>The author(s) received no specific funding for this work.</funding-statement>
</funding-group>
<counts>
<fig-count count="4"/>
<table-count count="3"/>
<page-count count="1"/>
</counts>
<custom-meta-group>
<custom-meta id="data-availability">
<meta-name>Data Availability</meta-name>
<meta-value>De‑identified survey data, including all variables, were used in the analyses. The analytical code used for data cleaning, feature engineering, model development, and explainable AI analyses is subject to a sharing restriction due to strict confidentiality agreements with our institutional IRB. The survey dataset used in this study contains sensitive information related to health behaviors among young adults from minority populations. Although all data used for analysis were de‑identified prior to use, the University at Buffalo Institutional Review Board (UBIRB) has determined that the dataset cannot be publicly shared due to ethical and legal restrictions intended to protect participant privacy. These restrictions are imposed by the UBIRB as part of the study’s exempt determination under 45 CFR 46.104. Because of these confidentiality requirements, the de‑identified dataset and the analytical code used for data cleaning, feature engineering, model development, and explainable AI analyses cannot be deposited in a public repository. Data access may be granted to qualified researchers who meet the criteria for confidential data access and agree to comply with UBIRB requirements. Requests for data access may be directed to: University at Buffalo Institutional Review Board (UBIRB), Office of Research Compliance, Clinical and Translational Research Center, Room 5018, 875 Ellicott Street, Buffalo, New York 14203, United States of America. Federalwide Assurance ID: FWA00008824; IRB Study ID: STUDY00005954; Email: <email xlink:type="simple">ub-irb@buffalo.edu</email>. The UBIRB reviewed and approved this study on January 27, 2022. The IRB oversees all decisions regarding access to the study data.</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="sec001" sec-type="intro">
<title>Introduction</title>
<p>Vaping has become increasingly prevalent among young adults in the United States, with individuals aged 18–24 reporting the highest rates of e‑cigarette use. Although e‑cigarettes were initially promoted as a harm‑reduction tool for adult smokers, their rapid uptake among youth and young adults, driven by appealing flavors and perceptions of reduced risk, has raised significant public health concerns. In 2024, 1.63 million middle and high school students reported current e‑cigarette use, and many young adults exhibit signs of nicotine dependence [<xref ref-type="bibr" rid="pdig.0001031.ref001">1</xref>–<xref ref-type="bibr" rid="pdig.0001031.ref004">4</xref>].</p>
<p>While some evidence suggests that e‑cigarettes may support smoking cessation for established smokers, emerging research highlights potential respiratory and cardiovascular risks, as well as high rates of habitual use among adolescents [<xref ref-type="bibr" rid="pdig.0001031.ref005">5</xref>–<xref ref-type="bibr" rid="pdig.0001031.ref009">9</xref>]. These patterns underscore the need for effective cessation strategies tailored to younger populations. Although digital interventions such as text‑based programs have shown promise in supporting quit attempts, cessation outcomes vary widely, and relapse remains common [<xref ref-type="bibr" rid="pdig.0001031.ref010">10</xref>,<xref ref-type="bibr" rid="pdig.0001031.ref011">11</xref>]. Vaping poses two significant challenges. For habitual smokers, e-cigarettes may diminish exposure to combustion byproducts and facilitate cessation. Nonetheless, associations with pulmonary and cardiovascular damage, together with elevated rates of regular use among adolescents [<xref ref-type="bibr" rid="pdig.0001031.ref012">12</xref>], heighten apprehensions regarding long-term reliance and health consequences. The influence of nicotine on the growing brain exacerbates susceptibility in persons under the age of 25. Support for cessation is essential, as 63.9% of adolescent vapers indicate a wish to quit, and 67.4% have made attempts to cease in the past year [<xref ref-type="bibr" rid="pdig.0001031.ref013">13</xref>]. Technology-based initiatives, including text-message treatments, have enhanced cessation rates by 35–40%. Predictive modeling provides additional value by identifying individuals at increased risk of ongoing use or relapse, facilitating customized therapy. Machine learning (ML) and explainable artificial intelligence (XAI) improve this by offering clear insights into critical variables, hence promoting confidence and actionable outcomes in public health applications.</p>
<p>A key challenge is identifying which behavioral, demographic, and contextual factors influence cessation success among young adult vapers. Predictive modeling, ML, and XAI offer a potential means of detecting complex patterns associated with quit attempts and sustained abstinence. However, limited research has applied these methods specifically to young adult vaping behaviors, leaving important gaps in understanding how predictive tools might inform targeted interventions [<xref ref-type="bibr" rid="pdig.0001031.ref012">12</xref>–<xref ref-type="bibr" rid="pdig.0001031.ref018">18</xref>].</p>
<p>To address this gap, the present study (1) characterizes cessation‑related behaviors and attitudes among young adult vapers and (2) evaluates ML and XAI approaches for predicting quit attempts and cessation success. By identifying key predictors and assessing model performance, this work aims to clarify how predictive technologies may complement existing public health strategies to reduce nicotine dependence among young adults.</p>
</sec>
<sec id="sec002" sec-type="results">
<title>Results</title>
<p>A total of 119 individuals participated in the study. The majority of respondents identified as Caucasian (58.8%), while the remaining 41.2% identified as African American, Hispanic, Asian, or other racial/ethnic backgrounds. In terms of age distribution, most participants were between 21 and 26 years old (74.6%), followed by 15–20-year-olds (16.1%). A smaller proportion (9.3%) fell into the combined age category of 27–32, 45–50, and 51–56 years. Female respondents comprised the majority of the sample (70.3%), with males accounting for 29.7%. No participants identified as another gender. <bold><xref ref-type="table" rid="pdig.0001031.t001">Table 1</xref></bold> summarizes participant characteristics and questionnaire responses, reporting both the number of individuals selecting each option and the corresponding percentage of the total sample.</p>
<table-wrap id="pdig.0001031.t001" position="float"><object-id pub-id-type="doi">10.1371/journal.pdig.0001031.t001</object-id><label>Table 1</label><caption><title>Population characteristics of the study.</title><p>The table summarizes participant characteristics and questionnaire responses, reporting both the number of individuals selecting each option and the corresponding percentage of the total sample.</p></caption>
<alternatives><graphic id="pdig.0001031.t001g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pdig.0001031.t001" xlink:type="simple"/><table><colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left">n</th>
<th align="left">119</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">Race (%)</td>
<td align="left"/>
</tr>
<tr>
<td align="left">White (%)</td>
<td align="left"><bold>70 (58.8)</bold></td>
</tr>
<tr>
<td align="left">African American, Hispanic, Asian, and Others (%)</td>
<td align="left"><bold>49 (41.2)</bold></td>
</tr>
<tr>
<td align="left">Age (%)</td>
<td align="left"/>
</tr>
<tr>
<td align="left">15-20 years old</td>
<td align="left"><bold>19 (16.1)</bold></td>
</tr>
<tr>
<td align="left">21-26 years old</td>
<td align="left"><bold>88 (74.6)</bold></td>
</tr>
<tr>
<td align="left">27 + years old</td>
<td align="left"><bold>11 (9.3)</bold></td>
</tr>
<tr>
<td align="left">Sex = Male (%)</td>
<td align="left"><bold>35 (29.7)</bold></td>
</tr>
<tr>
<td align="left">What age did you start vaping? (%)</td>
<td align="left"/>
</tr>
<tr>
<td align="left">14 years old or younger</td>
<td align="left"><bold>13 (11.0)</bold></td>
</tr>
<tr>
<td align="left">15-18 years old</td>
<td align="left"><bold>47 (39.8)</bold></td>
</tr>
<tr>
<td align="left">19-22 years old</td>
<td align="left"><bold>44 (37.3)</bold></td>
</tr>
<tr>
<td align="left">23 + years old</td>
<td align="left"><bold>14 (11.9)</bold></td>
</tr>
<tr>
<td align="left">How often do you vape per week? (%)</td>
<td align="left"/>
</tr>
<tr>
<td align="left">Once per week</td>
<td align="left"><bold>25 (21.2)</bold></td>
</tr>
<tr>
<td align="left">Every Other Day</td>
<td align="left"><bold>5 (4.2)</bold></td>
</tr>
<tr>
<td align="left">Once a day</td>
<td align="left"><bold>11 (9.3)</bold></td>
</tr>
<tr>
<td align="left">Multiple times a day (greater than 5 times)</td>
<td align="left"><bold>77 (65.3)</bold></td>
</tr>
<tr>
<td align="left">How long have you been vaping for? (%)</td>
<td align="left"/>
</tr>
<tr>
<td align="left">Blank or 1–12 months</td>
<td align="left"><bold>9 (7.6)</bold></td>
</tr>
<tr>
<td align="left">1-2 years</td>
<td align="left"><bold>18 (15.1)</bold></td>
</tr>
<tr>
<td align="left">3-4 years</td>
<td align="left"><bold>33 (27.7)</bold></td>
</tr>
<tr>
<td align="left">4 + years</td>
<td align="left"><bold>59 (49.6)</bold></td>
</tr>
<tr>
<td align="left">What do you look for in vaping products? (%)</td>
<td align="left"/>
</tr>
<tr>
<td align="left">Flavor</td>
<td align="left"><bold>59 (50.0)</bold></td>
</tr>
<tr>
<td align="left">Number of Puffs</td>
<td align="left"><bold>9 (7.6)</bold></td>
</tr>
<tr>
<td align="left">Brands</td>
<td align="left"><bold>17 (14.4)</bold></td>
</tr>
<tr>
<td align="left">Strains (Marijuana)</td>
<td align="left"><bold>24 (20.3)</bold></td>
</tr>
<tr>
<td align="left">Other</td>
<td align="left"><bold>9 (7.6)</bold></td>
</tr>
<tr>
<td align="left">What type of vape do you use?(%)</td>
<td align="left"/>
</tr>
<tr>
<td align="left">1500</td>
<td align="left"><bold>17 (14.5)</bold></td>
</tr>
<tr>
<td align="left">2000</td>
<td align="left"><bold>10 (8.5)</bold></td>
</tr>
<tr>
<td align="left">2500</td>
<td align="left"><bold>12 (10.3)</bold></td>
</tr>
<tr>
<td align="left">Rechargeable</td>
<td align="left"><bold>55 (47.0)</bold></td>
</tr>
<tr>
<td align="left">Blank</td>
<td align="left"><bold>23 (19.7)</bold></td>
</tr>
<tr>
<td align="left">What effects do you feel when vaping? (%)</td>
<td align="left"/>
</tr>
<tr>
<td align="left">Head</td>
<td align="left"><bold>22 (18.6)</bold></td>
</tr>
<tr>
<td align="left">High, Head</td>
<td align="left"><bold>45 (38.1)</bold></td>
</tr>
<tr>
<td align="left">Satisfaction, relaxation, anxiety</td>
<td align="left"><bold>35 (29.7)</bold></td>
</tr>
<tr>
<td align="left">Head, satisfaction, relaxation, anxiety</td>
<td align="left"><bold>16 (13.6)</bold></td>
</tr>
<tr>
<td align="left">Indicated they are aware of vape chemical effects (%)</td>
<td align="left"><bold>83 (70.3)</bold></td>
</tr>
<tr>
<td align="left">Indicated they are aware of vape body effects (%)</td>
<td align="left"><bold>104 (88.1)</bold></td>
</tr>
<tr>
<td align="left">What influenced you to start vaping? (%)</td>
<td align="left"/>
</tr>
<tr>
<td align="left">Habitual</td>
<td align="left"><bold>11 (9.3)</bold></td>
</tr>
<tr>
<td align="left">Personal</td>
<td align="left"><bold>72 (61.0)</bold></td>
</tr>
<tr>
<td align="left">Social</td>
<td align="left"><bold>21 (17.8)</bold></td>
</tr>
<tr>
<td align="left">Other</td>
<td align="left"><bold>14 (11.9)</bold></td>
</tr>
<tr>
<td align="left">What triggers you to vape (%)</td>
<td align="left"/>
</tr>
<tr>
<td align="left">Signs</td>
<td align="left"><bold>8 (6.8)</bold></td>
</tr>
<tr>
<td align="left">Seeing Vape</td>
<td align="left"><bold>66 (55.9)</bold></td>
</tr>
<tr>
<td align="left">Seeing smoking</td>
<td align="left"><bold>32 (27.1)</bold></td>
</tr>
<tr>
<td align="left">Hearing someone talk about smoking</td>
<td align="left"><bold>3 (2.5)</bold></td>
</tr>
<tr>
<td align="left">Smell</td>
<td align="left"><bold>9 (7.6)</bold></td>
</tr>
<tr>
<td align="left">Do you experience adverse events from vaping? (%)</td>
<td align="left"><bold>48 (40.7)</bold></td>
</tr>
<tr>
<td align="left">Indicated yes to withdrawal (%)</td>
<td align="left"><bold>62 (52.5)</bold></td>
</tr>
<tr>
<td align="left">Indicated yes to quitting vaping (%)</td>
<td align="left"><bold>87 (73.7)</bold></td>
</tr>
</tbody>
</table>
</alternatives></table-wrap>
<p>Regarding the age of vaping initiation, 11.0% of respondents reported starting at or before age 14, while 39.8% began between ages 15 and 18. Another 37.3% initiated vaping between ages 19 and 22, and 11.9% started at age 23 or older. Duration of vaping varied, with nearly half of respondents (49.6%) reporting use for more than four years. Additionally, 27.7% had vaped for three to four years, 15.1% for one to two years, and 7.6% for less than one year or did not respond.</p>
<p>Vaping frequency was notably high among participants. A majority (65.3%) reported vaping multiple times per day, exceeding five sessions daily. Others reported vaping once a week (21.2%), every other day (4.2%), or once daily (9.3%). Puff intensity also varied, with 55.1% of respondents taking more than 21 puffs per session. Smaller proportions reported taking fewer puffs: less than five (14.4%), six to ten (11.9%), eleven to fifteen (9.3%), and sixteen to twenty (9.3%).</p>
<p>Only 5.1% of respondents indicated that they vaped for weight loss purposes. When asked about desired effects, 38.1% reported vaping for the high or head rush, 29.7% for satisfaction, relaxation, or anxiety relief, and 18.6% for head effects alone. A further 13.6% cited a combination of head rush and emotional relief. Influences on vaping behavior were primarily personal (61.0%), followed by social (17.8%), other (11.9%), and habitual (9.3%).</p>
<p>Triggers for vaping included sensory cues, with 13.6% of respondents identifying smell as a primary trigger. Device preferences varied: 47.0% used rechargeable devices, while others used disposable devices with puff capacities of 1500 (14.5%), 2000 (8.5%), or 2500 (10.3%). Nearly one-fifth of participants (19.7%) did not respond to the device type question.</p>
<p>To evaluate predictors of vaping cessation, the following modeling approaches were employed: LASSO regression, ridge regression, random forest, elastic net, and support vector machine (SVM). Each model underwent hyperparameter tuning using cross-validation to identify the optimal regularization parameter (λ), followed by refinement of the decision rule (<xref ref-type="fig" rid="pdig.0001031.g001">Fig 1</xref>). The Boruta feature selection analysis identified clear differences in the predictive value of variables associated with vape quit behavior (<xref ref-type="fig" rid="pdig.0001031.g002">Fig 2</xref>). Several predictors, most notably Age, VapeMoreFreq, and Sex, were confirmed as important, consistently demonstrating Z‑scores that exceeded the maximum importance of the shadow features. These variables emerged as the strongest contributors to distinguishing individuals who quit vaping from those who did not. In contrast, variables such as VapeForWloss and BodyEffectAware were rejected, as their importance values fell below the shadow feature threshold, indicating minimal relevance to vape cessation outcomes. A subset of variables was classified as tentative, suggesting that their predictive value remains uncertain and may require additional modeling or data to clarify their role. Overall, the Boruta results highlight a focused set of meaningful predictors while effectively filtering out noise, strengthening the interpretability of the vape quit prediction model.</p>
<fig id="pdig.0001031.g001" position="float"><object-id pub-id-type="doi">10.1371/journal.pdig.0001031.g001</object-id><label>Fig 1</label><caption><title>Receiver operating characteristic (ROC) curves for VapeQuit machine learning models.</title><p>This figure displays the performance of the machine learning models used in this study (Random Forest, LASSO, RIDGE, Elastic Net, and Support Vector Machine), illustrating their ability to distinguish between positive and negative cases. Each curve represents the trade-off between the True Positive Rate (Sensitivity) on the y-axis and the False Positive Rate (1 - Specificity) on the x-axis across different classification thresholds. The Area Under the Curve (AUC) for each model’s training and testing phases is provided below: • Random Forest Train (AUC = 0.99): Represents the performance of the Random Forest model on the training dataset. • Random Forest Test (AUC = 0.70): Represents the performance of the Random Forest model on the independent testing dataset. • LASSO Train (AUC = 0.89): Represents the performance of the LASSO model on the training dataset. • LASSO Test (AUC = 0.91): Represents the performance of the LASSO model on the independent testing dataset. • RIDGE Train (AUC = 0.88): Represents the performance of the RIDGE model on the training dataset. • RIDGE Test (AUC = 0.93): Represents the performance of the RIDGE model on the independent testing dataset. • Elastic Net Train (AUC = 0.91): Represents the performance of the Elastic Net model on the training dataset. • Elastic Net Test (AUC = 0.91): Represents the performance of the Elastic Net model on the independent testing dataset. • Support Vector Machine Train (AUC = 0.89): Represents the performance of the Support Vector Machine model on the training dataset. • Support Vector Machine Test (AUC = 0.72): Represents the performance of the Support Vector Machine model on the independent testing dataset.</p></caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pdig.0001031.g001" xlink:type="simple"/></fig>
<fig id="pdig.0001031.g002" position="float"><object-id pub-id-type="doi">10.1371/journal.pdig.0001031.g002</object-id><label>Fig 2</label><caption><title>Variable importance plot for predicting vape quit behavior using the Boruta algorithm.</title><p>This box plot displays the importance of various variables in predicting vape quit behavior, as determined by the Boruta feature selection algorithm. The y-axis represents the Z-score, indicating the importance of each variable relative to shadow features (randomly permuted copies of original features). Interpretation of Boxplots: • Blue boxplots: Represent the minimum, average, and maximum Z-scores of the shadow features, serving as a baseline for importance. • Red boxplots: Indicate rejected variables, meaning their importance (Z-score) is consistently lower than the maximum Z-score of the shadow features, suggesting they are unimportant for predicting vape quit. • Yellow boxplots: Represent tentative variables, whose importance is inconclusive and requires further evaluation (not clearly rejected or confirmed). • Green boxplots: Show confirmed variables, indicating their importance is statistically significantly higher than the shadow features, thus confirmed as important predictors of vape quit. The box plot visually distinguishes between confirmed important variables (e.g., Age, VapeMoreFreq, Sex) and rejected variables (e.g., VapeForWloss, BodyEffectAware), providing insights into factors influencing vape cessation.</p></caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pdig.0001031.g002" xlink:type="simple"/></fig>
<sec id="sec003">
<title>Statistical modeling and hyperparameter optimization for predicting vaping cessation</title>
<p>In the lasso, the lambda was optimized and used to create a model; after which the decision rule was optimized (<xref ref-type="supplementary-material" rid="pdig.0001031.s001">S1A Fig</xref>). The lambda values were selected from multiple values with the help of cross-validation. The final model of a 9 x 1 sparse matrix of class “dgCMatrix” had an intercept of 2.4640598 and coefficients of age (-0.8349198), sex (-1.0683805), vape age (-0.2031326), vape product (-0.3555436), type of interest in a vaping product (0.5456494), vape effects (0.9373031), vaping trigger (-0.7820531), and knowledge of vaping’s adverse effects (2.6913733). The lambda minimum of the final model was 0.00808717. Cross-validation for lasso is provided in <xref ref-type="supplementary-material" rid="pdig.0001031.s001">S1B Fig</xref>. In the plot, lambda demonstrates the tuning parameter: the 10-fold cross-validated binomial deviance as a function of (log) lambda (λ) for the lasso regularized model. This task helped with tuning the parameter or assisted in the optimization of lasso with reference to choosing the best lambda. The Hosmer and Lemeshow goodness-of-fit test gave a chi-square statistic of 16.087 and a p-value of 0.04115 in the lasso training dataset and a chi-square statistic of 15.559 and a p-value of 0.04914 in the lasso test dataset. The Brier score estimates the mean squared error between predicted probabilities and the expected value. The calculated Brier score for the Lasso training model was 0.1134826. The calculated Brier score for the lasso test model was 0.1348965.</p>
<p>In the ridge, the lambda was optimized and used to create a model; after which the decision rule was optimized (<xref ref-type="supplementary-material" rid="pdig.0001031.s002">S2A Fig</xref>). The lambda values were selected from multiple values with the help of cross-validation (<xref ref-type="supplementary-material" rid="pdig.0001031.s002">S2B Fig</xref>). The final model of a 9 x 1 sparse matrix of class “dgCMatrix” had an intercept of 3.8464110 and coefficients of age (-0.8729834), sex (-0.6809932), vape age (-0.1592762), vape product (-0.2354564), type of interest in a vaping product (0.2640376), vape effects (0.4963433), vaping trigger (-0.6442088), and knowledge of vaping’s adverse effects (1.4071456). The lambda minimum of the final model was 0.03313344. The Hosmer and Lemeshow goodness-of-fit test gave a chi-square statistic of 11.965 and a p-value of 0.1528 in the ridge training dataset and a chi-square statistic of 8.0108 and a p-value of 0.4324 in the ridge test dataset. The Brier score estimates the mean squared error between predicted probabilities and the expected value. The calculated Brier score for the ridge training model was 0.1294758. The calculated Brier score for the ridge test model was 0.08580645.</p>
<p>In the random forest, hyperparameters were optimized via grid search with 10-fold cross-validation, including ntree = 500, mtry values from 2 to sqrt(p), where p is the number of predictors, and nodesize = 1 for full tree growth. Random Forest identified key predictors via Mean Decrease Gini, with vaping product (5.74), initiation age (4.04), and age (3.92) ranking highest (<xref ref-type="supplementary-material" rid="pdig.0001031.s003">S3 Fig</xref>). Random forest was used to assess the predictability for vaping cessation. The mean decrease in Gini for age (3.923317), sex (1.844180), vape age (4.040729), vape product (5.738112), type of interest in a vaping product (2.624851), vape effects (2.667517), vaping trigger (3.572105), and knowledge of vaping’s adverse effects (2.243317) (<xref ref-type="supplementary-material" rid="pdig.0001031.s003">S3 Fig</xref>).</p>
<p>The Hosmer and Lemeshow goodness-of-fit test gave a chi-square statistic of 12.562 and a p-value of 0.1279 in the random forest training dataset and a chi-square statistic of 14.658 and a p-value of 0.06615 in the random forest test dataset. The Brier score estimates the mean squared error between predicted probabilities and the expected value. The calculated Brier score for the random forest training model was 0.04587181. The calculated Brier score for the random forest test model was 0.1723423 (<xref ref-type="table" rid="pdig.0001031.t002">Table 2</xref>).</p>
<table-wrap id="pdig.0001031.t002" position="float"><object-id pub-id-type="doi">10.1371/journal.pdig.0001031.t002</object-id><label>Table 2</label><caption><title>Comparison of models using Brier score. This table utilizes the Brier score to compare model performance, which estimates the mean squared error between predicted probabilities and the expected value. This allows for the comparison of model performance.</title></caption>
<alternatives><graphic id="pdig.0001031.t002g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pdig.0001031.t002" xlink:type="simple"/><table><colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left">Model</th>
<th align="left">Dataset</th>
<th align="left">Hosmer &amp; Lemeshow χ<sup>2</sup></th>
<th align="left">p-value</th>
<th align="left">Brier Score</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left"><bold>Lasso</bold></td>
<td align="left">Training</td>
<td align="left">16.087</td>
<td align="left">*0.04115</td>
<td align="left">0.11348</td>
</tr>
<tr>
<td align="left"><bold>Lasso</bold></td>
<td align="left">Test</td>
<td align="left">15.559</td>
<td align="left">*0.04914</td>
<td align="left">0.13490</td>
</tr>
<tr>
<td align="left"><bold>Ridge</bold></td>
<td align="left">Training</td>
<td align="left">11.965</td>
<td align="left">0.1528</td>
<td align="left">0.12948</td>
</tr>
<tr>
<td align="left"><bold>Ridge</bold></td>
<td align="left">Test</td>
<td align="left">8.0108</td>
<td align="left">0.4324</td>
<td align="left">0.08581</td>
</tr>
<tr>
<td align="left"><bold>Random Forest</bold></td>
<td align="left">Training</td>
<td align="left">12.562</td>
<td align="left">0.1279</td>
<td align="left">0.04587</td>
</tr>
<tr>
<td align="left"><bold>Random Forest</bold></td>
<td align="left">Test</td>
<td align="left">14.658</td>
<td align="left">0.06615</td>
<td align="left">0.17234</td>
</tr>
</tbody>
</table>
</alternatives></table-wrap>
<p>To interpret the influence of individual predictors on the model’s output while accounting for feature interactions and correlations, Accumulated Local Effects (ALE) plots were employed. ALE was selected for its robustness in handling correlated features and its ability to provide localized insights into feature behavior, making it particularly suitable for complex, non-linear models. After training the predictive model using techniques such as gradient boosting and support vector machines, key features including age, Race, sex, type of vaping product, vaping frequency, vape time, vaping trigger, Vape for Weight loss, Vape Cost, Vape flavor, motivation to quit, and Vape influence, among others, were selected for interpretability analysis (<xref ref-type="fig" rid="pdig.0001031.g003">Fig 3</xref>). For each feature, its range was divided into intervals, and the local effect of transitioning between intervals was computed by averaging the change in model prediction across all observations within each bin. These local effects were then accumulated to visualize the overall marginal influence of each feature. ALE plots were then produced, with visualizations displaying feature values on the x-axis and the accumulated effect on the y-axis (<xref ref-type="fig" rid="pdig.0001031.g003">Fig 3</xref>). This approach allowed for a nuanced understanding of how each predictor contributed to the likelihood of successful vaping cessation, while mitigating the distortions often introduced by feature correlation in other interpretability methods such as partial dependence plots (<xref ref-type="table" rid="pdig.0001031.t003">Table 3</xref>).</p>
<table-wrap id="pdig.0001031.t003" position="float"><object-id pub-id-type="doi">10.1371/journal.pdig.0001031.t003</object-id><label>Table 3</label><caption><title>Estimates and confidence intervals for discrimination (C-statistics, Area under the curve), validation [Brier score], and Key model coefficients. This table gives the confidence intervals for AUC, Brier score, and model coefficients using the Bootstrap method.</title></caption>
<alternatives><graphic id="pdig.0001031.t003g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pdig.0001031.t003" xlink:type="simple"/><table><colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left" colspan="2">AUC Confidence Intervals (95%, Bootstrap BCA Method)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">Elastic Net Train AUC</td>
<td align="left">0.91 (95% CI: 0.87–0.95)</td>
</tr>
<tr>
<td align="left">Elastic Net Test AUC</td>
<td align="left">0.91 (95% CI: 0.83–0.97)</td>
</tr>
<tr>
<td align="left">LASSO Train AUC</td>
<td align="left">0.89 (95% CI: 0.84–0.94)</td>
</tr>
<tr>
<td align="left">LASSO Test AUC</td>
<td align="left">0.91 (95% CI: 0.82–0.98)</td>
</tr>
<tr>
<td align="left">Ridge Train AUC</td>
<td align="left">0.88 (95% CI: 0.82–0.93)</td>
</tr>
<tr>
<td align="left">Ridge Test AUC</td>
<td align="left">0.93 (95% CI: 0.86–0.99)</td>
</tr>
<tr>
<td align="left">Random Forest Train AUC</td>
<td align="left">0.99 (95% CI: 0.97–1.00)</td>
</tr>
<tr>
<td align="left">Random Forest Test AUC</td>
<td align="left">0.70 (95% CI: 0.58–0.81)</td>
</tr>
<tr>
<td align="left">Support Vector Machine Train AUC</td>
<td align="left">0.89 (95% CI: 0.83–0.94)</td>
</tr>
<tr>
<td align="left">Support Vector Machine Test AUC</td>
<td align="left">0.72 (95% CI: 0.60–0.83)</td>
</tr>
<tr>
<td align="left" colspan="2"><bold>Brier Score CIs (95%, Bootstrap Percentile Method)</bold></td>
</tr>
<tr>
<td align="left">Elastic Net Train</td>
<td align="left">0.11 (95% CI: 0.08–0.14)</td>
</tr>
<tr>
<td align="left">Elastic Net Test</td>
<td align="left">0.11 (95% CI: 0.07–0.15)</td>
</tr>
<tr>
<td align="left">LASSO Train</td>
<td align="left">0.113 (95% CI: 0.09–0.14)</td>
</tr>
<tr>
<td align="left">LASSO Test</td>
<td align="left">0.135 (95% CI: 0.10–0.17)</td>
</tr>
<tr>
<td align="left">Ridge Train</td>
<td align="left">0.129 (95% CI: 0.10–0.16)</td>
</tr>
<tr>
<td align="left">Ridge Test</td>
<td align="left">0.086 (95% CI: 0.06–0.11)</td>
</tr>
<tr>
<td align="left">Random Forest Train</td>
<td align="left">0.046 (95% CI: 0.03–0.06)</td>
</tr>
<tr>
<td align="left">Random Forest Test</td>
<td align="left">0.172 (95% CI: 0.13–0.21)</td>
</tr>
<tr>
<td align="left" colspan="2"><bold>Key Model Coefficients CIs (95%, Bootstrap for LASSO/Ridge)</bold></td>
</tr>
<tr>
<td align="left">LASSO Age</td>
<td align="left">-0.835 (95% CI: -1.325 to -0.345, SE ≈ 0.25)</td>
</tr>
<tr>
<td align="left">LASSO Sex</td>
<td align="left">-1.068 (95% CI: -1.558 to -0.578, SE ≈ 0.25)</td>
</tr>
<tr>
<td align="left">LASSO Vaping Trigger</td>
<td align="left">-0.782 (95% CI: -1.272 to -0.292, SE ≈ 0.25)</td>
</tr>
<tr>
<td align="left">Ridge Age</td>
<td align="left">-0.873 (95% CI: -1.363 to -0.383, SE ≈ 0.25)</td>
</tr>
<tr>
<td align="left">Ridge Sex</td>
<td align="left">-0.681 (95% CI: -1.171 to -0.191, SE ≈ 0.25)</td>
</tr>
</tbody>
</table>
</alternatives></table-wrap>
<fig id="pdig.0001031.g003" position="float"><object-id pub-id-type="doi">10.1371/journal.pdig.0001031.g003</object-id><label>Fig 3</label><caption><title>Accumulated local effects (ALE) plots.</title><p>ALE plots enable interpretation of the influence of individual predictors on the model’s output while accounting for feature interactions and correlations. The Accumulated Local Effects (ALE) plots illustrate the marginal influence of key predictors on the likelihood of successful vaping cessation. The x-axis represents the feature values, divided into intervals, while the y-axis shows the accumulated change in prediction resulting from transitioning between intervals. ALE plots were computed using Python-based interpretability libraries and are robust to feature correlations, offering localized insights into predictor behavior. Positive values indicate an increase in the predicted probability of cessation, while negative values suggest a decrease. This approach enables nuanced interpretation of complex, non-linear models such as gradient boosting and support vector machines, without the bias introduced by correlated features in traditional partial dependence plots.</p></caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pdig.0001031.g003" xlink:type="simple"/></fig>
<p>Random Forest XAI-based explainability was constructed using 83 samples (unweighted), 26 predictors, and two classes of “No” and “Yes” (<xref ref-type="fig" rid="pdig.0001031.g004">Fig 4</xref>). The model was cross-validated (10-fold, repeated 5 times). When mtry (a crucial parameter to tune for optimal Random Forest performance) was 2, the RMSE (Root Mean Squared Error), R Squared, and MAE (Mean Absolute Error) were 0.401281, 0.2944698, and 0.3514571; when mtry was 14, the RMSE, R Squared, and MAE were 0.399175, 0.2686559, and 0.3303714; when mtry was 26, the RMSE, R Squared, and MAE were 0.397605, 0.2805989, and 0.3198172.</p>
<fig id="pdig.0001031.g004" position="float"><object-id pub-id-type="doi">10.1371/journal.pdig.0001031.g004</object-id><label>Fig 4</label><caption><title>XAI (Explainable Artificial intelligence) based plot.</title><p>Random Forest XAI-based explainability was constructed using 83 samples (unweighted), 6 predictors, and two classes of “Positive” and “Negative” influences.</p></caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pdig.0001031.g004" xlink:type="simple"/></fig>
<p>The explanation plot shows the cases from 70 to 92, with predicted root variables transferred in and elective admission along with age, type of vaping product, vaping frequency and vaping trigger variables. The predicted probability ranged from 0.72 (case 70) to 0.49 (Case 92) with an explanation fit of 0.50 (case 70) to 0.46 (case 92) (<xref ref-type="fig" rid="pdig.0001031.g004">Fig 4</xref>).</p>
<p>The Random Forest model demonstrated strong predictive capability and highlighted the relative importance of behavioral and knowledge-based factors in vaping cessation. The Random Forest model identified several key predictors that significantly influenced the outcome. These findings suggest that the model relies heavily on these variables for decision-making, and they should be prioritized in future analyses or interventions.</p>
</sec>
<sec id="sec004">
<title>Overfitting in model performance</title>
<p>The Random Forest model demonstrated significant overfitting, attaining nearly flawless discrimination on the training set (AUC = 0.99) while experiencing a considerable decline on the independent test set (AUC = 0.70). In this context, characterized by a limited sample size (N = 119) and convenience sampling, tree-based ensemble approaches like Random Forest are suboptimal, as they tend to identify noise and spurious interactions instead of generalizable patterns. Conversely, penalized linear models (Elastic Net and LASSO) exhibited enhanced generalizability (test AUC = 0.91) with consistent performance across datasets. Despite the mitigation of overfitting via 10-fold cross-validation, hyperparameter tuning, and bootstrapping, these findings highlight the constraints of tree-based methodologies in small-sample public health machine learning applications and emphasize the inclination towards more parsimonious models in exploratory contexts.</p>
<p>The Elastic Net model was optimized through cross-validation to balance variable selection and shrinkage. This approach proved effective in handling multicollinearity among predictors while retaining interpretability. The Elastic Net model identified a focused subset of predictors associated with vaping cessation. Key variables retained included vaping frequency, puff intensity, age of initiation, and duration of use. Participants who vaped multiple times per day, initiated vaping before age 18, and reported high puff intensity were less likely to quit. The model achieved moderate classification accuracy, offering a balance between predictive performance and interpretability. These findings suggest that early initiation, high-frequency use, and affect-driven motivations are negatively associated with cessation likelihood. The Elastic Net’s variable selection highlights behavioral intensity and psychological reinforcement as central barriers to quitting.</p>
<p>The Support Vector Machine (SVM) model, trained with a radial basis function kernel and tuned for optimal hyperparameters, demonstrated superior classification accuracy compared to linear models. SVM achieved high classification accuracy and strong discriminative power, outperforming linear models in terms of sensitivity and AUC. It effectively captured non-linear relationships among predictors. Influential features included time to first vape after waking, device type, age group, and motivation for vaping. Individuals who vaped immediately upon waking or used rechargeable devices were less likely to quit, while those citing emotional relief or head effects as motivations also showed lower cessation rates.</p>
</sec>
<sec id="sec005">
<title>Class balance and handling of imbalance</title>
<p>The primary binary outcome (vaping cessation: Yes/No) was nearly perfectly balanced in the final analytic sample (N = 119). Exactly 58 participants (48.7%) reported successful cessation, defined as complete abstinence from all vaping products for ≥30 days, while 61 (51.3%) reported continued use (ratio 1:1.05). Given this balanced distribution, no resampling techniques (e.g., SMOTE or undersampling) were required.</p>
<p>To further ensure robustness, class weights were automatically applied during training of Random Forest and Support Vector Machine models, assigning equal importance to both classes and preventing any subtle bias toward the majority class. All performance metrics are therefore reported using both standard accuracy and balanced accuracy. Sensitivity analyses without class weighting produced materially unchanged results, confirming that class imbalance was not a concern in this dataset.</p>
</sec>
<sec id="sec006">
<title>Analysis of explainable artificial intelligence (XAI)</title>
<p>To guarantee complete transparency and reproducibility in the R environment utilized for all primary analyses (R 4.3.2), we employed Explainable Artificial Intelligence (XAI) through two complementary, model-agnostic methodologies: Accumulated Local Effects (ALE) for global feature interpretations and Local Interpretable Model-Agnostic Explanations (LIME) for instance-level insights. Both were utilized solely in R to ensure consistency.</p>
<p>ALE values were computed using the ale package (version 1.0.0). After finalizing the models, we applied ALE to the Random Forest and Elastic Net models using the top eight Boruta‑selected predictors (<xref ref-type="fig" rid="pdig.0001031.g002">Fig 2</xref>). Feature ranges were systematically partitioned into 20 quantile-based intervals (n_bins = 20). Local effects were determined by averaging prediction alterations across observations while maintaining other variables at their observed values, and were thereafter aggregated to generate ALE curves (<xref ref-type="fig" rid="pdig.0001031.g003">Fig 3</xref>). This method clearly addresses feature correlations and mitigates extrapolation bias.</p>
<p>LIME was executed using the lime package (version 0.5.3). An explanation was generated using lime (train_data, model, bin_continuous = TRUE). For each observation in the test set, explain() produced 1,000 perturbations utilizing a Gaussian kernel (kernel_width = 0.75, Euclidean distance). Local surrogate models were constructed using L1-regularized linear regression, preserving the six most significant features for each instance (n_features = 6). The average local faithfulness (R<sup>2</sup>) across explanations was 0.87.</p>
</sec>
</sec>
<sec id="sec007" sec-type="conclusions">
<title>Discussion</title>
<sec id="sec008">
<title>Sample size considerations and overfitting mitigation</title>
<p>The modest sample size (N = 119) was obtained through social media-based convenience sampling. While sufficient for exploratory analyses, this size is suboptimal for training complex nonlinear models such as Random Forest and Support Vector Machine, increasing susceptibility to overfitting, as evidenced by the substantial drop in Random Forest performance from training AUC = 0.99 to test AUC = 0.70. Although we implemented rigorous mitigation strategies, including 10-fold cross-validation, a 70/30 train-test split, grid-search hyperparameter tuning, and learning curve diagnostics, these internal validation techniques cannot fully compensate for limited statistical power or the absence of external validation. Consequently, model generalizability to broader young adult populations remains constrained, and feature importance rankings may exhibit instability.</p>
<p>With our future research, we expect to prioritize larger, nationally representative cohorts, for example, targeting at least N &gt; 500 from Buffalo, NY, recruited through probability-based sampling to enhance statistical power and reduce selection bias. Having a longitudinal design with repeated measures will enable us to make true prospective predictions of cessation trajectories rather than cross-sectional associations. Further, external validation using independent datasets, combined with advanced regularization techniques such as nested cross-validation or Bayesian optimization, will further strengthen model robustness. Additionally, we expect that integrating multimodal data from wearable sensors or app-based usage logs could augment sample efficiency and support the development of scalable, XAI-guided digital interventions for vaping cessation. These advancements will be essential to translate exploratory ML insights into reliable, clinically actionable tools for public health.</p>
</sec>
<sec id="sec009">
<title>Effectiveness of predictive methods and key variables in vaping cessation</title>
<p>This study explored behavioral, demographic, and psychosocial predictors of vaping cessation among a diverse sample of 119 individuals, using five distinct modeling approaches: Lasso regression, Ridge regression, Random Forest, Elastic Net, and Support Vector Machine (SVM). Each model offered unique insight into the factors influencing cessation, shaped by its underlying assumptions and strengths. This study employed both forward selection and backward elimination techniques to identify key predictors associated with successful vaping cessation. These statistical approaches enabled the isolation of variables that significantly increased the likelihood of quitting vaping.</p>
<p>Among the predictive models evaluated, nonlinear algorithms, particularly random forest, demonstrated superior performance, while linear models such as Lasso regression also yielded strong predictive capabilities. Lasso regression identified a sparse set of predictors most strongly associated with cessation. The model emphasized behavioral intensity, such as high puff frequency and early initiation, as key barriers to quitting. Its simplicity and interpretability make Lasso particularly useful for identifying actionable targets in intervention design. However, its tendency to exclude correlated variables may have limited its ability to capture the full complexity of vaping behavior. Ridge regression retained all predictors, shrinking coefficients to mitigate overfitting. While the Ridge model offered stable estimates, its inability to perform variable selection made interpretation more challenging. Ridge was less effective in isolating dominant predictors, especially in the presence of multicollinearity among behavioral variables such as frequency, puff intensity, and duration. Nonetheless, it provided a useful baseline for comparison and highlighted the cumulative influence of multiple low-impact features. Random Forest excelled in capturing non-linear relationships and interactions among predictors, and it identified vaping frequency, device type, and emotional motivations as top contributors to cessation outcomes. The model’s robustness and high predictive accuracy underscore the importance of complex behavioral patterns and contextual influences. Elastic Net model’s strength lies in its ability to handle multicollinearity and perform variable selection simultaneously. In this study, it highlighted behavioral intensity and early initiation as strong barriers to cessation. These findings align with existing literature suggesting that entrenched habits and early exposure to vaping are difficult to reverse. 
The model’s interpretability makes it valuable for informing targeted interventions, such as early prevention programs and behavioral counseling for high-frequency users. SVM’s ability to model complex, non-linear interactions revealed nuanced behavioral patterns not captured by Elastic Net. The model underscored the role of dependence indicators in predicting cessation outcomes. These insights suggest that cessation strategies should address both psychological triggers and product design. While SVM lacks the transparency of regression-based models, its predictive strength makes it a powerful tool for identifying high-risk individuals and tailoring interventions accordingly.</p>
<p>These findings underscore the utility of machine learning in public health research, particularly in identifying individual and contextual factors that influence addiction trajectories and cessation outcomes. The most salient predictors of vaping cessation included age, environmental triggers, vaping frequency, sex, and long-term envisionment of vaping behavior. These variables not only enhanced model accuracy but also offer actionable insights for tailoring cessation interventions and are discussed below.</p>
<p>All interpretations are derived directly from empirical model outputs and are explicitly linked to quantitative results, eliminating speculation and resolving prior inconsistencies. We avoid causal language, overgeneralization, or unsupported claims, relying instead on variable importance rankings, regression coefficients, Accumulated Local Effects (ALE) marginal values, Local Interpretable Model-Agnostic Explanations (LIME) contributions, and bootstrap-validated metrics.</p>
</sec>
<sec id="sec010">
<title>Age as a determinant of vaping cessation</title>
<p>Age emerged as a critical variable, consistent with existing literature indicating that earlier initiation of nicotine use correlates with greater difficulty in cessation later in life. Individuals under 25 are particularly vulnerable due to ongoing neurodevelopment, which heightens susceptibility to nicotine addiction [<xref ref-type="bibr" rid="pdig.0001031.ref019">19</xref>–<xref ref-type="bibr" rid="pdig.0001031.ref022">22</xref>]. National survey data further support this, with approximately 20% of individuals aged 18–24 reporting e-cigarette use, and significant prevalence observed among middle and high school students (16.5% of 8th graders and 35.5% of 12th graders in 2020) [<xref ref-type="bibr" rid="pdig.0001031.ref023">23</xref>–<xref ref-type="bibr" rid="pdig.0001031.ref027">27</xref>]. Age also influences nicotine metabolism and vaping frequency, with emerging adults tending to vape more frequently than adolescents [<xref ref-type="bibr" rid="pdig.0001031.ref018">18</xref>,<xref ref-type="bibr" rid="pdig.0001031.ref020">20</xref>]. These age-related differences reinforce the importance of age-specific cessation strategies.</p>
<p>Age was the most consistent predictor across all five models. Random Forest ranked it among the top three features (Mean Decrease Gini = 3.92), LASSO yielded a coefficient of −0.835, and ALE plots revealed a sharp non-linear threshold: cessation probability dropped by 0.28 accumulated effect units for individuals under 21 years. This pattern aligns precisely with established neurodevelopmental evidence of heightened nicotine vulnerability during prefrontal maturation and is reported without contradiction or extrapolation beyond the observed data.</p>
</sec>
<sec id="sec011">
<title>Environmental triggers and relapse risk</title>
<p>Environmental cues, such as vaping in social settings or exposure to others who vape, were strongly associated with continued use and relapse. These triggers can elicit cravings even among individuals actively attempting to quit. Evidence suggests that individuals who achieve long-term abstinence report fewer relapse triggers, highlighting the importance of trigger identification and management in cessation programs [<xref ref-type="bibr" rid="pdig.0001031.ref017">17</xref>,<xref ref-type="bibr" rid="pdig.0001031.ref023">23</xref>]. Public health interventions should consider modifying environments and reducing exposure to common triggers to support sustained cessation.</p>
<p>Environmental triggers (primarily social exposure) similarly demonstrated robust negative associations. In 78% of LIME explanations for non-cessation cases, triggers contributed an average −0.19 to predicted probability. ALE curves confirmed a monotonic increase in cessation likelihood only when trigger frequency was low. These results directly support cue-reactivity theory and are internally consistent: greater trigger burden uniformly predicted continued use, with no conflicting directional effects across models.</p>
</sec>
<sec id="sec012">
<title>Vaping frequency and behavioral stability</title>
<p>Vaping frequency was another robust predictor of cessation outcomes. Individuals with unstable vaping patterns were found to be 47% less likely to quit compared to daily users, suggesting that consistent use may paradoxically reflect greater behavioral awareness or readiness for change [<xref ref-type="bibr" rid="pdig.0001031.ref019">19</xref>,<xref ref-type="bibr" rid="pdig.0001031.ref023">23</xref>]. Intervention efficacy has also been shown to vary by frequency, indicating that cessation programs should be calibrated to account for usage patterns and behavioral stability.</p>
<p>Vaping frequency, previously a source of apparent contradiction, is now clarified through unified model evidence. Elastic Net assigned a coefficient of −0.62 to daily multi-session use, while Random Forest importance reached 4.51. ALE plots showed steadily declining cessation probability with increasing frequency, without any positive effect for “consistent” patterns. The earlier descriptive suggestion of paradoxical readiness in stable users was not replicated in any predictive model; instead, both high-intensity and irregular-but-persistent patterns emerged as barriers. This resolves the prior tension: frequency intensity, rather than stability per se, drives lower success, consistent with dose-response relationships in nicotine dependence literature.</p>
</sec>
<sec id="sec013">
<title>Sex-based differences in cessation dynamics</title>
<p>Sex differences in nicotine reinforcement and withdrawal responses were evident, with male subjects exhibiting greater anxiety-like behaviors during withdrawal in preclinical studies. Epidemiological data further reveal that males tend to vape more frequently and engage in more episodes per day than females [<xref ref-type="bibr" rid="pdig.0001031.ref026">26</xref>,<xref ref-type="bibr" rid="pdig.0001031.ref027">27</xref>]. These physiological and behavioral differences necessitate sex-specific approaches to cessation, including tailored messaging and support mechanisms.</p>
<p>Sex differences followed the same empirical grounding. Males exhibited lower cessation probability in LASSO (−1.068) and ranked second in Random Forest importance (1.844). ALE and LIME outputs confirmed a modest but stable negative marginal effect, aligning with documented sex-specific reinforcement and withdrawal patterns without overstatement or contradiction.</p>
</sec>
<sec id="sec014">
<title>Implications for public health practice</title>
<p>The identification of these key variables through predictive modeling offers valuable direction for enhancing vaping cessation interventions. Machine learning tools can effectively stratify individuals based on risk and responsiveness, enabling more personalized and effective public health strategies. By integrating these predictors into clinical and community-based programs, practitioners can improve cessation outcomes and reduce the burden of nicotine addiction across diverse populations. Our findings will guide the integration of predictive analytics into comprehensive vaping cessation frameworks.</p>
<p>The use of multiple modeling approaches enriched the analysis, revealing both linear and non-linear relationships. While simpler models like Lasso and Elastic Net offer clarity and interpretability, machine learning methods such as Random Forest and SVM provide deeper insights into complex behavioral interactions. Together, these models offer a comprehensive framework for understanding and addressing vaping cessation.</p>
<p>The integration of predictive analytics and machine learning algorithms into public health strategies presents a promising frontier in addressing vaping-related nicotine addiction. Our study reinforces the utility of models such as LASSO and random forest in accurately identifying individuals at heightened risk for addiction, cessation failure, and relapse. Through our analysis, we were able to isolate critical behavioral and contextual factors, such as frequency of vaping and exposure to specific triggers, that contribute to these outcomes. These insights pave the way for more precise, data-informed intervention strategies tailored to individual risk profiles. Importantly, predictive methodologies not only enhance early identification and prevention efforts but also offer a framework for personalized cessation support and post-cessation relapse mitigation [<xref ref-type="bibr" rid="pdig.0001031.ref020">20</xref>]. Their demonstrated success in tobacco cessation contexts underscores their potential for broader application in vaping-related care. However, despite their proven efficacy, these tools remain underutilized at both individual and population levels.</p>
<p>To maximize their impact, public health systems must prioritize the integration of predictive analytics into clinical workflows, community outreach, and policy development. Doing so will allow for more equitable, proactive, and effective responses to the evolving challenges of nicotine addiction. As vaping continues to rise among vulnerable populations, leveraging machine learning tools will help inform care standards.</p>
<p>By identifying key factors that predict vaping cessation, predictive analysis offers a powerful means to uncover and address underlying disparities that contribute to nicotine addiction. Previous research has demonstrated its effectiveness in improving patient outcomes in tobacco cessation by isolating actionable variables and informing targeted interventions [<xref ref-type="bibr" rid="pdig.0001031.ref028">28</xref>]. Applying these same analytical techniques to vaping can similarly reduce negative health outcomes and enhance cessation success.</p>
<p>Beyond initial risk assessment and intervention planning, predictive models have also proven valuable in identifying individuals at heightened risk of relapse following smoking cessation [<xref ref-type="bibr" rid="pdig.0001031.ref020">20</xref>]. By recognizing relapse-prone patterns, these tools enable the development of sustained support strategies tailored to vulnerable populations, ensuring long-term success beyond the point of initial cessation. The demonstrated utility of predictive analytics in tobacco-related contexts underscores the importance of extending these methods to vaping. Doing so can refine intervention efforts, personalize care, and ultimately improve outcomes for individuals struggling with nicotine dependence.</p>
<p>The demonstrated success of predictive methods in addressing smoking and tobacco addiction underscores their potential for broader application in the context of vaping. These analytical tools have proven effective in forecasting outcomes and identifying high-risk variables, making them invaluable for guiding intervention strategies. Despite their promise, predictive analysis remains underutilized at both individual and population levels, limiting its impact on care delivery and public health outcomes [<xref ref-type="bibr" rid="pdig.0001031.ref021">21</xref>].</p>
<p>To bridge this gap, efforts must focus on integrating predictive models into clinical and community-based frameworks. By identifying key variables, and combinations of variables, that elevate an individual’s risk for vaping addiction or hinder cessation, these tools can inform more personalized and proactive approaches to care. The scientific consensus affirms the value of predictive analytics, and their strategic implementation can significantly enhance our understanding of vaping behaviors, improve treatment efficacy, and elevate care standards for affected populations.</p>
<p>Machine learning and predictive analysis are not only innovative but are also essential instruments for advancing public health responses to the growing challenge of nicotine dependence through vaping. Overall, predictive modeling offers actionable insights for public health practice. By integrating these variables into clinical and community strategies, machine learning can enhance intervention precision, improve cessation outcomes, and reduce nicotine dependence across diverse populations.</p>
<sec id="sec015">
<title>Value of XAI beyond traditional regression.</title>
<p>Traditional regression models, such as linear or logistic regression, offer interpretable outputs through coefficients that quantify the average effect of predictors on the outcome, assuming linearity and additivity. However, these methods fall short in capturing complex, non-linear relationships, feature interactions, and correlated variables common in real-world data like behavioral health datasets. For instance, in predicting vaping cessation, a regression coefficient for age might indicate a negative association but overlook thresholds where effects intensify (e.g., steeper declines in quit probability below age 21 due to neurodevelopmental factors).</p>
<p>Explainable Artificial Intelligence (XAI) techniques, such as Accumulated Local Effects (ALE) and Local Interpretable Model-Agnostic Explanations (LIME), extend this by providing nuanced, transparent insights into black-box models like Random Forest or Support Vector Machines. ALE computes marginal effects across feature ranges, averaging local prediction changes while robustly handling correlations—revealing non-linear patterns that traditional partial dependence plots distort. In our study, ALE highlighted how environmental triggers progressively erode cessation likelihood, offering granular “what-if” scenarios absent in static coefficients.</p>
<p>LIME adds instance-level value by approximating global model behavior locally with simple surrogates (e.g., linear models on perturbed data neighborhoods). For a specific vaper profile, LIME might quantify social exposure’s −0.19 contribution to non-cessation probability, enabling personalized interpretations that regression’s global averages cannot match. This fidelity (R<sup>2</sup> &gt; 0.85 in most cases) fosters trust and actionability.</p>
<p>Beyond interpretability, XAI enhances public health decision-making: ALE identifies intervention thresholds for policy (e.g., age-targeted campaigns), while LIME supports individualized digital tools (e.g., trigger-specific app notifications). In vaping research, where behaviors are multifaceted, XAI bridges the gap between predictive power and practical utility, promoting equitable, evidence-based strategies without sacrificing complexity. Ultimately, XAI empowers stakeholders to not just predict, but understand and intervene effectively, surpassing regression’s limitations in dynamic, high-stakes domains.</p>
<p>This exploratory study applied machine learning (ML) and explainable artificial intelligence (XAI) to a small cross-sectional sample of young adult vapers (N = 119) to identify potential correlates of self-reported vaping cessation. Models such as Elastic Net and LASSO showed moderate predictive performance on held-out data (test AUC ≈ 0.91), while Random Forest and SVM exhibited signs of overfitting, with training AUCs near 1.0 dropping to 0.70–0.72 on test sets. Key factors like age, environmental triggers, vaping frequency, and sex consistently ranked high in importance across models, with ALE plots revealing non-linear patterns (e.g., sharp cessation probability declines below age 21) and LIME providing instance-level insights into behavioral drivers.</p>
<p>However, these results must be interpreted cautiously due to inherent data limitations. The modest sample size, derived from convenience social media recruitment, restricts statistical power and heightens overfitting risks, particularly for non-linear models. Convenience sampling introduces selection bias, favoring tech-savvy individuals and potentially overrepresenting motivated quitters, which undermines representativeness. Self-reported outcomes are prone to recall and desirability biases, and the cross-sectional design precludes causal inferences or longitudinal prediction of relapse. Without external validation, model generalizability remains unproven, and performance metrics may not translate to diverse populations.</p>
<p>Consequently, claims regarding ML’s effectiveness in this context are preliminary at best. While the models demonstrated feasibility in highlighting behavioral patterns, such as trigger management needs, their accuracy (e.g., test precision 0.81–0.88) falls short of thresholds for reliable clinical tools. Overfitting in complex algorithms like Random Forest underscores that simpler, penalized regressions may be more appropriate for small datasets, but even these require larger-scale testing. XAI elements, including ALE and LIME, add interpretability by quantifying marginal effects and personalized pathways, yet their practical value for intervention design is speculative without empirical trials.</p>
<p>Policy implications are similarly constrained. Although findings tentatively suggest targeting modifiable factors (e.g., social cues via digital apps), we do not advocate immediate integration into public health strategies. Existing cessation programs, like text-based initiatives boosting quit rates by 35–40%, already show proven efficacy; ML could hypothetically complement these by stratifying risk, but our weak data do not support such assertions. Recommendations for regulatory frameworks or tailored interventions exceed the evidence, given the exploratory nature and methodological flaws.</p>
<p>In summary, while ML and XAI offer intriguing tools for dissecting vaping behaviors, our results, although hampered by small, biased data, provide some hypothesis-generating insights. Policymakers and practitioners should rely on established evidence-based approaches, viewing these findings as a call for more robust research rather than actionable guidance.</p>
</sec>
<sec id="sec016">
<title>General limitations.</title>
<p>This study possesses numerous significant shortcomings that require acknowledgment. Initially, all data were obtained through self-reporting, which may result in recall bias, social desirability bias, and misclassification of cessation status. Participants may have inaccurately reported their vaping behaviors or cessation success, especially because of the sensitive nature of substance use. Secondly, the cross-sectional approach inhibits causal inference and genuine prospective prediction; the observed relationships between predictors and cessation status cannot determine temporality or directionality. The non-probability convenience sample, obtained solely via social media sites (Snapchat, Instagram, Facebook, Reddit), is significantly prone to selection bias. Users engaged on these sites tend to be younger, more technologically proficient, and potentially more driven concerning vaping issues, thereby constraining the representativeness of the wider young adult demographic. The limited sample size (N = 119) coupled with several intricate models led to overfitting, particularly in Random Forest (train AUC = 0.99 vs. test AUC = 0.70), despite thorough internal validation. The lack of external validation implies that performance estimations and feature relevance rankings may not extend beyond this particular cohort.</p>
<p>These limitations collectively restrict the robustness and relevance of our findings. Consequently, results should be regarded as exploratory and hypothesis-generating rather than conclusive. Subsequent research utilizing longitudinal designs, probability sampling, bigger sample sizes, objective biomarkers, and external validation cohorts will be crucial to validate and expand upon these first findings.</p>
</sec>
<sec id="sec017">
<title>Limitation - Absence of external validation.</title>
<p>Given the limited sample size (N = 119) and the innovative, social media-based nature of the dataset, external validation with an independent cohort was impracticable within the parameters of this exploratory study. All performance estimations were exclusively obtained by stringent internal validation methods, comprising a 70/30 train-test split, repeated 10-fold cross-validation, and 1000 iterations of bootstrapping. Although these methods yield unbiased estimates for the current sample, they are insufficient for assessing generalizability to wider or demographically diverse populations of young adult vapers. This constraint is prevalent in nascent machine learning applications within public health and highlights the necessity for prudence in evaluating model efficacy.</p>
</sec>
<sec id="sec018">
<title>Future direction.</title>
<p>Future research will emphasize prospective external validation in larger, nationally representative populations to verify the robustness and generalizability of the established variables. Future work must prioritize larger, probability-sampled cohorts (N &gt; 500) with longitudinal follow-up and objective abstinence measures (e.g., cotinine tests) to validate models and assess real-world impact. External validation across demographics, incorporation of multimodal data (e.g., app logs), and randomized trials of XAI-guided interventions are essential. Until then, this study serves as a proof-of-concept, highlighting ML/XAI’s potential while emphasizing the need for rigorous, data-driven refinement to avoid overhyping unproven technologies in public health.</p>
</sec>
</sec>
</sec>
<sec id="sec019" sec-type="conclusions">
<title>Conclusion</title>
<p>This exploratory study utilized machine learning and explainable artificial intelligence to ascertain characteristics linked to self-reported vaping cessation among a community sample of young adult vapers. Through meticulous feature selection and five complementary modeling methodologies, we identified that age, environmental triggers, vaping frequency, sex, and awareness of harmful effects consistently emerged as the most significant correlates of quitting status. Elastic Net and LASSO regressions exhibited optimal predictive performance and generalizability on withheld data, whereas Accumulated Local Effects and LIME offered transparent, instance-specific insights into non-linear correlations and personalized decision routes. These findings provide initial, data-supported evidence for the potential effectiveness of ML/XAI in identifying actionable behavioral and contextual patterns associated with vaping cessation.</p>
<p>Nonetheless, the conclusions derived from this study are intentionally restrained and should be understood within the context of significant methodological limitations. The limited sample size (N = 119), derived via non-probability social media convenience sampling, necessarily constrains statistical power, heightens the danger of overfitting (notably in tree-based models), and limits generalizability to wider groups of young people. The cross-sectional approach prevents causal inference or accurate future prediction of cessation trajectories. Dependence on self-reported data engenders potential recollection and social desirability biases, and the lack of external validation results in performance estimates that are sample-specific. Despite the use of many internal validation measures, including 10-fold cross-validation, bootstrap resampling, and sensitivity analyses, these precautions cannot entirely mitigate the constraints of a limited, single-source dataset. Consequently, we do not assert that the identified models are prepared for clinical implementation or that the observed correlations will consistently result in enhanced quitting outcomes. The results are given as evidence that generates hypotheses and requires careful analysis. The continual recognition of modifiable characteristics, including environmental triggers and usage intensity, indicates that these elements deserve prioritized focus in the development of future digital cessation programs; nevertheless, any such implementation must be preceded by extensive validation.</p>
<p>Subsequent research must rectify these limitations by enlisting larger, nationally representative cohorts via probability-based sampling, utilizing longitudinal designs with objective verification of abstinence (e.g., biochemical confirmation), and performing stringent external validation across varied demographic groups. The use of multimodal data sources such as smartphone usage logs, wearable sensors, or ecological momentary assessments could significantly improve model robustness and real-world applicability. Upon external validation, these ML/XAI methodologies have the potential to guide individualized, scalable interventions that enhance current text-based programs and public health initiatives.</p>
<p>This study provides initial methodological and empirical insights into the use of advanced analytics for vaping cessation, but its findings are still preliminary. By openly recognizing the limitations of the data and design, we underscore the essential requirement for replication and enhancement before these tools may significantly influence clinical practice or policy. Through sustained rigorous development, machine learning and explainable artificial intelligence may enhance our ability to tackle nicotine dependence in young adults; however, this promise can only be actualized through methodologically sound, prospectively validated research.</p>
</sec>
<sec id="sec020" sec-type="materials|methods">
<title>Materials and methods</title>
<sec id="sec021">
<title>Ethics statement</title>
<p>Our study was reviewed by University at Buffalo Institutional Review Board (UBIRB); Office of Research Compliance, Clinical and Translational Research Center Room 5018; 875 Ellicott St., Approval number: UBIRB IRB ID#: STUDY00005954; IRB Approval Statement - “The study materials for the project referenced above were reviewed and approved by the SUNY University at Buffalo IRB (UBIRB) by Non-Committee Review. The UBIRB has determined on 1/27/2022 that the research is Exempt according to 45 CFR Part 46.104. There is no expiration date”.</p>
<p>The survey was anonymous and voluntary. No personal information was obtained from the subjects who consented to participate. No children participated in the survey. Only adults &gt;18 years of age participated in the survey.</p>
<sec id="sec022">
<title><underline>Data collection</underline>.</title>
<p>An online anonymous survey was formulated to collect data on demographic information (age, gender, race/ethnicity), the individual’s vaping status, vaping frequency per day/week, the age when individuals started vaping, vaping duration from the time the individual started ever, which vaping products that the individuals use, questions pertaining to factors that contribute to individuals vaping, if individuals have ever had any adverse experiences while vaping, and if individuals ever tried to quit vaping, etc. Individuals were recruited across various social media platforms such as Snapchat, Instagram, Facebook, and Reddit Forum via direct messaging to potential participants and the link to the vaping survey questionnaire that was developed was distributed to participants online. This data was collected over a span of a six-week time period. There were 121 participants in the survey; however, 10 participants were excluded due to those individuals not completing the survey in its entirety.</p>
</sec>
<sec id="sec023">
<title><underline>Eligibility criteria</underline>.</title>
<p>Individuals were eligible to participate in the study if they had a history of vaping in the past or if they currently used any vaping devices (nicotine vapes, tetrahydrocannabinol (THC) vapes, and cannabidiol (CBD) vapes).</p>
</sec>
<sec id="sec024">
<title><underline>Statistical analysis, selection of variables, preprocessing, and ML methods</underline>.</title>
<p>A chi-square test was used for the categorical variables and a t-test for continuous variables to demonstrate the baseline characteristics to stratify the groups between those with vape cessation vs those without vape cessation. The first objective was to fit the best subsets-feature selection in order to build an ML model. Forward selection and backward elimination regression methods were used for subset selection, with the best subsets being determined by minimum Akaike information criterion (AIC) from the forward selection and backward elimination. Outcome: Vaping cessation was defined as self-reported complete abstinence from all vaping products (nicotine, THC, or CBD) for ≥30 days at the time of survey completion, based on responses to the item ‘Have you ever tried to quit vaping?’ and ‘Are you currently vaping?’ combined with duration since last use.</p>
<p>ML models were built using variables from forward selection and backward elimination. Linear and non-linear ML methods were used to build models. Linear models used least absolute shrinkage and selection operator (lasso). The lasso regression model was cross-validated, and performance was assessed in training and test sets. The model was trained on 70% training data and 30% test data. The regularization parameter for lasso was selected by tailoring for a considerable range of value of lambda and measured for the best value of lambda. Model performance by discrimination (C-statistics) and calibration was determined by using receiver operator curves (ROC) and Hosmer-Lemeshow test on training and test data (<xref ref-type="fig" rid="pdig.0001031.g001">Fig 1</xref>). ML models were further assessed via stratification based on gender and then evaluated based on whether the developed methods performed better on males than females or vice-versa. All statistical analyses were performed with R 3.5.1 (R Foundation for Statistical Computing, Vienna, Austria).</p>
</sec>
<sec id="sec025">
<title><underline>Rationale for modeling approach</underline>.</title>
<p>The modeling strategy was designed to balance exploratory feature discovery, predictive accuracy, and interpretability in a modest cross-sectional dataset (N = 119), while mitigating risks of multicollinearity and overfitting inherent to high-dimensional survey data (42 items). We employed a sequential, multi-method feature selection process to ensure robustness: forward and backward stepwise regression, minimizing Akaike Information Criterion (AIC), provided an initial interpretable subset by iteratively adding/removing variables based on model fit. This was complemented by the Boruta algorithm, a wrapper method using shadow features and random forests to confirm importance through statistical significance testing, addressing potential stepwise biases like path dependence.</p>
<p>Selected features then informed five complementary models, LASSO, Ridge, Elastic Net (linear with regularization), Random Forest, and Support Vector Machine (non-linear) to compare parametric assumptions and capture diverse relationships. Linear models prioritized sparsity and stability via cross-validated lambda tuning, ideal for small samples, while non-linear models explored interactions. A 70/30 train-test split with 10-fold cross-validation and bootstrap resampling (n = 1,000) evaluated performance, emphasizing generalizability over in-sample fit.</p>
<p>This layered approach, grounded in established ML practices, aimed to generate hypothesis-generating insights into vaping cessation predictors without overclaiming causality or deployability, acknowledging data constraints.</p>
</sec>
<sec id="sec026">
<title><underline>Prediction models and risk factor analysis</underline>.</title>
<p>Our study aimed to investigate vaping cessation among young adults using both behavioral characterization and advanced predictive analytics. We aimed (1) to delineate demographic, behavioral, psychosocial, and environmental factors linked to cessation-related attitudes, quit attempts, and prolonged abstinence in a community sample of young adult vapers, and (2) to construct, evaluate, and interpret machine learning models enhanced by explainable artificial intelligence (XAI) techniques to identify reliable predictors of successful vaping cessation. This exploratory cross-sectional study aimed to produce actionable, hypothesis-generating insights for designing personalized, scalable digital interventions to reduce nicotine dependence in a high-risk population by employing rigorous feature selection, various linear and non-linear algorithms, and transparent interpretability methods.</p>
<p>For subset selection in the construction of the ML model, forward selection and backward elimination regression techniques were used, with the minimal Akaike information criterion (AIC) used to determine the best subsets. Important variables were identified through forward selection and backward removal, with the results being used to construct ML models. The Boruta package was used to obtain and compare the feature selection models. Features that optimize model performance were used for building the ML model.</p>
<p>Both linear and non-linear ML techniques were used to construct the models. Least absolute shrinkage and selection operator (Lasso), ridge, and elastic net regression were used as the linear models, and RF was used as the non-linear model. Both the training set and test set were used to evaluate lasso regression model efficacy via cross-validation.</p>
<p>Random Forest variable importance was assessed via Mean Decrease Gini, prioritizing features based on impurity reduction across 500 trees. The model was trained with 70% of the data and validated with the remaining 30%. Receiver operator curves (ROCs) and the Hosmer-Lemeshow test were used to compare the model’s performance on both the training and test sets. The same processes were repeated for the ridge and Elastic Net (EN) regression. Elastic Net is a linear regression model that combines L1 (Lasso) and L2 (Ridge) regularization. Random forest (RF) was used to estimate ML model prediction and explainability (Supplementary Materials). Classification accuracy was measured using ROCs, Hosmer-Lemeshow test, and the confusion matrix.</p>
<p>RF was used for employing the explainable artificial intelligence (XAI); the process parameter tuning was included with elaborate computation through a parallel core through the use of “detectCores,” which were utilized to automatically determine the number of cores for “makePSOCKcluster” functions. Local Interpretable Model-Agnostic Explanations (LIME) techniques were used to achieve a means of local interpretation. R 4.6.2 was used for all statistical testing (R Foundation for Statistical Computing, Vienna, Austria).</p>
<p>To reduce confounding bias, the following predetermined variables were incorporated as covariates in all models: race/ethnicity, highest educational achievement, annual household income, concurrent combustible tobacco use (yes/no), and self-reported mental health symptoms (binary composite of anxiety and/or depressive symptoms derived from validated survey items).</p>
<p>In LASSO, Ridge, and Elastic Net regressions, these confounders were incorporated into the models before penalization to guarantee complete adjustment while allowing data-driven shrinkage of other factors. In the Random Forest and Support Vector Machine models, the confounders were preserved as input features following Boruta selection. Odds ratios adjusted for multiple variables (for linear models) and partial dependence/ALE plots (for nonlinear models) were produced to identify independent associations with vaping cessation. Sensitivity analyses that omitted these covariates yielded substantially consistent predictor rankings and model performance, thereby affirming the robustness of the main findings.</p>
</sec>
</sec>
</sec>
<sec id="sec027" sec-type="supplementary-material">
<title>Supporting information</title>
<supplementary-material id="pdig.0001031.s001" mimetype="image/tiff" position="float" xlink:href="info:doi/10.1371/journal.pdig.0001031.s001" xlink:type="simple">
<label>S1 Fig</label>
<caption>
<title>(A) LASSO coefficient trajectories across varying lambda (λ) values.</title>
<p>This plot illustrates how the estimated coefficients of predictors change as the regularization parameter λ increases. As λ grows, the model imposes stronger penalization, driving more coefficients to zero and resulting in increasingly sparse models that retain only the most influential predictors. (B) Cross-validation curve for LASSO regression on the training dataset. The plot shows the 10-fold cross-validated binomial deviance as a function of log-transformed lambda (λ), the regularization tuning parameter. This visualization supports model optimization by identifying the λ value that minimizes deviance, thereby selecting the most effective level of sparsity for the LASSO-regularized model.</p>
<p>(TIF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pdig.0001031.s002" mimetype="image/tiff" position="float" xlink:href="info:doi/10.1371/journal.pdig.0001031.s002" xlink:type="simple">
<label>S2 Fig</label>
<caption>
<title>(A) Ridge regression coefficient paths across varying lambda (λ) values.</title>
<p>This plot illustrates how the estimated coefficients of predictors evolve as the regularization parameter λ increases. As λ grows, the model increasingly penalizes complexity, causing the coefficients to shrink toward zero—resulting in progressively sparser models with fewer influential predictors. (B) Cross-validation for ridge regression on the training dataset. The plot displays the 10-fold cross-validated binomial deviance as a function of log-transformed lambda (λ), the regularization tuning parameter. Although labeled for ridge regression, the curve reflects performance metrics for a lasso-regularized model, aiding in the selection of the optimal λ that minimizes deviance and enhances model generalization.</p>
<p>(TIF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pdig.0001031.s003" mimetype="image/tiff" position="float" xlink:href="info:doi/10.1371/journal.pdig.0001031.s003" xlink:type="simple">
<label>S3 Fig</label>
<caption>
<title>Variable importance plot from random forest model.</title>
<p>The plot displays the relative importance of predictor variables based on Mean Decrease Accuracy and Mean Decrease Gini. Variables are ranked from most to least important, with higher values indicating greater contribution to model performance.</p>
<p>(TIF)</p>
</caption>
</supplementary-material>
</sec>
</body>
<back>
<ref-list>
<title>References</title>
<ref id="pdig.0001031.ref001"><label>1</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Glantz</surname> <given-names>SA</given-names></name>, <name name-style="western"><surname>Bareham</surname> <given-names>DW</given-names></name>. <article-title>E-Cigarettes: Use, Effects on Smoking, Risks, and Policy Implications</article-title>. <source>Annu Rev Public Health</source>. <year>2018</year>;<volume>39</volume>:<fpage>215</fpage>–<lpage>35</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1146/annurev-publhealth-040617-013757" xlink:type="simple">10.1146/annurev-publhealth-040617-013757</ext-link></comment> <object-id pub-id-type="pmid">29323609</object-id></mixed-citation></ref>
<ref id="pdig.0001031.ref002"><label>2</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Park-Lee</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Jamal</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Cowan</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Sawdey</surname> <given-names>MD</given-names></name>, <name name-style="western"><surname>Cooper</surname> <given-names>MR</given-names></name>, <name name-style="western"><surname>Birdsey</surname> <given-names>J</given-names></name>, <etal>et al</etal>. <article-title>Notes from the Field: E-Cigarette and Nicotine Pouch Use Among Middle and High School Students - United States, 2024</article-title>. <source>MMWR Morb Mortal Wkly Rep</source>. <year>2024</year>;<volume>73</volume>(<issue>35</issue>):<fpage>774</fpage>–<lpage>8</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.15585/mmwr.mm7335a3" xlink:type="simple">10.15585/mmwr.mm7335a3</ext-link></comment> <object-id pub-id-type="pmid">39236021</object-id></mixed-citation></ref>
<ref id="pdig.0001031.ref003"><label>3</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Erhabor</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Boakye</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Obisesan</surname> <given-names>O</given-names></name>, <name name-style="western"><surname>Osei</surname> <given-names>AD</given-names></name>, <name name-style="western"><surname>Tasdighi</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Mirbolouk</surname> <given-names>H</given-names></name>, <etal>et al</etal>. <article-title>E-Cigarette Use Among US Adults in the 2021 Behavioral Risk Factor Surveillance System Survey</article-title>. <source>JAMA Netw Open</source>. <year>2023</year>;<volume>6</volume>(<issue>11</issue>):e2340859. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1001/jamanetworkopen.2023.40859" xlink:type="simple">10.1001/jamanetworkopen.2023.40859</ext-link></comment> <object-id pub-id-type="pmid">37921768</object-id></mixed-citation></ref>
<ref id="pdig.0001031.ref004"><label>4</label><mixed-citation publication-type="book" xlink:type="simple"><collab>National Academies of Sciences, Engineering, and Medicine</collab>. <source>Public health consequences of e-cigarettes</source>. <publisher-loc>Washington, DC</publisher-loc>: <publisher-name>National Academies Press</publisher-name>. <year>2018</year>.</mixed-citation></ref>
<ref id="pdig.0001031.ref005"><label>5</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hartmann-Boyce</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>McRobbie</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Lindson</surname> <given-names>N</given-names></name>. <article-title>Electronic cigarettes for smoking cessation</article-title>. <source>Cochrane Database Syst Rev</source>. <year>2021</year>;<volume>4</volume>(<issue>4</issue>):CD010216.</mixed-citation></ref>
<ref id="pdig.0001031.ref006"><label>6</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Gotts</surname> <given-names>JE</given-names></name>, <name name-style="western"><surname>Jordt</surname> <given-names>S-E</given-names></name>, <name name-style="western"><surname>McConnell</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Tarran</surname> <given-names>R</given-names></name>. <article-title>What are the respiratory effects of e-cigarettes?</article-title>. <source>BMJ</source>. <year>2019</year>;<volume>366</volume>:l5275. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1136/bmj.l5275" xlink:type="simple">10.1136/bmj.l5275</ext-link></comment> <object-id pub-id-type="pmid">31570493</object-id></mixed-citation></ref>
<ref id="pdig.0001031.ref007"><label>7</label><mixed-citation publication-type="book" xlink:type="simple"><collab>Grand View Research</collab>. <source>E-cigarette and vape market size, share &amp; trends analysis report by distribution channel, by product, by component, by region, and segment forecasts, 2021–2028</source>. <publisher-loc>San Francisco</publisher-loc>: <publisher-name>Grand View Research</publisher-name>. <year>2021</year>.</mixed-citation></ref>
<ref id="pdig.0001031.ref008"><label>8</label><mixed-citation publication-type="other" xlink:type="simple"><collab>Centers for Disease Control and Prevention</collab>. Current cigarette smoking among adults in the United States. <year>2023</year>. <ext-link ext-link-type="uri" xlink:href="https://www.cdc.gov/tobacco/data_statistics/fact_sheets/adult_data/cig_smoking/index.htm" xlink:type="simple">https://www.cdc.gov/tobacco/data_statistics/fact_sheets/adult_data/cig_smoking/index.htm</ext-link></mixed-citation></ref>
<ref id="pdig.0001031.ref009"><label>9</label><mixed-citation publication-type="other" xlink:type="simple"><collab>U.S. Food and Drug Administration</collab>. FDA, DOJ efforts lead to seizures of nearly 678,684 illegal e-cigarettes in ongoing fight against youth vaping epidemic. <publisher-name>FDA</publisher-name>. <year>2024</year>. <ext-link ext-link-type="uri" xlink:href="https://www.fda.gov/news-events/press-announcements/fda-doj-efforts-lead-seizures-nearly-678684-illegal-e-cigarettes-ongoing-fight-against-youth-vaping" xlink:type="simple">https://www.fda.gov/news-events/press-announcements/fda-doj-efforts-lead-seizures-nearly-678684-illegal-e-cigarettes-ongoing-fight-against-youth-vaping</ext-link></mixed-citation></ref>
<ref id="pdig.0001031.ref010"><label>10</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Zhang</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Gentzke</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Trivers</surname> <given-names>KF</given-names></name>, <name name-style="western"><surname>VanFrank</surname> <given-names>B</given-names></name>. <article-title>Tobacco Cessation Behaviors Among U.S. Middle and High School Students, 2020</article-title>. <source>J Adolesc Health</source>. <year>2022</year>;<volume>70</volume>(<issue>1</issue>):<fpage>147</fpage>–<lpage>54</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.jadohealth.2021.07.011" xlink:type="simple">10.1016/j.jadohealth.2021.07.011</ext-link></comment> <object-id pub-id-type="pmid">34930566</object-id></mixed-citation></ref>
<ref id="pdig.0001031.ref011"><label>11</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Graham</surname> <given-names>AL</given-names></name>, <name name-style="western"><surname>Amato</surname> <given-names>MS</given-names></name>, <name name-style="western"><surname>Cha</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Jacobs</surname> <given-names>MA</given-names></name>, <name name-style="western"><surname>Bottcher</surname> <given-names>MM</given-names></name>, <name name-style="western"><surname>Papandonatos</surname> <given-names>GD</given-names></name>. <article-title>Effectiveness of a Vaping Cessation Text Message Program Among Young Adult e-Cigarette Users: A Randomized Clinical Trial</article-title>. <source>JAMA Intern Med</source>. <year>2021</year>;<volume>181</volume>(<issue>7</issue>):<fpage>923</fpage>–<lpage>30</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1001/jamainternmed.2021.1793" xlink:type="simple">10.1001/jamainternmed.2021.1793</ext-link></comment> <object-id pub-id-type="pmid">33999133</object-id></mixed-citation></ref>
<ref id="pdig.0001031.ref012"><label>12</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Simovic</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Matheson</surname> <given-names>CL</given-names></name>, <name name-style="western"><surname>Colon</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Cobb</surname> <given-names>CO</given-names></name>, <name name-style="western"><surname>Voynow</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Kim</surname> <given-names>Y</given-names></name>, <etal>et al</etal>. <article-title>Exploring the Impact of E-cigarettes on Cardiovascular Health: Insights from Preclinical and Clinical Studies</article-title>. <source>Cardiovasc Toxicol</source>. <year>2025</year>;<volume>25</volume>(<issue>11</issue>):<fpage>1673</fpage>–<lpage>88</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s12012-025-10046-y" xlink:type="simple">10.1007/s12012-025-10046-y</ext-link></comment> <object-id pub-id-type="pmid">40825878</object-id></mixed-citation></ref>
<ref id="pdig.0001031.ref013"><label>13</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Casey</surname> <given-names>AM</given-names></name>, <name name-style="western"><surname>Muise</surname> <given-names>ED</given-names></name>, <name name-style="western"><surname>Crotty Alexander</surname> <given-names>LE</given-names></name>. <article-title>Vaping and e-cigarette use. Mysterious lung manifestations and an epidemic.</article-title> <source>Curr Opin Immunol</source>. <year>2020</year>;<volume>66</volume>:<fpage>143</fpage>–<lpage>50</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.coi.2020.10.003" xlink:type="simple">10.1016/j.coi.2020.10.003</ext-link></comment> <object-id pub-id-type="pmid">33186869</object-id></mixed-citation></ref>
<ref id="pdig.0001031.ref014"><label>14</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Adadi</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Berrada</surname> <given-names>M</given-names></name>. <article-title>Peeking Inside the Black-Box: A Survey on Explainable Artificial Intelligence (XAI)</article-title>. <source>IEEE Access</source>. <year>2018</year>;<volume>6</volume>:<fpage>52138</fpage>–<lpage>60</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1109/access.2018.2870052" xlink:type="simple">10.1109/access.2018.2870052</ext-link></comment></mixed-citation></ref>
<ref id="pdig.0001031.ref015"><label>15</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Pierce</surname> <given-names>JP</given-names></name>, <name name-style="western"><surname>Benmarhnia</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Chen</surname> <given-names>R</given-names></name>. <article-title>Role of e-cigarettes and pharmacotherapy in smoking cessation: a cohort study</article-title>. <source>Lancet Public Health</source>. <year>2021</year>;<volume>6</volume>(<issue>8</issue>):e584–<lpage>92</lpage>.</mixed-citation></ref>
<ref id="pdig.0001031.ref016"><label>16</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Smith</surname> <given-names>TT</given-names></name>, <name name-style="western"><surname>McClernon</surname> <given-names>FJ</given-names></name>, <name name-style="western"><surname>Kozlowski</surname> <given-names>LT</given-names></name>. <article-title>Predicting vaping uptake and cessation: a review of current models and future directions</article-title>. <source>Nicotine Tob Res</source>. <year>2023</year>;<volume>25</volume>(<issue>4</issue>):<fpage>623</fpage>–<lpage>30</lpage>.</mixed-citation></ref>
<ref id="pdig.0001031.ref017"><label>17</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Al-Hamdani</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Davidson</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Smith</surname> <given-names>S</given-names></name>. <article-title>A survey of quit vaping strategies and relapse triggers for maintaining youth and young adult vaping abstinence in Canada</article-title>. <source>J Addict Dis</source>. <year>2026</year>;<volume>44</volume>(<issue>1</issue>):<fpage>70</fpage>–<lpage>7</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1080/10550887.2024.2440185" xlink:type="simple">10.1080/10550887.2024.2440185</ext-link></comment> <object-id pub-id-type="pmid">39696808</object-id></mixed-citation></ref>
<ref id="pdig.0001031.ref018"><label>18</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Benowitz</surname> <given-names>NL</given-names></name>, <name name-style="western"><surname>Hukkanen</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Jacob</surname> <given-names>P</given-names> <suffix>3rd</suffix></name>. <article-title>Nicotine chemistry, metabolism, kinetics and biomarkers</article-title>. <source>Handb Exp Pharmacol</source>. <year>2009</year>;(<issue>192</issue>):<fpage>29</fpage>–<lpage>60</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/978-3-540-69248-5_2" xlink:type="simple">10.1007/978-3-540-69248-5_2</ext-link></comment> <object-id pub-id-type="pmid">19184645</object-id></mixed-citation></ref>
<ref id="pdig.0001031.ref019"><label>19</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Butler</surname> <given-names>AR</given-names></name>, <name name-style="western"><surname>Lindson</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Livingstone-Banks</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Notley</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Turner</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Rigotti</surname> <given-names>NA</given-names></name>, <etal>et al</etal>. <article-title>Interventions for quitting vaping</article-title>. <source>Cochrane Database Syst Rev</source>. <year>2025</year>;<volume>1</volume>(<issue>1</issue>):CD016058. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1002/14651858.CD016058.pub2" xlink:type="simple">10.1002/14651858.CD016058.pub2</ext-link></comment> <object-id pub-id-type="pmid">39777614</object-id></mixed-citation></ref>
<ref id="pdig.0001031.ref020"><label>20</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Choi</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Jung</surname> <given-names>H-T</given-names></name>, <name name-style="western"><surname>Ferrell</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Woo</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Haddad</surname> <given-names>L</given-names></name>. <article-title>Machine Learning-Based Nicotine Addiction Prediction Models for Youth E-Cigarette and Waterpipe (Hookah) Users</article-title>. <source>J Clin Med</source>. <year>2021</year>;<volume>10</volume>(<issue>5</issue>):<fpage>972</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3390/jcm10050972" xlink:type="simple">10.3390/jcm10050972</ext-link></comment> <object-id pub-id-type="pmid">33801175</object-id></mixed-citation></ref>
<ref id="pdig.0001031.ref021"><label>21</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Engelgau</surname> <given-names>MM</given-names></name>, <name name-style="western"><surname>Khoury</surname> <given-names>MJ</given-names></name>, <name name-style="western"><surname>Roper</surname> <given-names>RA</given-names></name>, <name name-style="western"><surname>Curry</surname> <given-names>JS</given-names></name>, <name name-style="western"><surname>Mensah</surname> <given-names>GA</given-names></name>. <article-title>Predictive Analytics: Helping Guide the Implementation Research Agenda at the National Heart, Lung, and Blood Institute</article-title>. <source>Glob Heart</source>. <year>2019</year>;<volume>14</volume>(<issue>1</issue>):<fpage>75</fpage>–<lpage>9</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.gheart.2019.02.003" xlink:type="simple">10.1016/j.gheart.2019.02.003</ext-link></comment> <object-id pub-id-type="pmid">31036305</object-id></mixed-citation></ref>
<ref id="pdig.0001031.ref022"><label>22</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Etter</surname> <given-names>J-F</given-names></name>, <name name-style="western"><surname>Vera Cruz</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Khazaal</surname> <given-names>Y</given-names></name>. <article-title>Predicting smoking cessation, reduction and relapse six months after using the Stop-Tabac app for smartphones: a machine learning analysis</article-title>. <source>BMC Public Health</source>. <year>2023</year>;<volume>23</volume>(<issue>1</issue>):<fpage>1076</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1186/s12889-023-15859-6" xlink:type="simple">10.1186/s12889-023-15859-6</ext-link></comment> <object-id pub-id-type="pmid">37277740</object-id></mixed-citation></ref>
<ref id="pdig.0001031.ref023"><label>23</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Graham</surname> <given-names>AL</given-names></name>, <name name-style="western"><surname>Amato</surname> <given-names>MS</given-names></name>, <name name-style="western"><surname>Jacobs</surname> <given-names>MA</given-names></name>, <name name-style="western"><surname>Romberg</surname> <given-names>AR</given-names></name>, <name name-style="western"><surname>Diaz</surname> <given-names>MC</given-names></name>, <name name-style="western"><surname>Rahman</surname> <given-names>B</given-names></name>, <etal>et al</etal>. <article-title>Vaping in the Workplace: Implications for Employer-Sponsored Tobacco Cessation Programs</article-title>. <source>J Occup Environ Med</source>. <year>2020</year>;<volume>62</volume>(<issue>12</issue>):<fpage>986</fpage>–<lpage>92</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1097/JOM.0000000000002013" xlink:type="simple">10.1097/JOM.0000000000002013</ext-link></comment> <object-id pub-id-type="pmid">32881778</object-id></mixed-citation></ref>
<ref id="pdig.0001031.ref024"><label>24</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Gravely</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Meng</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Hammond</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Hyland</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Michael Cummings</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Borland</surname> <given-names>R</given-names></name>, <etal>et al</etal>. <article-title>Differences in cigarette smoking quit attempts and cessation between adults who did and did not take up nicotine vaping: Findings from the ITC four country smoking and vaping surveys</article-title>. <source>Addict Behav</source>. <year>2022</year>;<volume>132</volume>:<fpage>107339</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.addbeh.2022.107339" xlink:type="simple">10.1016/j.addbeh.2022.107339</ext-link></comment> <object-id pub-id-type="pmid">35605409</object-id></mixed-citation></ref>
<ref id="pdig.0001031.ref025"><label>25</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hochgraf</surname> <given-names>AK</given-names></name>, <name name-style="western"><surname>Fosco</surname> <given-names>GM</given-names></name>, <name name-style="western"><surname>Lanza</surname> <given-names>ST</given-names></name>. <article-title>Age-Varying Associations Between Attempts to Lose Weight and Nicotine Vaping Across Adolescence: Results From a Nationally Representative Sample</article-title>. <source>J Adolesc Health</source>. <year>2023</year>;<volume>72</volume>(<issue>3</issue>):<fpage>352</fpage>–<lpage>8</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.jadohealth.2022.10.009" xlink:type="simple">10.1016/j.jadohealth.2022.10.009</ext-link></comment> <object-id pub-id-type="pmid">36424336</object-id></mixed-citation></ref>
<ref id="pdig.0001031.ref026"><label>26</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Kim</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Picciotto</surname> <given-names>MR</given-names></name>. <article-title>Nicotine addiction: More than just dopamine</article-title>. <source>Curr Opin Neurobiol</source>. <year>2023</year>;<volume>83</volume>:<fpage>102797</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.conb.2023.102797" xlink:type="simple">10.1016/j.conb.2023.102797</ext-link></comment> <object-id pub-id-type="pmid">37832393</object-id></mixed-citation></ref>
<ref id="pdig.0001031.ref027"><label>27</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>McArthur</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Stewart</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Al-Hamdani</surname> <given-names>M</given-names></name>. <article-title>Vaping Frequency in Young Users: The Role of Gender and Age Among Regular Users</article-title>. <source>Subst Use Misuse</source>. <year>2024</year>;<volume>59</volume>(<issue>12</issue>):<fpage>1778</fpage>–<lpage>84</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1080/10826084.2024.2374975" xlink:type="simple">10.1080/10826084.2024.2374975</ext-link></comment> <object-id pub-id-type="pmid">39028136</object-id></mixed-citation></ref>
<ref id="pdig.0001031.ref028"><label>28</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Rijhwani</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Mohanty</surname> <given-names>VR</given-names></name>, <name name-style="western"><surname>Yb</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Singh</surname> <given-names>V</given-names></name>, <name name-style="western"><surname>Hashmi</surname> <given-names>S</given-names></name>. <article-title>Applicability of Data Mining and Predictive Analysis for Tobacco Cessation: An Exploratory Study</article-title>. <source>Front Dent</source>. <year>2020</year>;<volume>17</volume>:<fpage>24</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.18502/fid.v17i24.4624" xlink:type="simple">10.18502/fid.v17i24.4624</ext-link></comment> <object-id pub-id-type="pmid">36042799</object-id></mixed-citation></ref>
</ref-list>
</back>
<sub-article article-type="aggregated-review-documents" id="pdig.0001031.r001" specific-use="decision-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pdig.0001031.r001</article-id>
<title-group>
<article-title>Decision Letter 0</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western"><surname>Ghasemi</surname>
<given-names>Hadi</given-names>
</name>
<role>Academic Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2026</copyright-year>
<copyright-holder>Hadi Ghasemi</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p></license>
</permissions>
<related-object document-id="10.1371/journal.pdig.0001031" document-id-type="doi" document-type="article" id="rel-obj001" link-type="peer-reviewed-article"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>0</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p><named-content content-type="letter-date">9 Feb 2026</named-content></p>
<p>--&gt;PDIG-D-25-00724--&gt;--&gt;Predicting Vaping Cessation in Young Adults: A Machine Learning and Explainable Artificial Intelligence (XAI) Approach to Public Health Intervention.--&gt;--&gt;PLOS Digital Health--&gt;--&gt; --&gt;--&gt;Dear Dr. Mahajan,--&gt;--&gt; --&gt;--&gt;Thank you for submitting your manuscript to PLOS Digital Health. After careful consideration, we feel that it has merit but does not fully meet PLOS Digital Health's publication criteria as it currently stands. Therefore, we invite you to submit a revised version of the manuscript that addresses the points raised during the review process.--&gt;--&gt; --&gt;--&gt;Please submit your revised manuscript by Apr 10 2026 11:59PM. If you will need more time than this to complete your revisions, please reply to this message or contact the journal office at digitalhealth@plos.org. When you're ready to submit your revision, log on to <ext-link ext-link-type="uri" xlink:href="https://www.editorialmanager.com/pdig/" xlink:type="simple">https://www.editorialmanager.com/pdig/</ext-link> and select the 'Submissions Needing Revision' folder to locate your manuscript file.--&gt;--&gt; --&gt;--&gt;Please include the following items when submitting your revised manuscript:--&gt;--&gt;* A letter that responds to each point raised by the editor and reviewer(s). You should upload this letter as a separate file labeled '<underline>Response to Reviewers</underline>'. This file does not need to include responses to any formatting updates and technical items listed in the 'Journal Requirements' section below.--&gt;--&gt;* A marked-up copy of your manuscript that highlights changes made to the original version. 
You should upload this as a separate file labeled '<underline>Revised Manuscript with Track Changes</underline>'.--&gt;--&gt;* An unmarked version of your revised paper without tracked changes. You should upload this as a separate file labeled '<underline>Manuscript</underline>'.--&gt;--&gt; --&gt;--&gt;If you would like to make changes to your financial disclosure, competing interests statement, or data availability statement, please make these updates within the submission form at the time of resubmission. Guidelines for resubmitting your figure files are available below the reviewer comments at the end of this letter.--&gt;--&gt; --&gt;--&gt;We look forward to receiving your revised manuscript.--&gt;--&gt; --&gt;--&gt;Kind regards,--&gt;--&gt; --&gt;--&gt;Hadi Ghasemi--&gt;--&gt;Academic Editor--&gt;--&gt;PLOS Digital Health--&gt;--&gt; --&gt;--&gt;Laura Sbaffi--&gt;--&gt;Section Editor--&gt;--&gt;PLOS Digital Health--&gt;--&gt; --&gt;--&gt;Leo Anthony Celi--&gt;--&gt;Editor-in-Chief--&gt;--&gt;PLOS Digital Health--&gt;--&gt;orcid.org/0000-0001-6712-6626--&gt;--&gt; --&gt;--&gt;<bold>Journal Requirements:</bold>--&gt;--&gt; --&gt;--&gt;--&gt;1. Please provide separate figure files in .tif or .eps format.--&gt;--&gt; --&gt;--&gt;For more information about figure files please see our guidelines:  --&gt;--&gt; --&gt;--&gt;<ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/digitalhealth/s/figures" xlink:type="simple">https://journals.plos.org/digitalhealth/s/figures</ext-link> --&gt;--&gt;<ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/digitalhealth/s/figures#loc-file-requirements" xlink:type="simple">https://journals.plos.org/digitalhealth/s/figures#loc-file-requirements</ext-link> --&gt;--&gt; --&gt;--&gt; --&gt;--&gt;2. Please provide an Author Summary. This should appear in your manuscript between the Abstract (if applicable) and the Introduction, and should be 150–200 words long. 
The aim should be to make your findings accessible to a wide audience that includes both scientists and non-scientists. Sample summaries can be found on our website under Submission Guidelines: --&gt;--&gt; --&gt;--&gt;<ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/digitalhealth/s/submission-guidelines#loc-parts-of-a-submission" xlink:type="simple">https://journals.plos.org/digitalhealth/s/submission-guidelines#loc-parts-of-a-submission</ext-link>--&gt;--&gt; --&gt;--&gt; --&gt;--&gt;3. We have noticed that you have uploaded Supporting Information files, but you have not included a list of legends. Please add a full list of legends for your Supporting Information files after the references list.--&gt;--&gt; --&gt;--&gt; --&gt;--&gt;4. In the online submission form, you indicated that ‘Data will be available on request’. --&gt;--&gt; --&gt;--&gt;All PLOS journals now require all data underlying the findings described in their manuscript to be freely available to other researchers, either --&gt;--&gt;1. In a public repository, --&gt;--&gt;2. Within the manuscript itself, or --&gt;--&gt;3. Uploaded as supplementary information.--&gt;--&gt; --&gt;--&gt;This policy applies to all data except where public deposition would breach compliance with the protocol approved by your research ethics board. If your data cannot be made publicly available for ethical or legal reasons (e.g., public availability would compromise patient privacy), please explain your reasons by return email and your exemption request will be escalated to the editor for approval. Your exemption request will be handled independently and will not hold up the peer review process, but will need to be resolved should your manuscript be accepted for publication. 
One of the Editorial team will then be in touch if there are any issues.--&gt;--&gt;--&gt; --&gt;--&gt;If the reviewer comments include a recommendation to cite specific previously published works, please review and evaluate these publications to determine whether they are relevant and should be cited. There is no requirement to cite these works unless the editor has indicated otherwise. --&gt;--&gt; --&gt;--&gt;<bold>Additional Editor Comments (if provided):</bold>--&gt;--&gt; --&gt;--&gt; --&gt;--&gt; --&gt;--&gt;[Note: HTML markup is below. Please do not edit.]--&gt;--&gt; --&gt;--&gt;<bold>Reviewers' Comments:</bold>--&gt;--&gt; --&gt;--&gt;Reviewer's Responses to Questions</p>
<p><bold>Comments to the Author</bold></p>
<p>1. Does this manuscript meet PLOS Digital Health’s <ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/digitalhealth/s/journal-information#loc-criteria-for-publication" xlink:type="simple">publication criteria</ext-link>? Is the manuscript technically sound, and do the data support the conclusions? The manuscript must describe methodologically and ethically rigorous research with conclusions that are appropriately drawn based on the data presented.</p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: Partly</p>
<p>**********</p>
<p>2. Has the statistical analysis been performed appropriately and rigorously?</p>
<p>Reviewer #1: No</p>
<p>Reviewer #2: Yes</p>
<p>**********</p>
<p>3. Have the authors made all data underlying the findings in their manuscript fully available (please refer to the Data Availability Statement at the start of the manuscript PDF file)?</p>
<p>The <ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/digitalhealth/s/data-availability" xlink:type="simple">PLOS Data policy</ext-link> requires authors to make all data underlying the findings described in their manuscript fully available without restriction, with rare exception. The data should be provided as part of the manuscript or its supporting information, or deposited to a public repository. For example, in addition to summary statistics, the data points behind means, medians and variance measures should be available. If there are restrictions on publicly sharing data—e.g. participant privacy or use of data from a third party—those must be specified.</p>
<p>Reviewer #1: No</p>
<p>Reviewer #2: No</p>
<p>**********</p>
<p>4. Is the manuscript presented in an intelligible fashion and written in standard English?</p>
<p>PLOS Digital Health does not copyedit accepted manuscripts, so the language in submitted articles must be clear, correct, and unambiguous. Any typographical or grammatical errors should be corrected at revision, so please note any specific errors here.</p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: Yes</p>
<p>**********</p>
<p>5. Review Comments to the Author</p>
<p>Please use the space provided to explain your answers to the questions above. You may also include additional comments for the author, including concerns about dual publication, research ethics, or publication ethics. (Please upload your review as an attachment if it exceeds 20,000 characters)</p>
<p>Reviewer #1: attached</p>
<p>Reviewer #2: Review Comments to the Author</p>
<p>This manuscript explores the use of machine learning and explainable AI to predict vaping cessation among young adults, addressing a highly relevant public health challenge. The topic is well aligned with PLOS Digital Health, and the authors demonstrate familiarity with a broad range of ML techniques and contemporary cessation literature. Ethical approval and participant protections are appropriately described.</p>
<p>That said, several major issues should be addressed before the manuscript can be considered for publication:</p>
<p>The abstract should be rewritten in a structured format consistent with PLOS Digital Health standards. Specifically, the authors should clearly separate and label the following components: Background, Objective, Methods, Results, and Conclusions. The current abstract is narrative and overly dense, making it difficult to quickly identify the study aim, analytic approach, and key findings. A structured abstract would substantially improve clarity, accessibility, and alignment with journal expectations, particularly for a digitally focused and methodologically complex study.</p>
<p>Language: The manuscript is generally intelligible and written in standard academic English. However, substantial editorial refinement is needed: Repetition across the Introduction and Discussion reduces clarity and conciseness. Some sections (especially Results and Discussion) are overly long and descriptive, with limited synthesis. Terminology is occasionally inconsistent (e.g., cessation definitions, model performance claims).</p>
<p>Minor grammatical errors, formatting inconsistencies, and typographical issues are present.</p>
<p>Outcome Definition and Study Design: The primary outcome of “vaping cessation” is insufficiently defined. It is unclear whether cessation reflects a quit attempt, short-term abstinence, or sustained cessation, and over what timeframe. This ambiguity undermines interpretation of model performance and clinical relevance.</p>
<p>Sample Size and Model Complexity</p>
<p>The relatively small sample (n = 119) is a major limitation given the number of predictors and the use of complex models (Random Forest, SVM, XAI). The manuscript should explicitly discuss risks of overfitting, limited generalizability, and instability of feature importance rankings.</p>
<p>Statistical and ML Methodology: While multiple models are explored, the rationale for using overlapping feature-selection strategies (forward/backward selection, Boruta, LASSO) is unclear. Performance comparisons between models are sometimes inconsistent, and claims of superiority should be tempered. Calibration results indicating borderline or poor fit require discussion.</p>
<p>Explainability and XAI Claims: The manuscript emphasizes explainable AI, yet the practical contribution of ALE and LIME analyses to public health decision-making is not clearly articulated. The authors should clarify how these explanations add value beyond traditional regression outputs.</p>
<p>Data Availability and Reproducibility: The current Data Availability Statement does not meet PLOS requirements. Public deposition of de-identified data and analytic code is essential for transparency and reproducibility.</p>
<p>Interpretation and Scope of Conclusions: Several conclusions particularly regarding sex differences, behavioral readiness, and intervention design extend beyond what can be supported by this dataset. These interpretations should be reframed as exploratory and hypothesis-generating.</p>
<p>Manuscript Structure and Clarity: The manuscript would benefit from substantial condensation, especially in the Discussion, and clearer separation between results, interpretation, and implications.</p>
<p>This is a promising and relevant study with potential contribution to digital public health research. However, substantial revisions are required to strengthen methodological rigor, transparency, and alignment between data, analyses, and conclusions.</p>
<p>**********</p>
<p>6. PLOS authors have the option to publish the peer review history of their article (<ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/digitalhealth/s/editorial-and-peer-review-process#loc-peer-review-history" xlink:type="simple">what does this mean?</ext-link>). If published, this will include your full peer review and any attached files.</p>
<p><bold>Do you want your identity to be public for this peer review?</bold> If you choose “no”, your identity will remain anonymous but your review may still be made public.</p>
<p>For information about this choice, including consent withdrawal, please see our <ext-link ext-link-type="uri" xlink:href="https://www.plos.org/privacy-policy" xlink:type="simple">Privacy Policy</ext-link>.</p>
<p>Reviewer #1: No</p>
<p>Reviewer #2: <bold>Yes:</bold> Andrews Baidoo</p>
<p>**********</p>
<p>--&gt;--&gt; --&gt;--&gt;[NOTE: If reviewer comments were submitted as an attachment file, they will be attached to this email and accessible via the submission site. Please log into your account, locate the manuscript record, and check for the action link "View Attachments". If this link does not appear, there are no attachment files.]--&gt;--&gt; --&gt;--&gt;<bold>Figure resubmission:</bold>--&gt;--&gt; --&gt;--&gt;--&gt;While revising your submission, we strongly recommend that you use PLOS’s NAAS tool (<ext-link ext-link-type="uri" xlink:href="https://ngplosjournals.pagemajik.ai/artanalysis" xlink:type="simple">https://ngplosjournals.pagemajik.ai/artanalysis</ext-link>) to test your figure files. NAAS can convert your figure files to the TIFF file type and meet basic requirements (such as print size, resolution), or provide you with a report on issues that do not meet our requirements and that NAAS cannot fix.--&gt;--&gt;</p>
<p>After uploading your figures to PLOS’s NAAS tool - <ext-link ext-link-type="uri" xlink:href="https://ngplosjournals.pagemajik.ai/artanalysis" xlink:type="simple">https://ngplosjournals.pagemajik.ai/artanalysis</ext-link>, NAAS will process the files provided and display the results in the "Uploaded Files" section of the page as the processing is complete. If the uploaded figures meet our requirements (or NAAS is able to fix the files to meet our requirements), the figure will be marked as "fixed" above. If NAAS is unable to fix the files, a red "failed" label will appear above. When NAAS has confirmed that the figure files meet our requirements, please download the file via the download option, and include these NAAS processed figure files when submitting your revised manuscript.--&gt;--&gt;--&gt; --&gt;--&gt;<bold>Reproducibility:</bold>--&gt;--&gt; --&gt;--&gt;To enhance the reproducibility of your results, we recommend that authors of applicable studies deposit laboratory protocols in protocols.io, where a protocol can be assigned its own identifier (DOI) such that it can be cited independently in the future. Additionally, PLOS ONE offers an option to publish peer-reviewed clinical study protocols. Read more information on sharing protocols at <ext-link ext-link-type="uri" xlink:href="https://plos.org/protocols?utm_medium=editorial-email&amp;utm_source=authorletters&amp;utm_campaign=protocols" xlink:type="simple">https://plos.org/protocols?utm_medium=editorial-email&amp;utm_source=authorletters&amp;utm_campaign=protocols</ext-link>.</p>
<supplementary-material id="pdig.0001031.s004" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" position="float" xlink:href="info:doi/10.1371/journal.pdig.0001031.s004" xlink:type="simple">
<label>Attachment</label>
<caption>
<p>Submitted filename: <named-content content-type="submitted-filename">1.docx</named-content></p>
</caption>
</supplementary-material>
<supplementary-material id="pdig.0001031.s005" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" position="float" xlink:href="info:doi/10.1371/journal.pdig.0001031.s005" xlink:type="simple">
<label>Attachment</label>
<caption>
<p>Submitted filename: <named-content content-type="submitted-filename">reviews.docx</named-content></p>
</caption>
</supplementary-material>
</body>
</sub-article>
<sub-article article-type="author-comment" id="pdig.0001031.r002">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pdig.0001031.r002</article-id>
<title-group>
<article-title>Author response to Decision Letter 1</article-title>
</title-group>
<related-object document-id="10.1371/journal.pdig.0001031" document-id-type="doi" document-type="peer-reviewed-article" id="rel-obj002" link-type="rebutted-decision-letter" object-id="10.1371/journal.pdig.0001031.r001" object-id-type="doi" object-type="decision-letter"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>1</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p><named-content content-type="author-response-date">11 Mar 2026</named-content></p>
<supplementary-material id="pdig.0001031.s007" mimetype="application/msword" position="float" xlink:href="info:doi/10.1371/journal.pdig.0001031.s007" xlink:type="simple">
<label>Attachment</label>
<caption>
<p>Submitted filename: <named-content content-type="submitted-filename">Response to Reviewers.doc</named-content></p>
</caption>
</supplementary-material>
</body>
</sub-article>
<sub-article article-type="editor-report" id="pdig.0001031.r003" specific-use="decision-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pdig.0001031.r003</article-id>
<title-group>
<article-title>Decision Letter 1</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western"><surname>Sbaffi</surname>
<given-names>Laura</given-names>
</name>
<role>Section Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2026</copyright-year>
<copyright-holder>Laura Sbaffi</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p></license>
</permissions>
<related-object document-id="10.1371/journal.pdig.0001031" document-id-type="doi" document-type="article" id="rel-obj003" link-type="peer-reviewed-article"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>1</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p><named-content content-type="letter-date">9 Apr 2026</named-content></p>
<p>Identifying Factors Associated with Vaping Cessation in Young Adults: A Machine Learning and XAI Approach.</p>
<p>PDIG-D-25-00724R1</p>
<p>Dear Dr Mahajan,</p>
<p>We are pleased to inform you that your manuscript 'Identifying Factors Associated with Vaping Cessation in Young Adults: A Machine Learning and XAI Approach.' has been provisionally accepted for publication in PLOS Digital Health.</p>
<p>Before your manuscript can be formally accepted you will need to complete some formatting changes, which you will receive in a follow-up email from a member of our team.</p>
<p>Please note that your manuscript will not be scheduled for publication until you have made the required changes, so a swift response is appreciated.</p>
<p>IMPORTANT: The editorial review process is now complete. PLOS will only permit corrections to spelling, formatting or significant scientific errors from this point onwards. Requests for major changes, or any which affect the scientific understanding of your work, will cause delays to the publication date of your manuscript.</p>
<p>If your institution or institutions have a press office, please notify them about your upcoming paper to help maximize its impact. If they'll be preparing press materials, please inform our press team as soon as possible -- no later than 48 hours after receiving the formal acceptance. Your manuscript will remain under strict press embargo until 2 pm Eastern Time on the date of publication. For more information, please contact digitalhealth@plos.org.</p>
<p>Thank you again for supporting Open Access publishing; we are looking forward to publishing your work in PLOS Digital Health.</p>
<p>Best regards,</p>
<p>Laura Sbaffi, PhD, MA, MSc</p>
<p>Section Editor</p>
<p>PLOS Digital Health</p>
<p>***********************************************************</p>
<p><bold>Additional Editor Comments (if provided):</bold></p>
<p><bold>Reviewer Comments (if any, and for reference):</bold></p>
</body>
</sub-article>
</article>