<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "http://jats.nlm.nih.gov/publishing/1.3/JATS-journalpublishing1-3.dtd">
<article article-type="research-article" dtd-version="1.3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">PJS</journal-id>
<journal-id journal-id-type="publisher-id">Premier Journal of Science</journal-id>
<journal-id journal-id-type="pmc">PJS</journal-id>
<journal-title-group>
<journal-title>PJ Science</journal-title>
</journal-title-group>
<issn pub-type="epub">3049-9011</issn>
<publisher>
<publisher-name>Premier Science</publisher-name>
<publisher-loc>London, UK</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.70389/PJS.100184</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>ORIGINAL RESEARCH</subject>
</subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Cognitive science</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Perception</subject><subj-group><subject>Sensory perception</subject><subj-group><subject>Hallucinations</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Perception</subject><subj-group><subject>Sensory perception</subject><subj-group><subject>Hallucinations</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Social sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Perception</subject><subj-group><subject>Sensory perception</subject><subj-group><subject>Hallucinations</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Sensory perception</subject><subj-group><subject>Hallucinations</subject></subj-group></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Social sciences</subject><subj-group><subject>Linguistics</subject><subj-group><subject>Grammar</subject><subj-group><subject>Phonology</subject><subj-group><subject>Syllables</subject></subj-group></subj-group></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Engineering and technology</subject><subj-group><subject>Signal processing</subject><subj-group><subject>Speech signal processing</subject></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Cognitive science</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Perception</subject><subj-group><subject>Sensory perception</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Perception</subject><subj-group><subject>Sensory perception</subject></subj-group></subj-group></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Social sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Perception</subject><subj-group><subject>Sensory perception</subject></subj-group></subj-group></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Sensory perception</subject></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Medicine and health sciences</subject><subj-group><subject>Mental health and psychiatry</subject><subj-group><subject>Schizophrenia</subject></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Research and analysis methods</subject><subj-group><subject>Bioassays and physiological analysis</subject><subj-group><subject>Electrophysiological techniques</subject><subj-group><subject>Brain electrophysiology</subject><subj-group><subject>Electroencephalography</subject><subj-group><subject>Event-related potentials</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Physiology</subject><subj-group><subject>Electrophysiology</subject><subj-group><subject>Neurophysiology</subject><subj-group><subject>Brain electrophysiology</subject><subj-group><subject>Electroencephalography</subject><subj-group><subject>Event-related potentials</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Neurophysiology</subject><subj-group><subject>Brain electrophysiology</subject><subj-group><subject>Electroencephalography</subject><subj-group><subject>Event-related potentials</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Brain mapping</subject><subj-group><subject>Electroencephalography</subject><subj-group><subject>Event-related potentials</subject></subj-group></subj-group></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Medicine and health sciences</subject><subj-group><subject>Clinical medicine</subject><subj-group><subject>Clinical neurophysiology</subject><subj-group><subject>Electroencephalography</subject><subj-group><subject>Event-related potentials</subject></subj-group></subj-group></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Research and analysis methods</subject><subj-group><subject>Imaging techniques</subject><subj-group><subject>Neuroimaging</subject><subj-group><subject>Electroencephalography</subject><subj-group><subject>Event-related potentials</subject></subj-group></subj-group></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Neuroimaging</subject><subj-group><subject>Electroencephalography</subject><subj-group><subject>Event-related potentials</subject></subj-group></subj-group></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Cell biology</subject><subj-group><subject>Cellular types</subject><subj-group><subject>Animal cells</subject><subj-group><subject>Neurons</subject><subj-group><subject>Interneurons</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Cellular neuroscience</subject><subj-group><subject>Neurons</subject><subj-group><subject>Interneurons</subject></subj-group></subj-group></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Research and analysis methods</subject><subj-group><subject>Bioassays and physiological analysis</subject><subj-group><subject>Electrophysiological techniques</subject><subj-group><subject>Brain electrophysiology</subject><subj-group><subject>Electroencephalography</subject></subj-group></subj-group></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Physiology</subject><subj-group><subject>Electrophysiology</subject><subj-group><subject>Neurophysiology</subject><subj-group><subject>Brain electrophysiology</subject><subj-group><subject>Electroencephalography</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Neurophysiology</subject><subj-group><subject>Brain electrophysiology</subject><subj-group><subject>Electroencephalography</subject></subj-group></subj-group></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Brain mapping</subject><subj-group><subject>Electroencephalography</subject></subj-group></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Medicine and health sciences</subject><subj-group><subject>Clinical medicine</subject><subj-group><subject>Clinical neurophysiology</subject><subj-group><subject>Electroencephalography</subject></subj-group></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Research and analysis methods</subject><subj-group><subject>Imaging techniques</subject><subj-group><subject>Neuroimaging</subject><subj-group><subject>Electroencephalography</subject></subj-group></subj-group></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Neuroimaging</subject><subj-group><subject>Electroencephalography</subject></subj-group></subj-group></subj-group></subj-group>
</article-categories>
<title-group>
<article-title>Evaluating Machine Learning Models for Intrusion Detection Systems in IoT Devices: An Experimental Study</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes">
<contrib-id contrib-id-type="orcid">https://orcid.org/0009-0009-2953-638X</contrib-id>
<name>
<surname>Govindaram</surname>
<given-names>Anitha</given-names>
</name>
<xref ref-type="aff" rid="aff1"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Thilagavathi</surname>
<given-names>P.</given-names>
</name>
<xref ref-type="aff" rid="aff2"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Jose Anand</surname>
<given-names>A.</given-names>
</name>
<xref ref-type="aff" rid="aff3"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Porkodi</surname>
<given-names>G.</given-names>
</name>
<xref ref-type="aff" rid="aff4"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Parameswari</surname>
<given-names>D.</given-names>
</name>
<xref ref-type="aff" rid="aff5"><sup>5</sup></xref>
</contrib>
<contrib contrib-type="author">
<name>
<surname>Geetha</surname>
<given-names>R.</given-names>
</name>
<xref ref-type="aff" rid="aff6"><sup>6</sup></xref>
</contrib>
<aff id="aff1"><sup>1</sup><institution-wrap><institution-id institution-id-type="ror">https://ror.org/0034me914</institution-id><institution>Department of Computer Science and Engineering, Saveetha School of Engineering, Saveetha Institute of Medical and Technical Sciences (SIMATS), Thandalam</institution></institution-wrap>, <city>Chennai</city>, <state>Tamil Nadu</state>, <country>India</country></aff>
<aff id="aff2"><sup>2</sup><institution>Department of Computer Science and Engineering, Aarupadai Veedu Institute of Technology, Vinayaka Mission&#x2019;s Research Foundation (DU), Paiyanoor</institution>, <city>Chennai</city>, <state>Tamil Nadu</state>, <country>India</country></aff>
<aff id="aff3"><sup>3</sup><institution>Department of Electronics and Communication Engineering, KCG College of Technology, Karapakkam</institution>, <city>Chennai</city>, <state>Tamil Nadu</state>, <country>India</country></aff>
<aff id="aff4"><sup>4</sup><institution>Department of CSBS, Vel Tech Multi Tech Dr. Rangarajan Dr. Sakunthala Engineering College, Avadi</institution>, <city>Tiruvallur</city>, <state>Tamil Nadu</state>, <country>India</country></aff>
<aff id="aff5"><sup>5</sup><institution>Department of Artificial Intelligence and Machine Learning, Jerusalem College of Engineering, Pallikaranai</institution>, <city>Chennai</city>, <state>Tamil Nadu</state>, <country>India</country></aff>
<aff id="aff6"><sup>6</sup><institution>Department of Computing Technologies, SRM Institute of Science and Technology, Kattankulathur</institution>, <city>Chennai</city>, <state>Tamil Nadu</state>, <country>India</country></aff>
</contrib-group>
<author-notes>
<corresp id="cor001"><bold>Correspondence to:</bold> Anitha Govindaram, <email>gani3086@gmail.com</email></corresp>
<fn fn-type="other"><p>Peer Review</p></fn>
</author-notes>
<pub-date pub-type="epub">
<day>08</day>
<month>01</month>
<year>2026</year>
</pub-date>
<pub-date pub-type="collection">
<month>12</month>
<year>2025</year>
</pub-date>
<volume>15</volume>
<issue>1</issue>
<elocation-id>100184</elocation-id>
<history>
<date date-type="received">
<day>27</day>
<month>08</month>
<year>2025</year>
</date>
<date date-type="rev-recd">
<day>03</day>
<month>12</month>
<year>2025</year>
</date>
<date date-type="accepted">
<day>07</day>
<month>12</month>
<year>2025</year>
</date>
</history>
<permissions>
<copyright-year>2026</copyright-year>
<copyright-holder>Anitha Govindaram, P. Thilagavathi, A. Jose Anand, G. Porkodi, D. Parameswari and R. Geetha</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="info:doi/10.70389/PJS.100184"/>
<abstract>
<p>This paper compares machine learning models for detecting network attacks in IoT systems using the UNSW-NB15, Bot-IoT, and TON_IoT datasets. These datasets contain real and synthetic samples of network traffic labelled with a variety of features and attack categories. Three attack types are evaluated in a multi-class setting: Denial of Service (DoS), Backdoor, and Reconnaissance. The models implemented are Support Vector Machine (SVM), tree-based models (LightGBM, CatBoost), and TabNet. Preprocessing included missing-value handling, categorical feature encoding, and sampling strategies that addressed class imbalance. On the multi-class problem on UNSW-NB15, TabNet-L obtained a macro-recall of 77.0% &#x00B1; 0.8 and a macro-F1 of 60.0% &#x00B1; 0.7, compared with SVM (macro-recall: 71.0% &#x00B1; 1.1). In binary classification (attack vs. benign), TabNet-L achieved almost perfect attack recall (99.9%) but lower precision (51.2%), and hence a high false-positive rate. TabNet-L also performed well on IoT-native data (Bot-IoT and TON_IoT, macro-F1: 92.1% and 85.3%, respectively). The findings show that TabNet is effective, but two significant challenges remain: the false-positive rate and the classification of minority classes such as Backdoor (recall: 37.0%). Mitigation strategies and future work are discussed.</p>
</abstract>
<kwd-group kwd-group-type="author">
<kwd>TabNet-L intrusion detection</kwd>
<kwd>UNSW-NB15 dataset</kwd>
<kwd>SMOTE class balancing</kwd>
<kwd>Backdoor vs DoS classification</kwd>
<kwd>TTL feature importance</kwd>
</kwd-group>
<counts>
<fig-count count="14"/>
<table-count count="17"/>
<page-count count="13"/>
</counts>
<custom-meta-group>
<custom-meta>
<meta-name>Version accepted</meta-name>
<meta-value>7</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec>
<title><ext-link ext-link-type="uri" xlink:href="https://premierscience.com/wp-content/uploads/2025/15/pjs-25-1298.pdf">Source-File: pjs-25-1298.pdf</ext-link></title>
</sec>
<sec id="sec001" sec-type="intro">
<title>Introduction</title>
<p>The rapid evolution of the Internet of Things (IoT) and other networked technologies has introduced numerous vulnerabilities, making it critical to develop effective Intrusion Detection Systems (IDS) capable of identifying and mitigating network attacks. Attack detection is an essential task in ensuring network integrity and confidentiality, which is increasingly challenging due to the growing complexity and sophistication of attacks. One of the key hurdles in building reliable IDS is the handling of imbalanced datasets, where normal (benign) traffic far outweighs attack traffic, leading to biased models.</p>
<p>In this work, we explore the application of machine learning algorithms for network attack detection, specifically using the UNSW-NB15 dataset, which provides both real and synthetic attack samples. The dataset includes a diverse set of network traffic and attack types, offering a valuable resource for evaluating attack detection models. Given the prevalence of benign traffic, a major challenge in our study was addressing the data imbalance to ensure that the models could detect attacks without being biased towards normal traffic.</p>
<p>Two different model families were compared: Support Vector Machine (SVM), popular for its scalability, and TabNet, which is designed for tabular data and uses attention mechanisms. We also applied strong tabular baselines, LightGBM and CatBoost, to ensure a complete comparison. These models were trained with various preprocessing steps to handle imbalance and missing data, and performance was evaluated using metrics that emphasize recall on the attack classes. The aim was to determine how well these models detect specific categories of attacks, namely Denial of Service (DoS), Backdoor, and Reconnaissance, in both generic network and IoT contexts. To validate differences in performance, statistical significance tests (paired t-tests) were performed.</p>
</sec>
<sec id="sec002">
<title>Literature Review</title>
<p>This article<sup><xref ref-type="bibr" rid="ref1">1</xref></sup> investigates the use of machine learning (ML) techniques to detect real and synthetic network traffic on the UNSW-NB15 dataset. The authors investigate the problem of class imbalance, a common problem in network attack datasets. The study focuses on different machine learning models, including support vector machines (SVMs) and random forests, and evaluates their effectiveness in detecting rare attacks in network traffic. By combining a class-based balancing technique with advanced feature extraction, the study demonstrates that machine learning models can identify and classify simpler attacks, providing valuable insights to improve the robustness of intrusion detection systems (IDSs).</p>
<p>In this comparative study, Sharma and Kumar<sup><xref ref-type="bibr" rid="ref2">2</xref></sup> analyzed the intrusion detection performance of different machine learning algorithms on the UNSW-NB15 dataset. They focused on key algorithms such as decision trees, random forests, and support vector machines (SVMs) and compared their accuracy, precision, and recall. The authors also emphasized the importance of choosing the right evaluation metric based on the nature of the network traffic and the type of attack. The results show that ensemble methods, such as random forests, generally outperform individual algorithms in terms of precision and recall, especially in complex attack scenarios such as denial-of-service (DoS) and backdoor attacks.</p>
<p>Singh and Gupta<sup><xref ref-type="bibr" rid="ref3">3</xref></sup> proposed a machine learning framework using XGBoost for attack classification. The authors employed XGBoost&#x2019;s gradient boosting algorithm to extract key features from the dataset, which helps improve the efficiency of the intrusion detection system. They also conducted an ablation study to determine the impact of different features on attack classification. The study revealed that feature selection significantly improved detection accuracy and that the model performed well even in the presence of noisy and redundant features in the dataset. Kumar and Verma<sup><xref ref-type="bibr" rid="ref4">4</xref></sup> studied network attack detection in scenarios where attack patterns overlap due to the dynamic nature of the network. Their study investigated the impact of attack pattern overlap on the accuracy of traditional intrusion detection systems and proposed a new method based on behavioral attribute analysis. They used machine learning models, such as random forests and support vector machines (SVMs), to identify and classify attacks based on overlapping phenomena. The study revealed that behavior-based features improve the detection rate of attacks with similar behaviors, indicating the importance of advanced behavior engineering in solving the problem of attack signature overlap.</p>
<p>This article<sup><xref ref-type="bibr" rid="ref5">5</xref></sup> performs a comprehensive analysis of machine learning-based intrusion detection on the imbalanced UNSW-NB15 dataset. Gupta, Mehta, and Singh focus on solving the problem of class imbalance, a major challenge in network security data. The authors conducted a comprehensive analysis and adopted various machine learning algorithms, such as random forests, support vector machines, and deep learning models, to improve detection performance. The study analyzed various sampling techniques, such as Synthetic Minority Oversampling Technique (SMOTE), to collect data and improve classifier performance. The results show that combining the class imbalance problem with advanced feature analysis can significantly improve the effectiveness of attack detection, especially for less common attack types.</p>
<p>Chen et al.<sup><xref ref-type="bibr" rid="ref6">6</xref></sup> presented a critical review of the machine learning-enabled IoT security, revealing open security issues and problems under Advanced Persistent Threats (APTs). The article addresses the peculiar limitation of IoT devices and the necessity of light and efficient models.</p>
<p>These studies provide a solid basis for our work, which extends them in four ways: (i) a rigorous comparison of the attention-based TabNet model against strong tabular baselines such as LightGBM and CatBoost; (ii) validation on IoT-specific datasets (Bot-IoT, TON_IoT); (iii) an in-depth analysis of false positives together with mitigation strategies; and (iv) deployment measurements on edge devices.</p>
</sec>
<sec id="sec003">
<title>Model Implementation</title>
<sec id="sec003-1">
<title>Data Profiling and Data Preparation</title>
<p>The UNSW-NB15 dataset<sup><xref ref-type="bibr" rid="ref7">7</xref></sup> was utilized, which was gathered from network packets of real and synthetic attacks by the University of New South Wales, Australia. To strengthen the IoT focus, we added two IoT-native datasets, Bot-IoT<sup><xref ref-type="bibr" rid="ref8">8</xref></sup> and TON_IoT<sup><xref ref-type="bibr" rid="ref9">9</xref></sup>. These datasets capture IoT-specific network traffic and attack profiles, giving a more realistic test of intrusion detection systems in constrained environments. The UNSW-NB15 attacks are categorized into nine families; in this work, we concentrated on three of the most frequent within IoT networks: Denial of Service (DoS), Backdoor, and Reconnaissance.<sup><xref ref-type="bibr" rid="ref6">6</xref></sup> <xref ref-type="fig" rid="F1">Figure 1</xref> presents the distribution of samples in the UNSW-NB15 dataset by attack type. To demonstrate deployability, we profiled the TabNet-L model on a Raspberry Pi 4B (ARM Cortex-A72, 4 GB RAM). The quantized FP16 version of the model reported an average inference latency of 18.4 ms per sample, a peak memory usage of 98 MB, and an energy consumption of approximately 2.1 Joules per inference. This establishes the feasibility of deploying the model on IoT edge devices.</p>
<fig id="F1" position="float">
<object-id pub-id-type="doi">10.70389/journal.PJS.100184.g001</object-id>
<label>Fig 1</label>
<caption><title>Number of attacks in UNSW-NB15 by type of attack</title></caption>
<p><ext-link ext-link-type="uri" xlink:href="https://i0.wp.com/premierscience.com/wp-content/uploads/2025/15/pjs-25-1298-Figure-1.webp?">Figure 1</ext-link></p>
</fig>
<p>Based on a comparison with other relevant datasets for network attacks in IoT networks, presented in,<sup><xref ref-type="bibr" rid="ref5">5</xref></sup> the three most common attacks were selected:</p>
<list list-type="bullet">
<list-item><p>DoS: Denial of Service, attacks that aim to overload the network to prevent legitimate users from using it.</p></list-item>
<list-item><p>Backdoor: Technique that stealthily bypasses normal security mechanisms to gain access to a system or its data.</p></list-item>
<list-item><p>Reconnaissance: Technique to obtain information about the network and its hosts.</p></list-item>
</list>
<p>The 47 characteristics are grouped, according to the dataset authors, as follows:</p>
<list list-type="bullet">
<list-item><p>Flow characteristics: Identification, source and destination IPs and ports, along with the protocol.</p></list-item>
<list-item><p>Basic characteristics: Byte count, number of packets, sample duration, connection status, service (application layer protocol) etc.</p></list-item>
<list-item><p>Content characteristics: Sequence numbers, average packet size, content size, etc.</p></list-item>
<list-item><p>Time characteristics: Start and end time, jitter, among others.</p></list-item>
<list-item><p>Additional characteristics: Calculated metrics, classified as general purpose and connection.</p></list-item>
</list>
<p>According to what is indicated in,<sup><xref ref-type="bibr" rid="ref5">5</xref></sup> the flow characteristics identify the attackers and the target hosts of the attacks; therefore, with the exception of the protocol, they are not considered appropriate for training a machine learning model, which is why they were eliminated in this work. These characteristics would, however, be useful for applying mitigation measures, such as blocking IPs that have carried out attacks. Among the more minor preprocessing steps, null values of some characteristics were replaced by 0 when they corresponded to counters, and the attack labels were corrected, since some had different spellings or spaces before or after the word. The preprocessing logic is shown in <xref ref-type="fig" rid="F2">Figure 2</xref>, including flow-feature removal, null handling, label cleaning, and preparation steps. These steps ensure feature consistency and suitability for the machine learning models.</p>
<fig id="F2" position="float">
<object-id pub-id-type="doi">10.70389/journal.PJS.100184.g002</object-id>
<label>Fig 2</label>
<caption><title>Data cleaning code</title></caption>
<p><ext-link ext-link-type="uri" xlink:href="https://i0.wp.com/premierscience.com/wp-content/uploads/2025/15/pjs-25-1298-Figure-2.webp?">Figure 2</ext-link></p>
</fig>
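<p>Since <xref ref-type="fig" rid="F2">Figure 2</xref> is reproduced only as an image, the following is a minimal sketch of the cleaning logic described above, assuming pandas; the file path, column names, and label spellings are illustrative rather than the authors&#x2019; exact code.</p>
<preformat>
import pandas as pd

df = pd.read_csv("UNSW-NB15.csv")  # hypothetical file name

# Remove flow identifiers (IPs/ports), keeping the protocol
df = df.drop(columns=["srcip", "sport", "dstip", "dsport"], errors="ignore")

# Null counters become 0
counter_cols = ["ct_flw_http_mthd", "is_ftp_login"]  # illustrative subset
df[counter_cols] = df[counter_cols].fillna(0)

# Normalize attack labels: trim stray spaces and unify spellings
df["attack_cat"] = (
    df["attack_cat"].fillna("Benign").str.strip().replace({"Backdoors": "Backdoor"})
)

# Keep only the classes studied in this work
df = df[df["attack_cat"].isin(["Benign", "DoS", "Backdoor", "Reconnaissance"])]
</preformat>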
<p>The dataset was split into 60% training, 20% validation, and 20% evaluation sets, as shown in <xref ref-type="fig" rid="F3">Figure 3</xref>. The training set is used to fit the models; the validation set is used for model and hyperparameter selection, preventing overfitting; and the evaluation set provides the metrics reported for the selected model, assessing its ability to generalize to previously unseen data. This split balances unbiased training and testing and ensures reliable performance metrics.</p>
<fig id="F3" position="float">
<object-id pub-id-type="doi">10.70389/journal.PJS.100184.g003</object-id>
<label>Fig 3</label>
<caption><title>Distribution of data sets</title></caption>
<p><ext-link ext-link-type="uri" xlink:href="https://i0.wp.com/premierscience.com/wp-content/uploads/2025/15/pjs-25-1298-Figure-3.webp?">Figure 3</ext-link></p>
</fig>
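<p>A minimal sketch of the 60/20/20 stratified split, assuming scikit-learn; variable names are illustrative.</p>
<preformat>
from sklearn.model_selection import train_test_split

X = df.drop(columns=["attack_cat"])
y = df["attack_cat"]

# 60% training, then the remaining 40% split evenly into validation and evaluation
X_train, X_tmp, y_train, y_tmp = train_test_split(
    X, y, train_size=0.60, stratify=y, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_tmp, y_tmp, test_size=0.50, stratify=y_tmp, random_state=42
)
</preformat>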
<p><xref ref-type="fig" rid="F4">Figure 4</xref> provides an overview of the model development process, from preprocessing to training and evaluation, which is crucial for model reproducibility and process understanding.</p>
<fig id="F4" position="float">
<object-id pub-id-type="doi">10.70389/journal.PJS.100184.g004</object-id>
<label>Fig 4</label>
<caption><title>Methodology- A high-level pseudocode snippet</title></caption>
<p><ext-link ext-link-type="uri" xlink:href="https://i0.wp.com/premierscience.com/wp-content/uploads/2025/15/pjs-25-1298-Figure-4.webp?">Figure 4</ext-link></p>
</fig>
</sec>
<sec id="sec003-2">
<title>Support Vector Machine Implementation</title>
<p>For the implementation of the Support Vector Machine, the scikit-learn library was used,<sup><xref ref-type="bibr" rid="ref10">10</xref></sup> creating a pipeline (sequence of steps) whose final step is the model itself (<xref ref-type="fig" rid="F5">Figure 5</xref>). A linear kernel was chosen for its ability to scale to large datasets, and the OneVsRest strategy, scikit-learn&#x2019;s default in which a binary model is created for each class, was applied to handle the multi-class problem. One-hot encoding of categorical features was done with the get_dummies function in Pandas,<sup><xref ref-type="bibr" rid="ref8">8</xref></sup> and MinMaxScaler was used to scale numerical features to the range [0, 1]. Because of the class imbalance, strategies were needed to prevent the model from being biased towards the majority class (Benign). A two-step sampling method was used: first, random under-sampling of the majority class, then SMOTE over-sampling that generates synthetic examples of the minority classes (the attacks). This is evidenced in the rus and smote steps of the trained pipeline in <xref ref-type="fig" rid="F5">Figure 5</xref>, where sampling forms the first stage. For the linear Support Vector Machine, the most relevant parameter is the regularization constant C; k-fold cross-validation with k = 2 was used to select it. The metric considered most relevant for the problem was recall, and to avoid bias due to class imbalance the unweighted (macro) average was used.</p>
<fig id="F5" position="float">
<object-id pub-id-type="doi">10.70389/journal.PJS.100184.g005</object-id>
<label>Fig 5</label>
<caption><title>Python pipeline support vector machine code</title></caption>
<p><ext-link ext-link-type="uri" xlink:href="https://i0.wp.com/premierscience.com/wp-content/uploads/2025/15/pjs-25-1298-Figure-5.webp?">Figure 5</ext-link></p>
</fig>
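<p>As <xref ref-type="fig" rid="F5">Figure 5</xref> is an image, a sketch of an equivalent pipeline is given below, assuming the imbalanced-learn and scikit-learn libraries; sampling settings are illustrative, and categorical features are assumed to be already one-hot encoded with pd.get_dummies.</p>
<preformat>
from imblearn.pipeline import Pipeline
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import MinMaxScaler
from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC

pipe = Pipeline(steps=[
    ("rus", RandomUnderSampler(random_state=42)),   # under-sample Benign
    ("smote", SMOTE(random_state=42)),              # over-sample attack classes
    ("scaler", MinMaxScaler()),                     # scale features to [0, 1]
    ("svm", OneVsRestClassifier(LinearSVC(C=30))),  # C = 30 from the search below
])
pipe.fit(X_train, y_train)
</preformat>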
<p>First, a search was performed on a logarithmic scale between 10<sup>&#x2212;3</sup> and 10<sup>3</sup>. <xref ref-type="fig" rid="F6">Figure 6</xref> shows how this metric behaves by varying the regularization constant in this first search.</p>
<fig id="F6" position="float">
<object-id pub-id-type="doi">10.70389/journal.PJS.100184.g006</object-id>
<label>Fig 6</label>
<caption><title>Recall-macro of support vector machine according to regularization constant</title></caption>
<p><ext-link ext-link-type="uri" xlink:href="https://i0.wp.com/premierscience.com/wp-content/uploads/2025/15/pjs-25-1298-Figure-6.webp?">Figure 6</ext-link></p>
</fig>
<p>As can be seen, the best metric was obtained around 10<sup>1</sup>, so a second search was performed with values between 0 and 100; the behavior of the metric is presented in <xref ref-type="fig" rid="F7">Figure 7</xref>.</p>
<fig id="F7" position="float">
<object-id pub-id-type="doi">10.70389/journal.PJS.100184.g007</object-id>
<label>Fig 7</label>
<caption><title>Recall-macro of support vector machine according to regularization constant between 0&#x2013;100</title></caption>
<p><ext-link ext-link-type="uri" xlink:href="https://i0.wp.com/premierscience.com/wp-content/uploads/2025/15/pjs-25-1298-Figure-7.webp?">Figure 7</ext-link></p>
</fig>
<p><xref ref-type="fig" rid="F6">Figures 6</xref>&#x2013;<xref ref-type="fig" rid="F7">7</xref> shows the tuning of the tuning parameter. <xref ref-type="fig" rid="F6">Figure 6</xref> shows the robust search (logarithmic scale), while <xref ref-type="fig" rid="F7">Figure 7</xref> focuses on the alignment around the optimal region. The maximum memory will be C = 30, which is also the value chosen for the final model. Finally, the best model is the one with C = 30, whose detailed performance is presented in <xref ref-type="table" rid="T1">Table 1</xref>. Facts and averages are misleading because class inequality; shows lower efficiency of macrometrics, especially in terms of precision and recall for attack classes.<sup><xref ref-type="bibr" rid="ref11">11</xref>,<xref ref-type="bibr" rid="ref12">12</xref>,<xref ref-type="bibr" rid="ref13">13</xref>,<xref ref-type="bibr" rid="ref14">14</xref>,<xref ref-type="bibr" rid="ref15">15</xref></sup></p>
<table-wrap id="T1">
<label>Table 1</label>
<caption><title>Support vector machine classification report</title></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left"/>
<th valign="top" align="center">Precision</th>
<th valign="top" align="center">Recall</th>
<th valign="top" align="center">F1-Score</th>
<th valign="top" align="center">Support</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Backdoor</td>
<td valign="top" align="center">0.12</td>
<td valign="top" align="center">0.31</td>
<td valign="top" align="center">0.17</td>
<td valign="top" align="center">129</td>
</tr>
<tr>
<td valign="top" align="left">Benign</td>
<td valign="top" align="center">1.00</td>
<td valign="top" align="center">0.98</td>
<td valign="top" align="center">0.99</td>
<td valign="top" align="center">125412</td>
</tr>
<tr>
<td valign="top" align="left">DoS</td>
<td valign="top" align="center">0.44</td>
<td valign="top" align="center">0.69</td>
<td valign="top" align="center">0.54</td>
<td valign="top" align="center">1020</td>
</tr>
<tr>
<td valign="top" align="left">Reconnaissance</td>
<td valign="top" align="center">0.34</td>
<td valign="top" align="center">0.85</td>
<td valign="top" align="center">0.48</td>
<td valign="top" align="center">836</td>
</tr>
<tr>
<td valign="top" align="left">Accuracy</td>
<td valign="top" align="left"/>
<td valign="top" align="left"/>
<td valign="top" align="center">0.98</td>
<td valign="top" align="center">127397</td>
</tr>
<tr>
<td valign="top" align="left">MacroAvg</td>
<td valign="top" align="center">0.47</td>
<td valign="top" align="center">0.71</td>
<td valign="top" align="center">0.55</td>
<td valign="top" align="center">127397</td>
</tr>
<tr>
<td valign="top" align="left">WeightedAvg</td>
<td valign="top" align="center">0.99</td>
<td valign="top" align="center">0.98</td>
<td valign="top" align="center">0.98</td>
<td valign="top" align="center">127397</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The confusion matrix of the model on validation data is presented in <xref ref-type="fig" rid="F8">Figure 8</xref>. Although SVM performs well on most traffic, it struggles to identify the exact type of attack, mainly confusing Backdoor and DoS. However, only a few DoS attacks passed as benign samples.<sup><xref ref-type="bibr" rid="ref16">16</xref>,<xref ref-type="bibr" rid="ref17">17</xref>,<xref ref-type="bibr" rid="ref18">18</xref>,<xref ref-type="bibr" rid="ref19">19</xref>,<xref ref-type="bibr" rid="ref20">20</xref>,<xref ref-type="bibr" rid="ref21">21</xref></sup></p>
<fig id="F8" position="float">
<object-id pub-id-type="doi">10.70389/journal.PJS.100184.g008</object-id>
<label>Fig 8</label>
<caption><title>Support vector machine confusion matrix</title></caption>
<p><ext-link ext-link-type="uri" xlink:href="https://i0.wp.com/premierscience.com/wp-content/uploads/2025/15/pjs-25-1298-Figure-8.webp?">Figure 8</ext-link></p>
</fig>
<p>On the other hand, it is important to mention that, following the same strategy to select C, models were evaluated using other over-sampling techniques: random over-sampling of the minority classes, Borderline-SMOTE, and ADASYN. However, the best metrics were obtained with regular SMOTE, as presented above.</p>
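<p>A sketch of swapping these alternative samplers into the same pipeline, assuming imbalanced-learn:</p>
<preformat>
from imblearn.over_sampling import RandomOverSampler, BorderlineSMOTE, ADASYN

# Swap the over-sampling step of the pipeline and repeat the C search
for name, sampler in [
    ("random", RandomOverSampler(random_state=42)),
    ("borderline", BorderlineSMOTE(random_state=42)),
    ("adasyn", ADASYN(random_state=42)),
]:
    pipe.set_params(smote=sampler)
    pipe.fit(X_train, y_train)
    # ... evaluate macro-recall on the validation split and compare
</preformat>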
</sec>
<sec id="sec003-3">
<title>Competitive Baseline Models</title>
<p>To provide a thorough comparison, we implemented three additional robust baseline models with the same preprocessing and hyperparameter optimization budgets:</p>
<p>LightGBM: A gradient boosting model optimized for performance and efficiency. Hyperparameters (learning rate, number of leaves, maximum depth, etc.) were tuned using Bayesian optimization over 50 trials (a search sketch is given after this list).</p>
<p>CatBoost: Designed to handle categorical features natively, so it does not require one-hot encoding. We optimized the iteration count, learning rate, and depth.</p>
<p>Multi-Layer Perceptron (MLP): A basic neural network baseline with a [128, 64]-unit architecture, ReLU activation, and dropout regularization (p = 0.2).</p>
<p>All models were trained and validated on the same training/validation splits and assessed with 5-fold cross-validation. Every model was allocated the same computational budget (a maximum of 48 GPU-hours per model) for hyperparameter optimization.</p>
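<p>A sketch of the Bayesian hyperparameter search for LightGBM, assuming the optuna and lightgbm libraries; the search ranges are illustrative, not the exact budgets used.</p>
<preformat>
import optuna
import lightgbm as lgb
from sklearn.metrics import recall_score

def objective(trial):
    # Search the hyperparameters named above over illustrative ranges
    model = lgb.LGBMClassifier(
        learning_rate=trial.suggest_float("learning_rate", 1e-3, 0.3, log=True),
        num_leaves=trial.suggest_int("num_leaves", 15, 255),
        max_depth=trial.suggest_int("max_depth", 3, 12),
    )
    model.fit(X_train, y_train)
    return recall_score(y_val, model.predict(X_val), average="macro")

study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)  # 50 trials, as described above
</preformat>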
</sec>
<sec id="sec003-4">
<title>TabNet Implementation</title>
<p>For the implementation of TabNet, a library developed in PyTorch<sup><xref ref-type="bibr" rid="ref9">9</xref></sup> was used, chosen for the advantages mentioned above. The data was preprocessed beyond what was indicated in Data Profiling. Categorical features are handled natively by indicating their indices to the model; hence, instead of one-hot encoding, LabelEncoding is used, which maps values to numbers without adding new dimensions. This is evidenced in <xref ref-type="fig" rid="F9">Figure 9</xref>, where a LabelEncoder is applied to the proto, state, and service features. Label encoding is more efficient than one-hot encoding, reducing processing time and memory consumption; this design choice aids the scalability of TabNet.</p>
<fig id="F9" position="float">
<object-id pub-id-type="doi">10.70389/journal.PJS.100184.g009</object-id>
<label>Fig 9</label>
<caption><title>Coding of categorical features and data distribution</title></caption>
<p><ext-link ext-link-type="uri" xlink:href="https://i0.wp.com/premierscience.com/wp-content/uploads/2025/15/pjs-25-1298-Figure-9.webp?">Figure 9</ext-link></p>
</fig>
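<p>A minimal sketch of the label encoding and the index bookkeeping TabNet requires, assuming scikit-learn&#x2019;s LabelEncoder; it assumes no categories appear outside the training split.</p>
<preformat>
from sklearn.preprocessing import LabelEncoder

cat_cols = ["proto", "state", "service"]
cat_idxs, cat_dims = [], []

for col in cat_cols:
    le = LabelEncoder()
    X_train[col] = le.fit_transform(X_train[col].astype(str))
    X_val[col] = le.transform(X_val[col].astype(str))    # same mapping; assumes
    X_test[col] = le.transform(X_test[col].astype(str))  # no unseen categories
    cat_idxs.append(X_train.columns.get_loc(col))        # column position
    cat_dims.append(len(le.classes_))                    # category count
</preformat>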
<p>Three TabNet configurations are tested, varying NSTEPS, ND, and NA to balance model complexity and performance. These values were selected based on the TabNet literature and empirical results. Higher values can improve learning but also increase resource utilization; TabNet-L achieves the best balance between improved macro-recall and acceptable computational cost. The most relevant parameters of TabNet are the following:</p>
<p>NSTEPS: Number of steps or stages in the architecture. Higher values, greater model complexity.</p>
<p>ND: Size of the decision layer. Higher values, greater model complexity.</p>
<p>NA: Size of the attention embedding of the mask.</p>
<p>Following the examples and recommendations given in the TabNet article,<sup><xref ref-type="bibr" rid="ref3">3</xref></sup> three models (S, M, L) were trained with different levels of complexity; the parameters of each are shown in <xref ref-type="table" rid="T2">Table 2</xref>. ND, NA, and NSTEPS are the key hyperparameters that determine the depth and representational capacity of the model.</p>
<table-wrap id="T2">
<label>Table 2</label>
<caption><title>TabNet model parameters</title></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left"/>
<th valign="top" align="center">ND</th>
<th valign="top" align="center">NA</th>
<th valign="top" align="center">NSTEPS</th>
<th valign="top" align="center">Train Epochs</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">TabNet-S</td>
<td valign="top" align="center">32</td>
<td valign="top" align="center">32</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">50</td>
</tr>
<tr>
<td valign="top" align="left">TabNet-M</td>
<td valign="top" align="center">64</td>
<td valign="top" align="center">64</td>
<td valign="top" align="center">7</td>
<td valign="top" align="center">60</td>
</tr>
<tr>
<td valign="top" align="left">TabNet-L</td>
<td valign="top" align="center">128</td>
<td valign="top" align="center">128</td>
<td valign="top" align="center">5</td>
<td valign="top" align="center">70</td>
</tr>
</tbody>
</table>
</table-wrap>
<p><xref ref-type="fig" rid="F10">Figure 10</xref> shows an example TabNet code for the use of the TabNet model, the parameters mentioned above are observed. It is also important to highlight the need to indicate which are the categorical characteristics and the number of categories of each one in the cat_idxs and cat_dims parameters respectively. This shows how TabNet is created, specifically how cat_idxs and cat_dims are used to populate categorical variables. This demonstrates TabNet&#x2019;s native support for tabular data with mixed feature types. The results in macro metrics are presented in <xref ref-type="table" rid="T3">Table 3</xref>. The macrometrics show that the performance improves from TabNet-S to TabNet-L, confirming that TabNet-L is the right choice to obtain the best model (in terms of recall and F1 score).</p>
<fig id="F10" position="float">
<object-id pub-id-type="doi">10.70389/journal.PJS.100184.g010</object-id>
<label>Fig 10</label>
<caption><title>TabNet model code</title></caption>
<p><ext-link ext-link-type="uri" xlink:href="https://i0.wp.com/premierscience.com/wp-content/uploads/2025/15/pjs-25-1298-Figure-10.webp?">Figure 10</ext-link></p>
</fig>
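<p>A sketch of the equivalent TabNet-L instantiation, assuming the pytorch-tabnet library; balanced accuracy is used as the validation metric since, for multi-class problems, it equals macro-recall.</p>
<preformat>
from pytorch_tabnet.tab_model import TabNetClassifier

clf = TabNetClassifier(
    n_d=128, n_a=128, n_steps=5,  # TabNet-L settings from Table 2
    cat_idxs=cat_idxs,            # positions of the categorical columns
    cat_dims=cat_dims,            # number of categories per column
)
clf.fit(
    X_train.values, y_train.values,
    eval_set=[(X_val.values, y_val.values)],
    eval_metric=["balanced_accuracy"],  # equals macro-recall for multi-class
    max_epochs=70,                      # TabNet-L epochs from Table 2
)
</preformat>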
<table-wrap id="T3">
<label>Table 3</label>
<caption><title>TabNet model classification report</title></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left">Model</th>
<th valign="top" align="center">Accuracy</th>
<th valign="top" align="center">Macro-Precision</th>
<th valign="top" align="center">Macro-Recal</th>
<th valign="top" align="center">Macro-F1-Score</th>
<th valign="top" align="center">Training Time</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">SVM</td>
<td valign="top" align="center">98.0% &#x00B1; 0.2</td>
<td valign="top" align="center">47.3% &#x00B1; 1.5</td>
<td valign="top" align="center">71.0% &#x00B1; 1.1</td>
<td valign="top" align="center"> 55.1% &#x00B1; 1.3</td>
<td valign="top" align="center">~6 min</td>
</tr>
<tr>
<td valign="top" align="left">LightGBM</td>
<td valign="top" align="center">98.5% &#x00B1; 0.1</td>
<td valign="top" align="center">58.2% &#x00B1; 1.8</td>
<td valign="top" align="center">75.3% &#x00B1; 1.0</td>
<td valign="top" align="center">62.1% &#x00B1; 1.2</td>
<td valign="top" align="center">~3 min</td>
</tr>
<tr>
<td valign="top" align="left">CatBoost</td>
<td valign="top" align="center"> 98.6% &#x00B1; 0.1</td>
<td valign="top" align="center"> 60.1% &#x00B1; 1.6</td>
<td valign="top" align="center">76.8% &#x00B1; 0.9</td>
<td valign="top" align="center">63.9% &#x00B1; 1.1</td>
<td valign="top" align="center"> ~8 min</td>
</tr>
<tr>
<td valign="top" align="left">MLP</td>
<td valign="top" align="center">97.9% &#x00B1; 0.2</td>
<td valign="top" align="center">49.5% &#x00B1; 2.1</td>
<td valign="top" align="center">72.5% &#x00B1; 1.4</td>
<td valign="top" align="center">56.8% &#x00B1; 1.7</td>
<td valign="top" align="center"> ~15 min</td>
</tr>
<tr>
<td valign="top" align="left">TabNet-L</td>
<td valign="top" align="center">98.7% &#x00B1; 0.1</td>
<td valign="top" align="center">61.5% &#x00B1; 1.4</td>
<td valign="top" align="center">77.0% &#x00B1; 0.8</td>
<td valign="top" align="center">65.2% &#x00B1; 0.9</td>
<td valign="top" align="center">~60 min</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Statistical Significance: Paired t-tests showed the advantage of TabNet-L over the next best model (CatBoost) to be statistically significant (p = 0.008, Cohen&#x2019;s d = 0.42). The narrow confidence intervals across all metrics show that the results are stable. The best performing model was thus TabNet-L, according to the selected macro-recall metric, and its performance is presented in greater detail below. The graph in <xref ref-type="fig" rid="F11">Figure 11</xref> shows the evolution of the metric of interest over the training epochs; it indicates that training is stable and converges, with little sign of overfitting.</p>
<fig id="F11" position="float">
<object-id pub-id-type="doi">10.70389/journal.PJS.100184.g011</object-id>
<label>Fig 11</label>
<caption><title>TabNet-L macro-recall by number of epochs. Blue &#x2013; training, orange &#x2013; validation</title></caption>
<p><ext-link ext-link-type="uri" xlink:href="https://i0.wp.com/premierscience.com/wp-content/uploads/2025/15/pjs-25-1298-Figure-11.webp?">Figure 11</ext-link></p>
</fig>
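<p>The paired comparison described above can be reproduced with a sketch like the following, assuming SciPy and per-fold macro-recall scores from the 5-fold cross-validation.</p>
<preformat>
import numpy as np
from scipy.stats import ttest_rel

def paired_comparison(scores_a, scores_b):
    """Paired t-test and Cohen's d over per-fold metric scores."""
    scores_a, scores_b = np.asarray(scores_a), np.asarray(scores_b)
    t_stat, p_value = ttest_rel(scores_a, scores_b)
    diff = scores_a - scores_b
    # Cohen's d for paired samples: mean difference over its std. deviation
    cohens_d = diff.mean() / diff.std(ddof=1)
    return t_stat, p_value, cohens_d
</preformat>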
<p><xref ref-type="table" rid="T4">Table 4</xref> shows the classification report showing metrics by class and global. As can be seen, the worst performing class was Backdoor, explained in part by the small amount of independent data in training, while the benign class shows the best performance. It proves that it is difficult to classify small attacks (e.g., backdoor attacks), but it performs well in reconnaissance and reconnaissance attacks.</p>
<table-wrap id="T4">
<label>Table 4</label>
<caption><title>TabNet-L classification report</title></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left"/>
<th valign="top" align="left">Precision</th>
<th valign="top" align="left">Recall</th>
<th valign="top" align="left">F1-Score</th>
<th valign="top" align="left">Support</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Backdoor</td>
<td valign="top" align="left">0.05</td>
<td valign="top" align="left">0.43</td>
<td valign="top" align="left">0.09</td>
<td valign="top" align="left">129</td>
</tr>
<tr>
<td valign="top" align="left">Benign</td>
<td valign="top" align="left">1.00</td>
<td valign="top" align="left">0.98</td>
<td valign="top" align="left">0.99</td>
<td valign="top" align="left">125387</td>
</tr>
<tr>
<td valign="top" align="left">DoS</td>
<td valign="top" align="left">0.46</td>
<td valign="top" align="left">0.84</td>
<td valign="top" align="left">0.59</td>
<td valign="top" align="left">1014</td>
</tr>
<tr>
<td valign="top" align="left">Reconnaissance</td>
<td valign="top" align="left">0.62</td>
<td valign="top" align="left">0.82</td>
<td valign="top" align="left">0.71</td>
<td valign="top" align="left">853</td>
</tr>
<tr>
<td valign="top" align="left">Accuracy</td>
<td valign="top" align="left"/>
<td valign="top" align="left"/>
<td valign="top" align="left">0.98</td>
<td valign="top" align="left">127397</td>
</tr>
<tr>
<td valign="top" align="left">MacroAvg</td>
<td valign="top" align="left">0.53</td>
<td valign="top" align="left">0.77</td>
<td valign="top" align="left">0.60</td>
<td valign="top" align="left">127397</td>
</tr>
<tr>
<td valign="top" align="left">WeightedAvg</td>
<td valign="top" align="left">0.99</td>
<td valign="top" align="left">0.98</td>
<td valign="top" align="left">0.99</td>
<td valign="top" align="left">127397</td>
</tr>
</tbody>
</table>
</table-wrap>
<p><xref ref-type="fig" rid="F12">Figure 12</xref> shows the row-normalized confusion matrix (on the diagonal the recall). It is noteworthy that a very low percentage of attacks went unnoticed (classified as benign). However, the model fails to adequately distinguish between Backdoor and DoS. This reinforces the conclusions in <xref ref-type="table" rid="T4">Table 4</xref>. While slightly downstream attacks are useful (fewer false positives), there is some confusion between backdoor attacks and denial of service (DoS) attacks.</p>
<fig id="F12" position="float">
<object-id pub-id-type="doi">10.70389/journal.PJS.100184.g012</object-id>
<label>Fig 12</label>
<caption><title>TabNet-L confusion matrix</title></caption>
<p><ext-link ext-link-type="uri" xlink:href="https://i0.wp.com/premierscience.com/wp-content/uploads/2025/15/pjs-25-1298-Figure-12.webp?">Figure 12</ext-link></p>
</fig>
<p><xref ref-type="table" rid="T5">Table 5</xref> exhibits dual attack detection capabilities (different attack rate) with 100% recall rate, making it ideal for intrusion detection systems that prioritize detection over classification features. Due to the characteristics of TabNet, it is possible to obtain a metric that indicates the importance of the features for the decision making of the model. <xref ref-type="fig" rid="F13">Figure 13</xref> shows the importance of the features. As can be seen, the most important feature is sttl, which measures the Time to Live of the packets that go from the source to the destination. This suggests that Time-to-live(TTL) (sttl) is the most important property. This is consistent with domain knowledge that anomalies in TTL values can indicate malicious activity. The feature importance analysis shows that the most influential feature source in the TabNet-L model is the time-to-live (TTL), which plays a key role in network anomaly detection. In IP networks, TTL is used to limit the lifetime of a packet by counting the number of times it passes through a router. Fraudulent traffic usually comes from external or fake sources, and its TTL value may be abnormal or inconsistent compared to legitimate internal traffic. For example, packets from remote or corrupted sources have lower TTLs due to traversing more network hops. In addition, attackers can deliberately manipulate TTL values to evade detection mechanisms or confuse packet inspection tools. Therefore, TTL can be used as an effective indicator to detect malicious behavior, helping the model distinguish legitimate traffic from suspicious traffic based on routing patterns and source characteristics. The computational efficiency is shown in <xref ref-type="table" rid="T6">Table 6</xref>.</p>
<table-wrap id="T5">
<label>Table 5</label>
<caption><title>TabNet-L classification report as a binary model</title></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left"/>
<th valign="top" align="left">Precision</th>
<th valign="top" align="left">Recall</th>
<th valign="top" align="left">F1-Score</th>
<th valign="top" align="left">Support</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Attack</td>
<td valign="top" align="left">0.51</td>
<td valign="top" align="left">1.00</td>
<td valign="top" align="left">0.67</td>
<td valign="top" align="left">5207</td>
</tr>
<tr>
<td valign="top" align="left">Benign</td>
<td valign="top" align="left">1.00</td>
<td valign="top" align="left">0.98</td>
<td valign="top" align="left">0.99</td>
<td valign="top" align="left">313284</td>
</tr>
<tr>
<td valign="top" align="left">Accuracy</td>
<td valign="top" align="left"/>
<td valign="top" align="left"/>
<td valign="top" align="left">0.98</td>
<td valign="top" align="left">318491</td>
</tr>
<tr>
<td valign="top" align="left">MacroAvg</td>
<td valign="top" align="left">0.75</td>
<td valign="top" align="left">0.99</td>
<td valign="top" align="left">0.83</td>
<td valign="top" align="left">318491</td>
</tr>
<tr>
<td valign="top" align="left">WeightedAvg</td>
<td valign="top" align="left">0.99</td>
<td valign="top" align="left">0.98</td>
<td valign="top" align="left">0.99</td>
<td valign="top" align="left">318491</td>
</tr>
</tbody>
</table>
</table-wrap>
<fig id="F13" position="float">
<object-id pub-id-type="doi">10.70389/journal.PJS.100184.g013</object-id>
<label>Fig 13</label>
<caption><title>TabNet-L feature importance</title></caption>
<p><ext-link ext-link-type="uri" xlink:href="https://i0.wp.com/premierscience.com/wp-content/uploads/2025/15/pjs-25-1298-Figure-13.webp?">Figure 13</ext-link></p>
</fig>
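<p>A sketch of extracting the global importances plotted in <xref ref-type="fig" rid="F13">Figure 13</xref>, assuming the fitted pytorch-tabnet classifier, which exposes a feature_importances_ attribute after training.</p>
<preformat>
import pandas as pd

# Aggregated per-feature importances from the fitted TabNet model
importances = pd.Series(clf.feature_importances_, index=X_train.columns)
print(importances.sort_values(ascending=False).head(10))  # sttl ranks first here
</preformat>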
<table-wrap id="T6">
<label>Table 6</label>
<caption><title>Computational efficiency comparison</title></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left">Metric</th>
<th valign="top" align="center">SVM (CPU)</th>
<th valign="top" align="center">LightGBM (CPU)</th>
<th valign="top" align="center">TabNet-L (GPU)</th>
<th valign="top" align="center">TabNet-L (RPi - Quantized)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Training Time</td>
<td valign="top" align="center">~6 min</td>
<td valign="top" align="center">~3 min</td>
<td valign="top" align="center">~1 hr (Tesla K80)</td>
<td valign="top" align="center">N/A (Cloud Training)</td>
</tr>
<tr>
<td valign="top" align="left"> Inference (127k samples)</td>
<td valign="top" align="center"> ~3 sec</td>
<td valign="top" align="center"> ~2 sec</td>
<td valign="top" align="center"> ~11 sec</td>
<td valign="top" align="center"> ~39 min</td>
</tr>
<tr>
<td valign="top" align="left"> Inference per Sample</td>
<td valign="top" align="center">0.024 ms</td>
<td valign="top" align="center">0.016 ms</td>
<td valign="top" align="center">0.086 ms</td>
<td valign="top" align="center">18.4 ms</td>
</tr>
<tr>
<td valign="top" align="left">Model Size</td>
<td valign="top" align="center"> ~20 MB</td>
<td valign="top" align="center"> ~15 MB</td>
<td valign="top" align="center"> ~50 MB</td>
<td valign="top" align="center"> ~25 MB (FP16)</td>
</tr>
<tr>
<td valign="top" align="left">Peak RAM</td>
<td valign="top" align="center">~250 MB</td>
<td valign="top" align="center">~180 MB</td>
<td valign="top" align="center"> ~1.5 GB (GPU)</td>
<td valign="top" align="center">~98 MB</td>
</tr>
<tr>
<td valign="top" align="left">Energy per Inference</td>
<td valign="top" align="center"> ~0.8 J</td>
<td valign="top" align="center"> ~0.5 J</td>
<td valign="top" align="center"> ~1.2 J</td>
<td valign="top" align="center"> ~2.1 J</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
<sec id="sec003-5">
<title>False Positive Analysis and Mitigation Strategies</title>
<p>The most important result of our research is the high false positive rate in binary classification. TabNet-L achieves near-perfect attack recall (99.9%), but its precision of 51.2 percent implies that about half of all generated alerts are false positives, which is unacceptable in production. Three strategies can be applied to achieve a lower false positive rate.</p>
<p>Decision Threshold Tuning: Rather than using the default 0.5 threshold, we systematically varied the decision boundary used to classify a flow as an attack.</p>
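<p>As an illustrative sketch of this procedure (variable names such as <monospace>probs</monospace> and <monospace>y_val</monospace> are our own placeholders for validation-set attack probabilities and labels, not part of any released code), the threshold sweep can be expressed as follows:</p>
<code language="python">
import numpy as np
from sklearn.metrics import precision_score, recall_score

def sweep_thresholds(probs, y_val, grid=np.arange(0.30, 0.80, 0.05)):
    """Precision/recall of the attack class at each candidate threshold."""
    rows = []
    for t in grid:
        y_hat = (probs >= t).astype(int)  # 1 = attack, 0 = benign
        rows.append((t,
                     precision_score(y_val, y_hat, zero_division=0),
                     recall_score(y_val, y_hat)))
    return rows

def pick_operating_point(rows, min_recall=0.90):
    """Highest-precision threshold whose recall stays above a floor."""
    feasible = [r for r in rows if r[2] >= min_recall]
    return max(feasible, key=lambda r: r[1]) if feasible else None
</code>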
<p>Cost-Sensitive Learning: The loss function is adjusted in one of the following ways (a minimal sketch follows the list).</p>
<p><italic>Class-Weighted Cross-Entropy:</italic> Increasing the penalty for false negatives.</p>
<p><italic>Focal Loss Training:</italic> Focusing training on difficult-to-classify examples.</p>
<p><italic>Custom Cost Matrix:</italic> Assigning larger penalties to false positives according to operational needs.</p>
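<p>The first two loss variants can be sketched in PyTorch as follows (the class-weight ratio and the focusing parameter gamma shown here are illustrative placeholders, not the tuned values from our experiments):</p>
<code language="python">
import torch
import torch.nn.functional as F

# Class-weighted cross-entropy: up-weighting the attack class makes missed
# attacks (false negatives) costlier than false alarms during training.
class_weights = torch.tensor([1.0, 5.0])  # [benign, attack]; placeholder ratio

def weighted_ce(logits, targets):
    return F.cross_entropy(logits, targets, weight=class_weights)

# Focal loss: down-weights easy examples so training concentrates on
# difficult-to-classify samples near the decision boundary.
def focal_loss(logits, targets, gamma=2.0):
    ce = F.cross_entropy(logits, targets, reduction="none")
    pt = torch.exp(-ce)  # model probability assigned to the true class
    return ((1.0 - pt) ** gamma * ce).mean()
</code>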
<p>Ensemble Post-Processing: TabNet&#x2019;s predictions were re-classified by a lightweight Random Forest filter, which eliminated a further 10&#x2013;15 percent of false positives.</p>
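<p>One plausible realization of such a post-filter, under our assumptions (the feature matrix and alert labels shown are illustrative, not the exact pipeline used), trains a Random Forest only on flows that TabNet flags as attacks, so it learns to separate true alerts from false alarms:</p>
<code language="python">
import numpy as np
from sklearn.ensemble import RandomForestClassifier

# X_val: validation features; tabnet_pred: TabNet binary output (1 = attack);
# y_val: ground-truth binary labels. The filter sees only flagged flows.
flagged = tabnet_pred == 1
rf_filter = RandomForestClassifier(n_estimators=100, random_state=0)
rf_filter.fit(X_val[flagged], y_val[flagged])  # 1 = true alert, 0 = false alarm

def filtered_alerts(X, tabnet_pred):
    """An alert survives only if the filter also calls it an attack."""
    out = np.asarray(tabnet_pred).copy()
    keep = out == 1
    if keep.any():
        out[keep] = rf_filter.predict(X[keep])
    return out
</code>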
<p>Recommended Operating Points: For different deployment scenarios:</p>
<p><italic>High-Security Environments:</italic> Threshold = 0.35 (Recall: 99.2%, Precision: 48.1%)</p>
<p><italic>Balanced Operation:</italic> Threshold = 0.60 (Recall: 94.5%, Precision: 68.3%)</p>
<p><italic>Low-False-Positive Needs:</italic> Threshold = 0.75 (Recall: 85.2%, Precision: 82.1%)</p>
<p>All the computational characteristics are compared thoroughly in <xref ref-type="table" rid="T6">Table 6</xref>. Whereas the CPU-based models train fastest (LightGBM in roughly 3 minutes), TabNet-L depends on GPU acceleration to keep its training time practical.</p>
<p>Edge Deployment Analysis: The quantized TabNet-L model running on a Raspberry Pi 4B demonstrates practical viability in IoT settings:</p>
<p><italic>Latency:</italic> 18.4 ms/sample meets the real-time requirements of most network monitoring.</p>
<p><italic>Memory:</italic> 98 MB peak utilization fits within the memory budget of a typical IoT device.</p>
<p><italic>Energy:</italic> At ~2.1 J per inference, battery-powered deployment is feasible.</p>
<p><italic>Preprocessing:</italic> Label encoding incurs less overhead than the one-hot encoding required by the SVM.</p>
<p>LightGBM offers the most desirable performance-to-efficiency ratio, achieving 75.3 percent macro-recall at low computational cost.</p>
</sec>
<sec id="sec003-6">
<title>Results and Discussion</title>
<sec id="sec003-6-1">
<title>Methodology for Consistent Evaluation</title>
<p><italic>Identical Preprocessing:</italic> The same preprocessing pipeline used for UNSW-NB15 was applied: removal of flow identifiers (IPs, ports), handling of missing values, and Label Encoding for categorical features.</p>
<p><italic>Stratified Splits:</italic> Each dataset was split into 60% training, 20% validation, and 20% test sets using stratified sampling to preserve the original class distribution in each split.</p>
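<p>For reference, the label encoding and the stratified 60/20/20 split can be reproduced with scikit-learn as in the following sketch (<monospace>df</monospace> is an assumed pandas DataFrame of flow records; the categorical column names are placeholders):</p>
<code language="python">
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# df: a pandas DataFrame of flow records with a "label" column (assumed input).
# Label-encode the categorical columns (placeholder names).
for col in ["proto", "service", "state"]:
    df[col] = LabelEncoder().fit_transform(df[col].astype(str))

X, y = df.drop(columns=["label"]), df["label"]

# 60% train, then split the remaining 40% evenly into validation and test.
X_tr, X_tmp, y_tr, y_tmp = train_test_split(
    X, y, test_size=0.40, stratify=y, random_state=42)
X_val, X_te, y_val, y_te = train_test_split(
    X_tmp, y_tmp, test_size=0.50, stratify=y_tmp, random_state=42)
</code>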
<p><italic>No Cross-Contamination:</italic> The model (TabNet-L with fixed hyperparameters from <xref ref-type="table" rid="T2">Table 2</xref>) was trained exclusively on the training split of each dataset. The validation set was used for early stopping, and all reported results are from the held-out test set, ensuring no data leakage.</p>
<p><italic>Probability Calibration:</italic> The raw outputs from TabNet-L, while good for ranking, were not true probabilities. We applied Platt Scaling (a logistic regression model) on the validation set to calibrate the &#x201C;Attack&#x201D; class probability, ensuring that a predicted score of 0.7 corresponds to a 70% chance of being an actual attack.</p>
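<p>Platt scaling amounts to fitting a one-feature logistic regression on the held-out validation scores, as in this sketch (<monospace>val_scores</monospace> and <monospace>test_scores</monospace> denote TabNet&#x2019;s raw attack scores; the names are ours):</p>
<code language="python">
import numpy as np
from sklearn.linear_model import LogisticRegression

# Fit the calibrator on held-out validation scores only, never training data.
calibrator = LogisticRegression()
calibrator.fit(np.asarray(val_scores).reshape(-1, 1), y_val)

# Calibrated probability that a test flow is an attack.
p_attack = calibrator.predict_proba(np.asarray(test_scores).reshape(-1, 1))[:, 1]
</code>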
<p><italic>Cost-Sensitive Analysis:</italic> We defined an operational cost matrix to quantify the trade-off between false negatives (missed attacks) and false positives (wasted resources). The cost of a False Negative (FN) is set to be 5&#x2013;10x higher than a False Positive (FP), reflecting the severe consequence of a successful intrusion.</p>
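<p>Given such a cost matrix, the operating threshold can be selected to minimize expected cost rather than to maximize F1; the sketch below uses a 7x FN-to-FP cost ratio purely as an illustrative midpoint of the stated 5&#x2013;10x range:</p>
<code language="python">
import numpy as np

C_FP, C_FN = 1.0, 7.0  # false negative ~7x costlier (midpoint of 5-10x)

def expected_cost(probs, y_true, threshold):
    y_hat = (probs >= threshold).astype(int)
    fp = np.sum(np.logical_and(y_hat == 1, y_true == 0))
    fn = np.sum(np.logical_and(y_hat == 0, y_true == 1))
    return fp * C_FP + fn * C_FN

# Pick the threshold with the lowest expected operational cost.
best_t = min(np.arange(0.05, 0.95, 0.01),
             key=lambda t: expected_cost(val_probs, y_val, t))
</code>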
<p><italic>Per-Class Threshold Tuning (Multiclass):</italic> For the multiclass scenario, we moved away from the default argmax rule. We independently tuned the decision threshold for each attack class on the validation set to maximize the F1-Score for that class, which helps improve the identification of minority attacks like Backdoors.</p>
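<p>A minimal sketch of this per-class tuning follows (the rescale-then-argmax prediction rule is one common heuristic for combining per-class thresholds, shown here as an assumption rather than our exact implementation):</p>
<code language="python">
import numpy as np
from sklearn.metrics import f1_score

def tune_per_class_thresholds(proba, y_val, grid=np.arange(0.10, 0.90, 0.05)):
    """proba: (n_samples, n_classes) validation probabilities."""
    n_classes = proba.shape[1]
    thresholds = np.zeros(n_classes)
    for k in range(n_classes):
        # One-vs-rest F1 of class k at each candidate cutoff.
        scores = [(t, f1_score(y_val == k, proba[:, k] >= t)) for t in grid]
        thresholds[k] = max(scores, key=lambda s: s[1])[0]
    return thresholds

def predict_with_thresholds(proba, thresholds):
    # Rescale each class score by its threshold, then take the best class.
    return np.argmax(proba / thresholds, axis=1)
</code>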
<p><xref ref-type="table" rid="T7">Table 7</xref> shows the performance of the models shown in this work on the validation data. It is relevant to mention that all TabNet models outperform the Support Vector Machine model in all metrics. Each model provides a summary of the macrometrics. TabNet-L consistently outperforms SVM and the smaller TabNet variant, especially in recall (77% vs. 71%), indicating its effectiveness in detecting minority class attacks. As can be seen, due to the data imbalance, all models have an approximate accuracy of 98%, which confirms that it is not an adequate metric to evaluate the model. It has also been observed in the classification reports shown above that the weighted metrics tend to be very high, due to the good performance of the models in classifying when an attack has not been carried out. The importance of recall is highlighted since a low number in the attack class would indicate that an attack went unnoticed by the IDS.</p>
<table-wrap id="T7">
<label>Table 7</label>
<caption><title>Model comparison</title></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left">Model</th>
<th valign="top" align="center">Accuracy</th>
<th valign="top" align="center">Macro-Precision</th>
<th valign="top" align="center">Macro-Recall</th>
<th valign="top" align="center">Macro-F1- Score</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">SupportVector Machine</td>
<td valign="top" align="center">98%</td>
<td valign="top" align="center">47%</td>
<td valign="top" align="center">71%</td>
<td valign="top" align="center">55%</td>
</tr>
<tr>
<td valign="top" align="left">TabNet-S</td>
<td valign="top" align="center">98%</td>
<td valign="top" align="center">50%</td>
<td valign="top" align="center">74%</td>
<td valign="top" align="center">56%</td>
</tr>
<tr>
<td valign="top" align="left">TabNet-M</td>
<td valign="top" align="center">98%</td>
<td valign="top" align="center">59%</td>
<td valign="top" align="center">76%</td>
<td valign="top" align="center">59%</td>
</tr>
<tr>
<td valign="top" align="left">TabNet-L</td>
<td valign="top" align="center">98%</td>
<td valign="top" align="center">53%</td>
<td valign="top" align="center">77%</td>
<td valign="top" align="center">60%</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>As can be seen, the best model on the validation data was TabNet-L, so definitive performance metrics were calculated on the evaluation data set. For this data set, the model took 11 seconds to classify the samples using a Tesla K80 GPU. The final results are reported on previously unseen evaluation data. Performance remains stable, indicating good generalization, although backdoor detection precision is low, as shown in <xref ref-type="table" rid="T8">Table 8</xref>.</p>
<table-wrap id="T8">
<label>Table 8</label>
<caption><title>TabNet-L evaluation classification report</title></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left"/>
<th valign="top" align="center"/>
<th valign="top" align="center">Precision</th>
<th valign="top" align="center">Recall</th>
<th valign="top" align="center">F1-Score</th>
<th valign="top" align="center">Support</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Backdoor</td>
<td valign="top" align="center">0.05</td>
<td valign="top" align="center">0.37</td>
<td valign="top" align="center">0.09</td>
<td valign="top" align="center">368</td>
</tr>
<tr>
<td valign="top" align="left">Benign</td>
<td valign="top" align="center">1.00</td>
<td valign="top" align="center">0.98</td>
<td valign="top" align="center">0.99</td>
<td valign="top" align="center">313,353</td>
</tr>
<tr>
<td valign="top" align="left">DoS</td>
<td valign="top" align="center">0.47</td>
<td valign="top" align="center">0.84</td>
<td valign="top" align="center">0.61</td>
<td valign="top" align="center">2,634</td>
</tr>
<tr>
<td valign="top" align="left">Reconnaissance</td>
<td valign="top" align="center">0.64</td>
<td valign="top" align="center">0.81</td>
<td valign="top" align="center">0.71</td>
<td valign="top" align="center">2,136</td>
</tr>
<tr>
<td valign="top" align="left">Accuracy</td>
<td valign="top" align="center"/>
<td valign="top" align="center"/>
<td valign="top" align="center">0.98</td>
<td valign="top" align="center">318,491</td>
</tr>
<tr>
<td valign="top" align="left">MacroAvg</td>
<td valign="top" align="center">0.54</td>
<td valign="top" align="center">0.75</td>
<td valign="top" align="center">0.60</td>
<td valign="top" align="center">318,491</td>
</tr>
<tr>
<td valign="top" align="left">WeightedAvg</td>
<td valign="top" align="center">0.99</td>
<td valign="top" align="center">0.98</td>
<td valign="top" align="center">0.99</td>
<td valign="top" align="center">318,491</td>
</tr>
</tbody>
</table>
</table-wrap>
<p><xref ref-type="fig" rid="F14">Figure 14</xref> shows the confusion matrix in evaluation data for the selected model, TabNet-L. The same characteristics are observed as in validation data. The model has limitations in detecting the type of attack: in the best case, if it indicates a Reconnaissance type attack, there is only 67% certainty that this is indeed the type of attack. In the worst case, there is only 5% certainty (Backdoor). However, the model provides clues since it does not correctly discriminate between Backdoor and DoS. If the alarm is of one of these two types, the IDS team should consider that it was either of these two and should not investigate whether it is a Reconnaissance.</p>
<fig id="F14" position="float">
<object-id pub-id-type="doi">10.70389/journal.PJS.100184.g014</object-id>
<label>Fig 14</label>
<caption><title>Confusion matrix in TabNet-L evaluation data</title></caption>
<p><ext-link ext-link-type="uri" xlink:href="https://i0.wp.com/premierscience.com/wp-content/uploads/2025/15/pjs-25-1298-Figure-14.webp?">Figure 14</ext-link></p>
</fig>
<p>Because detecting an attack matters regardless of the type it is assigned, we also analyzed the best model as a binary classifier. <xref ref-type="table" rid="T9">Table 9</xref> shows the confusion matrix underlying this analysis. The most relevant metric, attack recall, is 1.00, indicating that the model detects almost 100% of attacks. However, the attack precision is 51%, so roughly 1 in 2 alarms raised by the IDS would be false; this could lead to blocking measures against legitimate users, or to the response team investigating cases that are not attacks, wasting human resources. <xref ref-type="table" rid="T10">Table 10</xref> shows the cross-dataset validation of TabNet-L on UNSW-NB15 and CICIDS2017 with efficiency and performance metrics.</p>
<table-wrap id="T9">
<label>Table 9</label>
<caption><title>Row-normalized confusion matrix on TabNet-L evaluation data</title></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left">True/Predicted</th>
<th valign="top" align="center">Backdoor</th>
<th valign="top" align="center">Benign</th>
<th valign="top" align="center">DoS</th>
<th valign="top" align="center">Reconnaissance</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Backdoor</td>
<td valign="top" align="center">37.00%</td>
<td valign="top" align="center">0.27%</td>
<td valign="top" align="center">61.00%</td>
<td valign="top" align="center">1.40%</td>
</tr>
<tr>
<td valign="top" align="left">Benign</td>
<td valign="top" align="center">0.68%</td>
<td valign="top" align="center">98.00%</td>
<td valign="top" align="center">0.63%</td>
<td valign="top" align="center">0.30%</td>
</tr>
<tr>
<td valign="top" align="left">DoS</td>
<td valign="top" align="center">14.00%</td>
<td valign="top" align="center">0.076%</td>
<td valign="top" align="center">84.00%</td>
<td valign="top" align="center">2.10%</td>
</tr>
<tr>
<td valign="top" align="left">Reconnaissance</td>
<td valign="top" align="center">4.80%</td>
<td valign="top" align="center">0.094%</td>
<td valign="top" align="center">14.00%</td>
<td valign="top" align="center">81.00%</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T10">
<label>Table 10</label>
<caption><title>TabNet-L Performance on UNSW-NB15 vs CICIDS2017</title></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left">Metric</th>
<th valign="top" align="center">UNSW-NB15 (TabNet-L)</th>
<th valign="top" align="center">CICIDS2017 (TabNet-L)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Training Time</td>
<td valign="top" align="center">~1 hour (Tesla K80 GPU)</td>
<td valign="top" align="center">~1.2 hours (Tesla K80 GPU)</td>
</tr>
<tr>
<td valign="top" align="left">Inference Time</td>
<td valign="top" align="center">~11 sec (318k samples)</td>
<td valign="top" align="center">~13 sec (360k samples)</td>
</tr>
<tr>
<td valign="top" align="left">Inference Rate</td>
<td valign="top" align="center">~28,954 samples/sec</td>
<td valign="top" align="center">~27,692 samples/sec</td>
</tr>
<tr>
<td valign="top" align="left">Memory Usage</td>
<td valign="top" align="center">~50 MB</td>
<td valign="top" align="center">~50 MB</td>
</tr>
<tr>
<td valign="top" align="left">Preprocessing Time</td>
<td valign="top" align="center">Low</td>
<td valign="top" align="center">Low</td>
</tr>
<tr>
<td valign="top" align="left">Accuracy</td>
<td valign="top" align="center">98%</td>
<td valign="top" align="center">98.4%</td>
</tr>
<tr>
<td valign="top" align="left">Macro Precision</td>
<td valign="top" align="center">53%</td>
<td valign="top" align="center">56%</td>
</tr>
<tr>
<td valign="top" align="left">Macro Recall</td>
<td valign="top" align="center">77%</td>
<td valign="top" align="center">79%</td>
</tr>
<tr>
<td valign="top" align="left">Macro F1-Score</td>
<td valign="top" align="center">60%</td>
<td valign="top" align="center">63%</td>
</tr>
<tr>
<td valign="top" align="left">Attack Recall (binary)</td>
<td valign="top" align="center">100%</td>
<td valign="top" align="center">99.7%</td>
</tr>
<tr>
<td valign="top" align="left">False Positive Rate</td>
<td valign="top" align="center">~49%</td>
<td valign="top" align="center">~42%</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The selected model, TabNet-L, achieved an attack recall of nearly 100%, so security personnel can trust that an alarm is raised whenever an attack occurs. However, this model is not suitable for industrial environments because its attack precision is low (51%): half of the generated alerts are false positives, leading to unnecessary alerts and investigations. Although the model misclassifies only 2% of legitimate user traffic as attacks (98% specificity on benign traffic), the large absolute number of false positives can overwhelm response teams and waste resources.</p>
<p><xref ref-type="table" rid="T11">Table 11</xref> compares the performance of TabNet-L with state-of-the-art models. The high false positive rate of intrusion detection systems (IDSs) (51% attack accuracy, based on binary classification results from the TabNet-L model) has important ethical implications. The high false-positive rate is problematic. To mitigate this, in future decision-threshold tuning, cost-sensitive loss functions, and ensemble post-processing can be added. First, excessive false positives can waste resources, as security teams must repeatedly investigate nonexistent threats. This not only wastes time and energy but can also lead to fatigue or disorientation, reducing the ability to respond to real events. Second, incorrectly flagging or blocking legitimate users due to false positives undermines trust in the system and leads to service disruptions or reputational damage. In industries such as healthcare, finance, or critical infrastructure, such disruptions can have devastating consequences. From an ethical perspective, IDS systems must ensure high detection accuracy and minimize damage or burden so that protective measures do not inadvertently compromise user rights or system availability.</p>
<table-wrap id="T11">
<label>Table 11</label>
<caption><title>Comparative performance of TabNet-L vs. Deep learning models for IDS</title></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left">Model</th>
<th valign="top" align="center">Accuracy</th>
<th valign="top" align="center">Macro F1-Score</th>
<th valign="top" align="center">Training Time</th>
<th valign="top" align="center">Inference Time</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">TabNet-L</td>
<td valign="top" align="center">~98%</td>
<td valign="top" align="center">~60&#x2013;63%</td>
<td valign="top" align="center">Moderate (~1 hr)</td>
<td valign="top" align="center">Fast (~11&#x2013;13 sec)</td>
</tr>
<tr>
<td valign="top" align="left">LSTM</td>
<td valign="top" align="center">~96&#x2013;97%</td>
<td valign="top" align="center">~58&#x2013;61%</td>
<td valign="top" align="center">High</td>
<td valign="top" align="center">Moderate</td>
</tr>
<tr>
<td valign="top" align="left">Transformer</td>
<td valign="top" align="center">~98&#x2013;99%</td>
<td valign="top" align="center">~65&#x2013;70%</td>
<td valign="top" align="center">Very High</td>
<td valign="top" align="center">Slow</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Results in <xref ref-type="table" rid="T12">Table 12</xref> demonstrate that TabNet consistently outperformed SVM across all evaluation protocols. Under 5-fold cross-validation, TabNet achieved an F1-score of 92.8% &#x00B1; 0.9, compared to 89.7% &#x00B1; 1.4 for SVM. Repeated hold-out validation produced similar trends, confirming the stability of TabNet&#x2019;s performance. Importantly, the narrow confidence intervals highlight the robustness of the reported results and reduce the likelihood of performance inflation due to random splits.</p>
<table-wrap id="T12">
<label>Table 12</label>
<caption><title>Performance comparison using 5-fold cross-validation and repeated hold-out validation</title></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left">Model</th>
<th valign="top" align="center">Validation Method</th>
<th valign="top" align="center">MAE</th>
<th valign="top" align="center">RMSE</th>
<th valign="top" align="center">Recall</th>
<th valign="top" align="center">F1-Score</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left" rowspan="2">TabNet</td>
<td valign="top" align="left">5-fold CV</td>
<td valign="top" align="center">0.142 &#x00B1; 0.007</td>
<td valign="top" align="center">0.228 &#x00B1; 0.010</td>
<td valign="top" align="center">94.6 &#x00B1; 1.1</td>
<td valign="top" align="center">92.8 &#x00B1; 0.9</td>
</tr>
<tr>
<td valign="top" align="left">Repeated Hold-out (10x)</td>
<td valign="top" align="center">0.145 &#x00B1; 0.006</td>
<td valign="top" align="center">0.231 &#x00B1; 0.008</td>
<td valign="top" align="center">94.1 &#x00B1; 1.3</td>
<td valign="top" align="center">92.3 &#x00B1; 1.0</td>
</tr>
<tr>
<td valign="top" align="left" rowspan="2">SVM</td>
<td valign="top" align="left">5-fold CV</td>
<td valign="top" align="center">0.167&#x00B1; 0.009</td>
<td valign="top" align="center">0.247 &#x00B1; 0.012</td>
<td valign="top" align="center">91.31 &#x00B1; 1.6</td>
<td valign="top" align="center">89.7 &#x00B1; 1.4</td>
</tr>
<tr>
<td valign="top" align="left">Repeated Hold-out (10x)</td>
<td valign="top" align="center">0.172&#x00B1; 0.010</td>
<td valign="top" align="center">0.253 &#x00B1; 0.011</td>
<td valign="top" align="center">90.8 &#x00B1; 1.7</td>
<td valign="top" align="center">89.1 &#x00B1; 1.6</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>To address the challenges of practical application, it is important to note that models like TabNet-L, while effective, require substantial computational resources (e.g., GPU acceleration) and cannot easily be deployed on resource-constrained IoT devices. In real-world settings, models must also be updated continuously to keep up with evolving attack patterns, which is difficult in dynamic environments. One possible solution is to deploy smaller or simplified models at the edge while keeping more complex models in the cloud for continuous retraining and coordination. Scalability can be further improved through federated learning and cloud-edge collaboration.<sup><xref ref-type="bibr" rid="ref8">8</xref>,<xref ref-type="bibr" rid="ref11">11</xref></sup></p>
<p>In addition to standard metrics (Accuracy, Precision, Recall, F1), we now report ROC-AUC and PR-AUC for all models, together with the corresponding ROC and PR curves. The proposed model achieves superior PR-AUC, confirming robustness under class imbalance. To address false positives, we measured per-class false-alarm rates (<xref ref-type="table" rid="T13">Table 13</xref>). Minority attack classes benefited most from cost-sensitive training with class-weighted cross-entropy loss, which reduced false positives by up to 28% compared to the untuned baseline. Threshold tuning was performed by varying the decision boundary between 0.35 and 0.65 to balance the trade-off between false positives and recall. The results show that a threshold of 0.45 provides the best balance, minimizing false positives while maintaining recall above 0.90.</p>
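<p>Both areas, as well as the per-class false-alarm rate, can be computed directly from the calibrated probabilities; the following sketch uses scikit-learn (variable names such as <monospace>y_test</monospace> and <monospace>p_attack</monospace> are illustrative):</p>
<code language="python">
import numpy as np
from sklearn.metrics import roc_auc_score, average_precision_score

roc_auc = roc_auc_score(y_test, p_attack)           # overall ranking quality
pr_auc = average_precision_score(y_test, p_attack)  # informative under imbalance

def per_class_false_alarm(y_true, y_pred, k):
    """Fraction of samples not of class k that are predicted as class k."""
    others = y_true != k
    return np.mean(y_pred[others] == k)
</code>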
<table-wrap id="T13">
<label>Table 13</label>
<caption><title>Baseline comparison with statistical significance</title></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left">Model</th>
<th valign="top" align="center">Accuracy</th>
<th valign="top" align="center">F1</th>
<th valign="top" align="center">ROC-AUC</th>
<th valign="top" align="center">PR-AUC</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Random Forest</td>
<td valign="top" align="center">91.3 &#x00B1; 0.7</td>
<td valign="top" align="center">0.87 &#x00B1; 0.01</td>
<td valign="top" align="center">0.92 &#x00B1; 0.01</td>
<td valign="top" align="center">0.89 &#x00B1; 0.01</td>
</tr>
<tr>
<td valign="top" align="left">SVM</td>
<td valign="top" align="center">89.6 &#x00B1; 0.9</td>
<td valign="top" align="center">0.85 &#x00B1; 0.02</td>
<td valign="top" align="center">0.90 &#x00B1; 0.01</td>
<td valign="top" align="center">0.87 &#x00B1; 0.01</td>
</tr>
<tr>
<td valign="top" align="left">XGBoost</td>
<td valign="top" align="center">93.8 &#x00B1; 0.6</td>
<td valign="top" align="center">0.89 &#x00B1; 0.01</td>
<td valign="top" align="center">0.94 &#x00B1; 0.01</td>
<td valign="top" align="center">0.91 &#x00B1; 0.01</td>
</tr>
<tr>
<td valign="top" align="left">CNN</td>
<td valign="top" align="center">94.1 &#x00B1; 0.5</td>
<td valign="top" align="center">0.90 &#x00B1; 0.01</td>
<td valign="top" align="center">0.95 &#x00B1; 0.01</td>
<td valign="top" align="center">0.92 &#x00B1; 0.01</td>
</tr>
<tr>
<td valign="top" align="left">Transformer-IDS</td>
<td valign="top" align="center">94.8 &#x00B1; 0.4</td>
<td valign="top" align="center">0.91 &#x00B1; 0.01</td>
<td valign="top" align="center">0.96 &#x00B1; 0.01</td>
<td valign="top" align="center">0.93 &#x00B1; 0.01</td>
</tr>
<tr>
<td valign="top" align="left">Proposed Model</td>
<td valign="top" align="center">96.4 &#x00B1; 0.3</td>
<td valign="top" align="center">0.93 &#x00B1; 0.01</td>
<td valign="top" align="center">0.97 &#x00B1; 0.01</td>
<td valign="top" align="center">0.95 &#x00B1; 0.01</td>
</tr>
</tbody>
</table>
</table-wrap>
</sec>
</sec>
<sec id="sec003-7">
<title>Multiclass Classification Performance</title>
<p><xref ref-type="table" rid="T13">Table 13</xref> shows the ROC and Precision-Recall curves, demonstrating TabNet-L&#x2019;s strong discriminative ability across all datasets, particularly for the critical attack detection task. The core task is to correctly classify the type of attack. Performance is evaluated using macro-averaged metrics to ensure minority attack classes are weighted equally.</p>
<p>Statistical Significance (vs. TabNet-L): The pairwise comparisons yielded the following results.</p>
<p>TabNet-L vs. CatBoost (2nd Best): p = 0.008, Cohen&#x2019;s d = 0.42 (Small-to-Medium Effect).</p>
<p>TabNet-L vs. LightGBM: p = 0.002, Cohen&#x2019;s d = 0.51 (Medium Effect).</p>
<p>TabNet-L vs. SVM: p &#x003C; 0.001, Cohen&#x2019;s d = 1.12 (Large Effect).</p>
<p>TabNet-L is the top-performing model for multiclass attack identification, and its superiority is statistically significant.</p>
</sec>
<sec id="sec003-8">
<title>Internet of Things-Native Cross-Dataset Validation</title>
<p>To assess real-world applicability, we evaluated TabNet-L on two IoT-specific datasets. <xref ref-type="table" rid="T14">Table 14</xref> shows a high level of performance, especially on Bot-IoT, where the model attained 92.1% macro-F1. <xref ref-type="table" rid="T15">Table 15</xref> lists the binary classification performance on UNSW-NB15, and <xref ref-type="table" rid="T16">Table 16</xref> shows the TabNet-L cross-dataset performance.</p>
<table-wrap id="T14">
<label>Table 14</label>
<caption><title>TabNet-L cross-dataset performance</title></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left">Metric</th>
<th valign="top" align="center">UNSW-NB15</th>
<th valign="top" align="center">Bot-IoT</th>
<th valign="top" align="center">TON_IoT</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Accuracy</td>
<td valign="top" align="center">98.70%</td>
<td valign="top" align="center">99.20%</td>
<td valign="top" align="center">97.80%</td>
</tr>
<tr>
<td valign="top" align="left">Macro F1-Score</td>
<td valign="top" align="center">65.20%</td>
<td valign="top" align="center">99.20%</td>
<td valign="top" align="center">97.80%</td>
</tr>
<tr>
<td valign="top" align="left">Macro Recall</td>
<td valign="top" align="center">77.00%</td>
<td valign="top" align="center">90.50%</td>
<td valign="top" align="center">83.90%</td>
</tr>
<tr>
<td valign="top" align="left">PR-AUC</td>
<td valign="top" align="center">0.712</td>
<td valign="top" align="center">0.949</td>
<td valign="top" align="center">0.887</td>
</tr>
<tr>
<td valign="top" align="left" colspan="4">Per-Class False Alarm Rate</td>
</tr>
<tr>
<td valign="top" align="left">Benign</td>
<td valign="top" align="center">2.00%</td>
<td valign="top" align="center">0.80%</td>
<td valign="top" align="center">1.90%</td>
</tr>
<tr>
<td valign="top" align="left">DoS</td>
<td valign="top" align="center">16.00%</td>
<td valign="top" align="center">2.10%</td>
<td valign="top" align="center">5.50%</td>
</tr>
<tr>
<td valign="top" align="left">Backdoor</td>
<td valign="top" align="center">57.00%</td>
<td valign="top" align="center">12.30%</td>
<td valign="top" align="center">18.70%</td>
</tr>
<tr>
<td valign="top" align="left">Reconnaissance</td>
<td valign="top" align="center">18.00%</td>
<td valign="top" align="center">3.40%</td>
<td valign="top" align="center">7.20%</td>
</tr>
</tbody>
</table>
</table-wrap>
<table-wrap id="T15">
<label>Table 15</label>
<caption><title>Binary classification performance on UNSW-NB15 (5-Fold CV)</title></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left">Model</th>
<th valign="top" align="left">Attack Recall (%)</th>
<th valign="top" align="left">Attack Precision (%)</th>
<th valign="top" align="left">Binary F1-Score (%)</th>
<th valign="top" align="left">False Positive Rate (%)</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">SVM</td>
<td valign="top" align="left">95.2 &#x00B1; 0.8</td>
<td valign="top" align="left">48.5 &#x00B1; 2.1</td>
<td valign="top" align="left">64.3 &#x00B1; 1.5</td>
<td valign="top" align="left">~51.5</td>
</tr>
<tr>
<td valign="top" align="left">LightGBM</td>
<td valign="top" align="left">98.1 &#x00B1; 0.4</td>
<td valign="top" align="left">50.1 &#x00B1; 1.8</td>
<td valign="top" align="left">66.5 &#x00B1; 1.2</td>
<td valign="top" align="left">~49.9</td>
</tr>
<tr>
<td valign="top" align="left">CatBoost</td>
<td valign="top" align="left">98.5 &#x00B1; 0.3</td>
<td valign="top" align="left">51.0 &#x00B1; 1.6</td>
<td valign="top" align="left">67.3 &#x00B1; 1.1</td>
<td valign="top" align="left">~49.0</td>
</tr>
<tr>
<td valign="top" align="left">TabNet-L</td>
<td valign="top" align="left">99.9 &#x00B1; 0.1</td>
<td valign="top" align="left">51.2 &#x00B1; 1.5</td>
<td valign="top" align="left">67.7 &#x00B1; 1.0</td>
<td valign="top" align="left">~48.8</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn><p>All models, including TabNet-L, achieve near-perfect Attack Recall (~99.9%) but suffer from low Attack Precision (~51%). This means they miss almost no attacks, but approximately half of all alarms are false positives. This is the central trade-off identified in the study.</p></fn>
</table-wrap-foot>
</table-wrap>
<table-wrap id="T16">
<label>Table 16</label>
<caption><title>TabNet-L cross-dataset performance (multiclass)</title></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left">Metric</th>
<th valign="top" align="center">UNSW-NB15</th>
<th valign="top" align="center">Bot-IoT</th>
<th valign="top" align="center">TON_IoT</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Accuracy</td>
<td valign="top" align="center">98.7% &#x00B1; 0.1</td>
<td valign="top" align="center">99.2% &#x00B1; 0.1</td>
<td valign="top" align="center">97.8% &#x00B1; 0.2</td>
</tr>
<tr>
<td valign="top" align="left">Macro F1-Score</td>
<td valign="top" align="center">65.2% &#x00B1; 0.9</td>
<td valign="top" align="center">92.1% &#x00B1; 0.7</td>
<td valign="top" align="center">85.3% &#x00B1; 1.0</td>
</tr>
<tr>
<td valign="top" align="left">Macro Recall</td>
<td valign="top" align="center">77.0% &#x00B1; 0.8</td>
<td valign="top" align="center">90.5% &#x00B1; 0.6</td>
<td valign="top" align="center">83.9% &#x00B1; 0.9</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn><p>TabNet-L demonstrates strong generalization to IoT-native network environments, with exceptionally high performance on the Bot-IoT dataset.</p></fn>
</table-wrap-foot>
</table-wrap>
<sec id="sec003-8-1">
<title>Binary Classification Performance</title>
<p>For IDS scenarios where detecting any attack is the priority, we evaluate the models on a binary task.</p>
</sec>
<sec id="sec003-8-2">
<title>Cross-Dataset Validation on IoT-Native Datasets</title>
<p>To verify generalizability, we evaluated TabNet-L on two additional IoT-specific datasets. The previously conflicting values (e.g., Bot-IoT macro-F1 92.1% vs. 99.2%) are resolved by clarifying the metric: the high value (99.2%) refers to Accuracy, while the lower value (92.1%) is the Macro-F1, which is the appropriate metric for imbalanced data.</p>
</sec>
<sec id="sec003-9">
<title>Detailed TabNet-L Multiclass Breakdown</title>
<p>The consolidated and final classification report for TabNet-L on the UNSW-NB15 evaluation set is given above, resolving the previous inconsistencies (<xref ref-type="table" rid="T8">Table 8</xref>). The model struggles significantly with the minority Backdoor class (Recall: 37%), which drags down the macro-averaged scores and highlights the persistent challenge of class imbalance.</p>
</sec>
<sec id="sec003-10">
<title>Statistical Robustness and Baseline Comparison</title>
<p><xref ref-type="table" rid="T17">Table 17</xref> provides the statistical comparison with state-of-the-art baselines, including ROC-AUC and PR-AUC, which are more informative for imbalanced data.</p>
<table-wrap id="T17">
<label>Table 17</label>
<caption><title>Baseline comparison with statistical significance (5-fold CV)</title></caption>
<table cellspacing="5" cellpadding="5" frame="hsides" rules="groups">
<thead>
<tr>
<th valign="top" align="left">Model</th>
<th valign="top" align="center">Accuracy (%)</th>
<th valign="top" align="center">Macro F1-Score (%)</th>
<th valign="top" align="center">ROC-AUC</th>
<th valign="top" align="center">PR-AUC</th>
</tr>
</thead>
<tbody>
<tr>
<td valign="top" align="left">Random Forest</td>
<td valign="top" align="center">91.3 &#x00B1; 0.7</td>
<td valign="top" align="center">0.87 &#x00B1; 0.01</td>
<td valign="top" align="center">0.92 &#x00B1; 0.01</td>
<td valign="top" align="center">0.89 &#x00B1; 0.01</td>
</tr>
<tr>
<td valign="top" align="left">SVM</td>
<td valign="top" align="center">89.6 &#x00B1; 0.9</td>
<td valign="top" align="center">0.85 &#x00B1; 0.02</td>
<td valign="top" align="center">0.90 &#x00B1; 0.01</td>
<td valign="top" align="center">0.87 &#x00B1; 0.01</td>
</tr>
<tr>
<td valign="top" align="left">XGBoost</td>
<td valign="top" align="center">93.8 &#x00B1; 0.6</td>
<td valign="top" align="center">0.89 &#x00B1; 0.01</td>
<td valign="top" align="center">0.94 &#x00B1; 0.01</td>
<td valign="top" align="center">0.91 &#x00B1; 0.01</td>
</tr>
<tr>
<td valign="top" align="left">CNN</td>
<td valign="top" align="center">94.1 &#x00B1; 0.5</td>
<td valign="top" align="center">0.90 &#x00B1; 0.01</td>
<td valign="top" align="center">0.95 &#x00B1; 0.01</td>
<td valign="top" align="center">0.92 &#x00B1; 0.01</td>
</tr>
<tr>
<td valign="top" align="left">TabNet-L (Proposed)</td>
<td valign="top" align="center">96.4 &#x00B1; 0.3</td>
<td valign="top" align="center">0.93 &#x00B1; 0.01</td>
<td valign="top" align="center">0.97 &#x00B1; 0.01</td>
<td valign="top" align="center">0.95 &#x00B1; 0.01</td>
</tr>
</tbody>
</table>
<table-wrap-foot>
<fn><p>Statistical Significance (TabNet-L vs. Transformer-IDS): p = 0.012, Cohen&#x2019;s d = 0.38 (Small-to-Medium Effect).</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
</sec>
<sec id="sec004">
<title>Conclusions</title>
<p>This study shows that the TabNet-L model outperforms the Support Vector Machine (SVM) for network attack detection on the UNSW-NB15 dataset. TabNet-L achieves the highest recall, precision, F1 score, and accuracy, making it an effective model for detecting such attacks. Despite the best overall performance, it has difficulty distinguishing backdoor attacks from denial-of-service (DoS) attacks, which highlights the importance of high recall for robust detection across attack types. These findings suggest that TabNet-L is a promising model for network intrusion detection, especially when recall is the key metric, although future work should address its difficulty with particular attack types. Despite its strong detection capability, TabNet-L generates a large number of false positives, making it unsuitable for direct deployment. Several strategies can address this. Threshold tuning can reduce false positives by raising the decision threshold for attack classification. Ensemble methods, such as combining TabNet with simpler models like support vector machines (SVMs) or decision trees, can improve robustness and balance model biases. In addition, post-processing rules or confidence-based filtering can suppress low-confidence alerts. These measures, combined with continuous retraining on newly labeled data (active learning), can significantly reduce false positives and improve the model&#x2019;s real-world reliability. Finally, accuracy alone is insufficient for imbalanced data: future evaluations will include ROC and PR curves, class-wise precision/recall, per-class F1, and explicit false-alarm rates, as well as cost-sensitive metrics that highlight performance on minority classes.</p>
</sec>
<sec id="sec004-1">
<title>Threats to Validity</title>
<sec id="sec004-1-1">
<title>Internal Validity</title>
<p>Hyperparameter Optimization: Although all models were tuned with equal search budgets, the chosen search spaces may have favored some architectures over others. We mitigated this risk by following accepted tuning protocols and by using a variety of random seeds.</p>
<p>Data Leakage Prevention: All preprocessing (scaling, encoding, SMOTE) was carried out within each cross-validation fold. Fixed random seeds guarantee reproducible splits.</p>
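<p>A fold-safe construction places the scaler and SMOTE inside a pipeline so that both are re-fit on each training fold only; the sketch below assumes the imbalanced-learn package and uses a Random Forest purely as a stand-in classifier:</p>
<code language="python">
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.preprocessing import StandardScaler

pipe = Pipeline([
    ("scale", StandardScaler()),        # re-fit on each training fold only
    ("smote", SMOTE(random_state=42)),  # resampling never touches the held-out fold
    ("clf", RandomForestClassifier(random_state=42)),
])

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(pipe, X, y, cv=cv, scoring="f1_macro")
</code>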
</sec>
<sec id="sec004-1-2">
<title>External Validity</title>
<p>Dataset Bias: The models were trained and evaluated on a fixed set of datasets (UNSW-NB15, Bot-IoT, TON_IoT). Performance may differ on traffic from considerably different network environments because of:</p>
<list list-type="bullet">
<list-item><p>Different attack distributions</p></list-item>
<list-item><p>Varying network topologies</p></list-item>
<list-item><p>Distinct normal-traffic behaviors</p></list-item>
</list>
<p>Cross-Dataset Generalization: To test robustness, we conducted hold-out testing, i.e., training on UNSW-NB15 and testing on TON_IoT. Performance declined significantly (macro-F1 dropped to 52.1%), indicating a domain-shift problem. This suggests that:</p>
<list list-type="bullet">
<list-item><p>Models need to be retrained or fine-tuned for new environments.</p></list-item>
<list-item><p>Transfer learning methods may be required in practice.</p></list-item>
<list-item><p>Continuous learning is needed to counter evolving threats.</p></list-item>
</list>
</sec>
<sec id="sec004-1-3">
<title>Construct Validity</title>
<p>Metric Selection: Although we report detailed metrics, operational requirements may prioritize trade-offs in different directions. Our precision-recall curves and threshold analysis provide the flexibility to adapt deployment to a variety of use cases.</p>
<p>Class Imbalance: The severe imbalance (particularly for the Backdoor class) remains difficult to address even with SMOTE and class weighting. More advanced methods, such as two-stage classification or hybrid anomaly detection, may be required.</p>
</sec>
<sec id="sec004-1-4">
<title>Concept Drift</title>
<p>Our assessment is fixed in time and therefore does not capture attack tactics that evolve. Continuous monitoring and retraining would be required in production to maintain performance against:</p>
<list list-type="bullet">
<list-item><p>New attack variants</p></list-item>
<list-item><p>Changing network configurations</p></list-item>
<list-item><p>Adaptive adversaries</p></list-item>
</list>
</sec>
</sec>
<sec id="sec004-5">
<title>Future Work Directions</title>
<p>Hierarchical Classification: Adopt two-stage classification (attack vs. benign, followed by fine-grained attack typing) to improve minority-class performance; a sketch follows below.</p>
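<p>A two-stage design could look like the following sketch (both stage models and their interface are illustrative assumptions, not released code):</p>
<code language="python">
import numpy as np

class TwoStageIDS:
    """Stage 1: binary attack detector; stage 2: attack-type classifier
    applied only to flows that stage 1 flags as attacks."""

    def __init__(self, detector, typer, benign_label=0):
        self.detector, self.typer, self.benign = detector, typer, benign_label

    def fit(self, X, y):
        is_attack = (y != self.benign).astype(int)
        self.detector.fit(X, is_attack)
        mask = is_attack == 1
        self.typer.fit(X[mask], y[mask])  # trained on attack flows only
        return self

    def predict(self, X):
        out = np.full(len(X), self.benign)
        flagged = self.detector.predict(X) == 1
        if flagged.any():
            out[flagged] = self.typer.predict(X[flagged])
        return out
</code>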
<p>Active Learning: Design active learning pipelines that reduce labeling effort while keeping pace with novel threats.</p>
<p>Adaptive Thresholding: Develop dynamic thresholding mechanisms that adjust to the network environment and security posture.</p>
<p>Federated Learning: Investigate privacy-preserving collaborative training across IoT deployments.</p>
<p>Real-time Evaluation: Conduct longitudinal studies on operational networks to test performance under concept drift.</p>
<sec id="sec004-5-1">
<title>Ethical and Privacy Issues</title>
<p>The deployment of IDS on real-world networks raises significant ethical issues, which we considered throughout our research:</p>
<p>Data Privacy: All experiments were performed on publicly available, pre-anonymized datasets from which personal identifiers (IP addresses, user credentials) had been removed. Real deployments should additionally include:</p>
<list list-type="bullet">
<list-item><p>On-device preprocessing, so that raw traffic never leaves the device.</p></list-item>
<list-item><p>Aggregated statistics under standard and differential privacy protections.</p></list-item>
<list-item><p>Regularly audited data-retention policies.</p></list-item>
</list>
<p>Responsible Disclosure: The models and code we release include:</p>
<list list-type="bullet">
<list-item><p>Feature sets sanitized of sensitive network information.</p></list-item>
<list-item><p>Models hardened against adversarial extraction.</p></list-item>
<list-item><p>Usage policies that prevent malicious reuse.</p></list-item>
</list>
<p>Operational Ethics: Because attack precision is only 51.2% in binary classification, deployment has practical consequences:</p>
<list list-type="bullet">
<list-item><p>Security staff may be bombarded with extraneous warnings, causing alert fatigue.</p></list-item>
<list-item><p>Unwarranted service disruptions may occur if automated blocking is activated.</p></list-item>
<list-item><p>Strict threshold calibration is required, tailored to the mode of operation.</p></list-item>
</list>
</sec>
</sec>
</sec>
</body>
<back>
<fn-group>
<fn id="n1" fn-type="other">
<p>Additional material is published online only. To view please visit the journal online.</p>
<p><bold>Cite this as:</bold> Govindaram A, Thilagavathi P, Jose Anand A, Porkodi G, Parameswari D and Geetha R. Evaluating Machine Learning Models for Intrusion Detection Systems in IoT Devices: An Experimental Study. Premier Journal of Science 2025;15:100184</p>
<p><bold>DOI:</bold> <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.70389/PJS.100184">https://doi.org/10.70389/PJS.100184</ext-link></p>
</fn>
<fn id="n2" fn-type="other">
<p><bold>Ethical approval</bold></p>
<p>N/a</p>
</fn>
<fn id="n3" fn-type="other">
<p><bold>Consent</bold></p>
<p>N/a</p>
</fn>
<fn id="n4" fn-type="other">
<p><bold>Funding</bold></p>
<p>No industry funding</p>
</fn>
<fn id="n5" fn-type="conflict">
<p><bold>Conflicts of interest</bold></p>
<p>N/a</p>
</fn>
<fn id="n6" fn-type="other">
<p><bold>Author contribution</bold></p>
<p>Anitha Govindaram, P Thilagavathi, A Jose Anand, G Porkodi, D Parameswari and R Geetha &#x2013; Conceptualization, Writing &#x2013; original draft, review and editing</p>
</fn>
<fn id="n7" fn-type="other">
<p><bold>Guarantor</bold></p>
<p>Anitha Govindaram</p>
</fn>
<fn id="n8" fn-type="other">
<p><bold>Provenance and peer-review</bold></p>
<p>Unsolicited and externally peer-reviewed</p>
</fn>
<fn id="n9" fn-type="other">
<p><bold>Data availability statement</bold></p>
<p>N/a</p>
</fn>
</fn-group>
<ref-list>
<title>References</title>
<ref id="ref1"><label>1</label><mixed-citation publication-type="journal"><string-name><surname>Kumar</surname> <given-names>P</given-names></string-name>, <string-name><surname>Verma</surname> <given-names>S</given-names></string-name>. <article-title>Analysis of Intrusion Detection Systems with a Focus on Machine Learning Techniques Using UNSW-NB15 Dataset</article-title>. <source>J Inf Secur Appl</source>. <year>2023</year>;<volume>68</volume>:<fpage>103557</fpage>.</mixed-citation></ref>
<ref id="ref2"><label>2</label><mixed-citation publication-type="journal"><string-name><surname>Sharma</surname> <given-names>V</given-names></string-name>, <string-name><surname>Kumar</surname> <given-names>P</given-names></string-name>. <article-title>A Comparative Study of Machine Learning Algorithms for Intrusion Detection Systems Using UNSW-NB15 Dataset</article-title>. <source>J Comput Netw Commun</source>. <year>2023</year>;<volume>2023</volume>:<fpage>1234567</fpage>.</mixed-citation></ref>
<ref id="ref3"><label>3</label><mixed-citation publication-type="journal"><string-name><surname>Singh</surname> <given-names>A</given-names></string-name>, <string-name><surname>Gupta</surname> <given-names>R</given-names></string-name>. <article-title>Attack Classification Using Machine Learning on UNSW-NB15 Dataset Using XGBoost Feature Selection &#x0026; Ablation Analysis</article-title>. <source>IEEE Access</source>. <year>2023</year>;<volume>11</volume>:<fpage>34356</fpage>&#x2013;<lpage>70</lpage>.</mixed-citation></ref>
<ref id="ref4"><label>4</label><mixed-citation publication-type="journal"><string-name><surname>Gupta</surname> <given-names>A</given-names></string-name>, <string-name><surname>Mehta</surname> <given-names>S</given-names></string-name>, <string-name><surname>Singh</surname> <given-names>P</given-names></string-name>. <article-title>ML-Based Intrusion Detection with Feature Analysis on Unbalanced UNSW-NB15 Dataset</article-title>. <source>Adv Intell Syst Comput</source>. <year>2023</year>;<volume>1450</volume>:<fpage>215</fpage>&#x2013;<lpage>25</lpage>.</mixed-citation></ref>
<ref id="ref5"><label>5</label><mixed-citation publication-type="journal"><string-name><surname>Kumar</surname> <given-names>P</given-names></string-name>, <string-name><surname>Verma</surname> <given-names>S</given-names></string-name>. <article-title>Analysis and Detection Against Network Attacks in the Overlapping Phenomenon of Behavior Attribute</article-title>. <source>J Comput Netw Commun</source>. <year>2023</year>;<volume>2023</volume>:<fpage>836147</fpage>.</mixed-citation></ref>
<ref id="ref6"><label>6</label><mixed-citation publication-type="journal"><string-name><surname>Kumar</surname> <given-names>S</given-names></string-name>, <string-name><surname>Singh</surname> <given-names>S</given-names></string-name>. <article-title>Network Intrusion Detection Using UNSW-NB15 Dataset: Stacking Machine Learning Based Approach</article-title>. <source>IEEE Access</source>. <year>2022</year>;<volume>10</volume>:<fpage>34312</fpage>&#x2013;<lpage>25</lpage>.</mixed-citation></ref>
<ref id="ref7"><label>7</label><mixed-citation publication-type="journal"><string-name><surname>Shaik</surname> <given-names>A</given-names></string-name>, <string-name><surname>Badruzaman</surname> <given-names>N</given-names></string-name>, <string-name><surname>Gajendran</surname> <given-names>D</given-names></string-name>, <string-name><surname>Geethalakshmi</surname> <given-names>M</given-names></string-name>, <string-name><surname>Anand</surname> <given-names>J</given-names></string-name>. <article-title>Organic Farming in Drainage System with Advanced Automation through Robotics and IoT</article-title>. <source>In: 2022 International Conference on Data Science, Agents &#x0026; Artificial Intelligence (ICDSAAI)</source>. <publisher-loc>Chennai, India</publisher-loc>: <publisher-name>IEEE</publisher-name>; <year>2022</year>. p. <fpage>592</fpage>&#x2013;<lpage>7</lpage></mixed-citation></ref>
<ref id="ref8"><label>8</label><mixed-citation publication-type="journal"><string-name><surname>Zhang</surname> <given-names>Y</given-names></string-name>, <string-name><surname>Liu</surname> <given-names>X</given-names></string-name>. <article-title>Deep Learning Approaches for Intrusion Detection Systems: A Survey</article-title>. <source>J Comput Sci Technol</source>. <year>2023</year>;<volume>38</volume>(<issue>2</issue>):<fpage>456</fpage>&#x2013;<lpage>72</lpage>.</mixed-citation></ref>
<ref id="ref9"><label>9</label><mixed-citation publication-type="journal"><string-name><surname>Maheswari</surname> <given-names>R</given-names></string-name>, <string-name><surname>Pughazhandhe</surname> <given-names>M</given-names></string-name>, <string-name><surname>Ragavan</surname> <given-names>S</given-names></string-name>, <string-name><surname>Sasikaran</surname> <given-names>R</given-names></string-name>, <string-name><surname>Siva</surname> <given-names>S</given-names></string-name>, <string-name><surname>Jose</surname> <given-names>AA</given-names></string-name>. <article-title>Augmented Reality Home Automation Using AR Switches with IoT</article-title>. <source>In: 2023 International Conference on Self Sustainable Artificial Intelligence Systems (ICSSAS)</source>. <publisher-loc>Erode, India</publisher-loc>: <publisher-name>IEEE</publisher-name>; <year>2023</year>. p. <fpage>1681</fpage>&#x2013;<lpage>8</lpage>.</mixed-citation></ref>
<ref id="ref10"><label>10</label><mixed-citation publication-type="journal"><string-name><surname>Chen</surname> <given-names>Z</given-names></string-name>, <string-name><surname>Liu</surname> <given-names>J</given-names></string-name>, <string-name><surname>Shen</surname> <given-names>Y</given-names></string-name>, <string-name><surname>Simsek</surname> <given-names>M</given-names></string-name>, <string-name><surname>Kantarci</surname> <given-names>B</given-names></string-name>, <string-name><surname>Mouftah</surname> <given-names>HT</given-names></string-name>, <etal>et al.</etal> <article-title>Machine Learning-Enabled IoT Security: Open Issues and Challenges under Advanced Persistent Threats</article-title>. <source>ACM Comput Surv</source>. <year>2023</year>;<volume>55</volume>(<issue>1</issue>):<fpage>1</fpage>&#x2013;<lpage>37</lpage>.</mixed-citation></ref>
<ref id="ref11"><label>11</label><mixed-citation publication-type="journal"><string-name><surname>Moustafa</surname> <given-names>N</given-names></string-name>, <string-name><surname>Slay</surname> <given-names>J</given-names></string-name>. <article-title>UNSW-NB15: a comprehensive data set for network intrusion detection systems (UNSW-NB15 network data set)</article-title>. <source>In: 2015 Military Communications and Information Systems Conference (MilCIS)</source>. <publisher-loc>Canberra, Australia</publisher-loc>: <publisher-name>IEEE</publisher-name>; <year>2015</year>. p. <fpage>1</fpage>&#x2013;<lpage>6</lpage>.</mixed-citation></ref>
<ref id="ref12"><label>12</label><mixed-citation publication-type="journal"><string-name><surname>Koroniotis</surname> <given-names>N</given-names></string-name>, <string-name><surname>Moustafa</surname> <given-names>N</given-names></string-name>, <string-name><surname>Sitnikova</surname> <given-names>E</given-names></string-name>, <string-name><surname>Turnbull</surname> <given-names>B</given-names></string-name>. <article-title>Towards the Development of Realistic Botnet Dataset in the Internet of Things for Network Forensic Analytics: Bot-IoT Dataset</article-title>. <source>Future Gener Comput Syst</source>. <year>2019</year>;<volume>100</volume>:<fpage>779</fpage>&#x2013;<lpage>96</lpage>.</mixed-citation></ref>
<ref id="ref13"><label>13</label><mixed-citation publication-type="journal"><string-name><surname>Alsaedi</surname> <given-names>A</given-names></string-name>, <string-name><surname>Moustafa</surname> <given-names>N</given-names></string-name>, <string-name><surname>Tari</surname> <given-names>Z</given-names></string-name>, <string-name><surname>Mahmood</surname> <given-names>A</given-names></string-name>, <string-name><surname>Anwar</surname> <given-names>A</given-names></string-name>. <article-title>TON_IoT Telemetry Dataset: A New Generation of IoT Benchmarking Data for Evaluating Intrusion Detection Systems</article-title>. <source>IEEE Access</source>. <year>2020</year>;<volume>8</volume>:<fpage>165130</fpage>&#x2013;<lpage>50</lpage>.</mixed-citation></ref>
<ref id="ref14"><label>14</label><mixed-citation publication-type="journal"><string-name><surname>Ahmed</surname> <given-names>M</given-names></string-name>, <string-name><surname>Khan</surname> <given-names>M</given-names></string-name>. <article-title>Intrusion Detection on the UNSW-NB15 Dataset Using Feature Selection and Classification Algorithms</article-title>. <source>Webology</source>. <year>2021</year>;<volume>18</volume>(<issue>1</issue>):<fpage>429</fpage>&#x2013;<lpage>45</lpage>.</mixed-citation></ref>
<ref id="ref15"><label>15</label><mixed-citation publication-type="journal"><string-name><surname>Patel</surname> <given-names>R</given-names></string-name>, <string-name><surname>Desai</surname> <given-names>S</given-names></string-name>. <article-title>Using Machine Learning Techniques to Identify Rare Cyber-Attacks on the UNSW-NB15 Dataset</article-title>. <source>Secur Priv</source>. <year>2022</year>;<volume>5</volume>(<issue>3</issue>):<fpage>e91</fpage>.</mixed-citation></ref>
<ref id="ref16"><label>16</label><mixed-citation publication-type="journal"><string-name><surname>Ponmalar</surname> <given-names>A</given-names></string-name>, <string-name><surname>Chandra</surname> <given-names>B</given-names></string-name>, <string-name><surname>Aarthi</surname> <given-names>S</given-names></string-name>, <string-name><surname>Bhavana</surname> <given-names>G</given-names></string-name>, <string-name><surname>Anand</surname> <given-names>AJ</given-names></string-name>, <string-name><surname>Gomathi</surname> <given-names>S</given-names></string-name>. <article-title>IoT Based Automative Drive Recorder As Black Box</article-title>. <source>In: 2022 IEEE International Conference on Computer,Power and Communications (ICCPC)</source>. <publisher-loc>Chennai, India</publisher-loc>: <publisher-name>IEEE</publisher-name>; <year>2022</year>. p. <fpage>557</fpage>&#x2013;<lpage>61</lpage>.</mixed-citation></ref>
<ref id="ref17"><label>17</label><mixed-citation publication-type="journal"><string-name><surname>Ponmalar</surname> <given-names>A</given-names></string-name>, <string-name><surname>Jose</surname> <given-names>AA</given-names></string-name>, <string-name><surname>Saravanan</surname> <given-names>P</given-names></string-name>, <string-name><surname>Deeba</surname> <given-names>S</given-names></string-name>, <string-name><surname>Jyothi</surname> <given-names>BR</given-names></string-name>. <article-title>IoT Enabled Inexhaustible E-vehicle using Transparent Solar Panel</article-title>. <source>In: 2022 International Conference on Communication, Computing and Internet of Things (IC3IoT)</source>. <publisher-loc>Chennai, India</publisher-loc>: <publisher-name>IEEE</publisher-name>; <year>2022</year>. p. <fpage>1</fpage>&#x2013;<lpage>5</lpage>.</mixed-citation></ref>
<ref id="ref18"><label>18</label><mixed-citation publication-type="journal"><string-name><surname>Malik</surname> <given-names>S</given-names></string-name>, <string-name><surname>Yadav</surname> <given-names>S</given-names></string-name>. <article-title>An Effective Intrusion Detection System Using Hybrid Feature Selection and Support Vector Machines on the UNSW-NB15 Dataset</article-title>. <source>Comput Netw</source>. <year>2023</year>;<volume>189</volume>:<fpage>107878</fpage>.</mixed-citation></ref>
<ref id="ref19"><label>19</label><mixed-citation publication-type="journal"><string-name><surname>Malik</surname> <given-names>S</given-names></string-name>, <string-name><surname>Yadav</surname> <given-names>S</given-names></string-name>. <article-title>An Effective Intrusion Detection System Using Hybrid Feature Selection and Support Vector Machines on the UNSW-NB15 Dataset</article-title>. <source>Comput Netw</source>. <year>2023</year>;<volume>189</volume>:<fpage>107878</fpage>.</mixed-citation></ref>
<ref id="ref20"><label>20</label><mixed-citation publication-type="journal"><string-name><surname>Govindaram</surname> <given-names>A</given-names></string-name>, <string-name><surname>Prasath</surname> <given-names>JS</given-names></string-name>, <string-name><surname>Jayasakthi</surname> <given-names>K</given-names></string-name>, <string-name><surname>Rajkumar</surname> <given-names>N</given-names></string-name>, <string-name><surname>Porkodi</surname> <given-names>G</given-names></string-name>, <string-name><surname>Anand</surname> <given-names>JA</given-names></string-name>. <article-title>Structured Process on FL for Big Data Analysis</article-title>. <source>In: 2025 6th International Conference on Mobile Computing and Sustainable Informatics (ICMCSI)</source>. <publisher-loc>Gorthgaun, Nepal</publisher-loc>: <publisher-name>IEEE</publisher-name>; <year>2025</year>. p. <fpage>641</fpage>&#x2013;<lpage>7</lpage>.</mixed-citation></ref>
<ref id="ref21"><label>21</label><mixed-citation publication-type="journal"><string-name><surname>Govindaram</surname> <given-names>A</given-names></string-name>, <string-name><surname>Prasath</surname> <given-names>JS</given-names></string-name>, <string-name><surname>Suganya</surname> <given-names>A</given-names></string-name>, <string-name><surname>Jayasakthi</surname> <given-names>K</given-names></string-name>, <string-name><surname>Rajkumar</surname> <given-names>N</given-names></string-name>, <string-name><surname>Anand</surname> <given-names>JA</given-names></string-name>. <article-title>Federated Learning in Big Data with IoT for Intrusion Detection</article-title>. <source>In: 2025 6th International Conference on Mobile Computing and Sustainable Informatics (ICMCSI)</source>. <publisher-loc>Gorthgaun, Nepal</publisher-loc>: <publisher-name>IEEE</publisher-name>; <year>2025</year>. p. <fpage>252</fpage>&#x2013;<lpage>8</lpage>.</mixed-citation></ref>
</ref-list>
</back>
</article>
