<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1d3 20150301//EN" "http://jats.nlm.nih.gov/publishing/1.1d3/JATS-journalpublishing1.dtd">
<article article-type="research-article" dtd-version="1.1d3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">PLoS ONE</journal-id>
<journal-id journal-id-type="publisher-id">plos</journal-id>
<journal-id journal-id-type="pmc">plosone</journal-id>
<journal-title-group>
<journal-title>PLOS ONE</journal-title>
</journal-title-group>
<issn pub-type="epub">1932-6203</issn>
<publisher>
<publisher-name>Public Library of Science</publisher-name>
<publisher-loc>San Francisco, CA USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.1371/journal.pone.0317183</article-id>
<article-id pub-id-type="publisher-id">PONE-D-24-30219</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Research Article</subject>
</subj-group>
<subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Organisms</subject><subj-group><subject>Eukaryota</subject><subj-group><subject>Animals</subject><subj-group><subject>Vertebrates</subject><subj-group><subject>Amniotes</subject><subj-group><subject>Mammals</subject><subj-group><subject>Primates</subject><subj-group><subject>Monkeys</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Zoology</subject><subj-group><subject>Animals</subject><subj-group><subject>Vertebrates</subject><subj-group><subject>Amniotes</subject><subj-group><subject>Mammals</subject><subj-group><subject>Primates</subject><subj-group><subject>Monkeys</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Cognitive science</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Learning</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Learning</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Social sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Learning</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Learning and memory</subject><subj-group><subject>Learning</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Agriculture</subject><subj-group><subject>Animal management</subject><subj-group><subject>Animal performance</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Research and analysis methods</subject><subj-group><subject>Animal studies</subject><subj-group><subject>Experimental organism systems</subject><subj-group><subject>Animal models</subject><subj-group><subject>Rhesus monkeys</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Organisms</subject><subj-group><subject>Eukaryota</subject><subj-group><subject>Animals</subject><subj-group><subject>Vertebrates</subject><subj-group><subject>Amniotes</subject><subj-group><subject>Mammals</subject><subj-group><subject>Primates</subject><subj-group><subject>Monkeys</subject><subj-group><subject>Old World monkeys</subject><subj-group><subject>Macaque</subject><subj-group><subject>Rhesus monkeys</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Zoology</subject><subj-group><subject>Animals</subject><subj-group><subject>Vertebrates</subject><subj-group><subject>Amniotes</subject><subj-group><subject>Mammals</subject><subj-group><subject>Primates</subject><subj-group><subject>Monkeys</subject><subj-group><subject>Old World monkeys</subject><subj-group><subject>Macaque</subject><subj-group><subject>Rhesus monkeys</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Cognitive science</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Learning</subject><subj-group><subject>Learning curves</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Learning</subject><subj-group><subject>Learning curves</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Social sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Learning</subject><subj-group><subject>Learning curves</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Learning and memory</subject><subj-group><subject>Learning</subject><subj-group><subject>Learning curves</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Cognitive science</subject><subj-group><subject>Cognitive neuroscience</subject><subj-group><subject>Reaction time</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Cognitive neuroscience</subject><subj-group><subject>Reaction time</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Cognitive science</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Perception</subject><subj-group><subject>Sensory perception</subject><subj-group><subject>Vision</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Perception</subject><subj-group><subject>Sensory perception</subject><subj-group><subject>Vision</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Social sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Perception</subject><subj-group><subject>Sensory perception</subject><subj-group><subject>Vision</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Sensory perception</subject><subj-group><subject>Vision</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Behavior</subject><subj-group><subject>Animal behavior</subject><subj-group><subject>Animal communication</subject><subj-group><subject>Vocalization</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Social sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Behavior</subject><subj-group><subject>Animal behavior</subject><subj-group><subject>Animal communication</subject><subj-group><subject>Vocalization</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Zoology</subject><subj-group><subject>Animal behavior</subject><subj-group><subject>Animal communication</subject><subj-group><subject>Vocalization</subject></subj-group></subj-group></subj-group></subj-group></subj-group></article-categories>
<title-group>
<article-title>Monkeys can identify pictures from words</article-title>
<alt-title alt-title-type="running-head">Cross-modal association in monkeys</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" equal-contrib="yes" xlink:type="simple">
<name name-style="western">
<surname>Cabrera-Ruiz</surname>
<given-names>Elizabeth</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role content-type="http://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="http://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/software/">Software</role>
<role content-type="http://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" equal-contrib="yes" xlink:type="simple">
<name name-style="western">
<surname>Alva</surname>
<given-names>Marlen</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role content-type="http://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="http://credit.niso.org/contributor-roles/software/">Software</role>
<role content-type="http://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0002-2476-5214</contrib-id>
<name name-style="western">
<surname>Treviño</surname>
<given-names>Mario</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="http://credit.niso.org/contributor-roles/software/">Software</role>
<role content-type="http://credit.niso.org/contributor-roles/validation/">Validation</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff003"><sup>3</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Mata-Herrera</surname>
<given-names>Miguel</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role content-type="http://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="http://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Vergara</surname>
<given-names>José</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="http://credit.niso.org/contributor-roles/validation/">Validation</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff004"><sup>4</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0009-0008-6689-7304</contrib-id>
<name name-style="western">
<surname>Figueroa</surname>
<given-names>Tonatiuh</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role content-type="http://credit.niso.org/contributor-roles/software/">Software</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Perez-Orive</surname>
<given-names>Javier</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/validation/">Validation</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
<contrib contrib-type="author" corresp="yes" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0002-6608-3684</contrib-id>
<name name-style="western">
<surname>Lemus</surname>
<given-names>Luis</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role content-type="http://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="http://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/software/">Software</role>
<role content-type="http://credit.niso.org/contributor-roles/validation/">Validation</role>
<role content-type="http://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-original-draft/">Writing – original draft</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="corresp" rid="cor001">*</xref>
</contrib>
</contrib-group>
<aff id="aff001"><label>1</label> <addr-line>Department of Cognitive Neuroscience, Instituto de Fisiología Celular, Universidad Nacional Autónoma de México, Mexico City, México</addr-line></aff>
<aff id="aff002"><label>2</label> <addr-line>Basic Neurosciences, Instituto Nacional de Rehabilitacion, “Luis Guillermo Ibarra Ibarra” Mexico City, México</addr-line></aff>
<aff id="aff003"><label>3</label> <addr-line>Laboratorio de Plasticidad Cortical y Aprendizaje Perceptual, Instituto de Neurociencias, Universidad de Guadalajara, Guadalajara, Jalisco, México</addr-line></aff>
<aff id="aff004"><label>4</label> <addr-line>Department of Neuroscience, Baylor College of Medicine, Houston, TX, United States of America</addr-line></aff>
<contrib-group>
<contrib contrib-type="editor" xlink:type="simple">
<name name-style="western">
<surname>Vatakis</surname>
<given-names>Argiro</given-names>
</name>
<role>Editor</role>
<xref ref-type="aff" rid="edit1"/>
</contrib>
</contrib-group>
<aff id="edit1"><addr-line>Panteion University of Social and Political Sciences, GREECE</addr-line></aff>
<author-notes>
<fn fn-type="conflict" id="coi001">
<p>The authors have declared that no competing interests exist.</p>
</fn>
<corresp id="cor001">* E-mail: <email xlink:type="simple">lemus@ifc.unam.mx</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>12</day>
<month>2</month>
<year>2025</year>
</pub-date>
<pub-date pub-type="collection">
<year>2025</year>
</pub-date>
<volume>20</volume>
<issue>2</issue>
<elocation-id>e0317183</elocation-id>
<history>
<date date-type="received">
<day>20</day>
<month>7</month>
<year>2024</year>
</date>
<date date-type="accepted">
<day>23</day>
<month>12</month>
<year>2024</year>
</date>
</history>
<permissions>
<copyright-year>2025</copyright-year>
<copyright-holder>Cabrera-Ruiz et al.</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="info:doi/10.1371/journal.pone.0317183"/>
<abstract>
<p>Humans learn and incorporate cross-modal associations between auditory and visual objects (e.g., between a spoken word and a picture) into language. However, whether nonhuman primates can learn cross-modal associations between words and pictures remains uncertain. We trained two rhesus macaques in a delayed cross-modal match-to-sample task to determine whether they could learn associations between sounds and pictures of different types. In each trial, the monkeys listened to a brief sound (e.g., a monkey vocalization or a human word), and retained information about the sound to match it with one of 2–4 pictures presented on a touchscreen after a 3-second delay. We found that the monkeys learned and performed proficiently in over a dozen associations. In addition, to test their ability to generalize, we exposed them to sounds uttered by different individuals. We found that their hit rate remained high but more variable, suggesting that they perceived the new sounds as equivalent, though not identical. We conclude that rhesus monkeys can learn cross-modal associations between objects of different types, retain information in working memory, and generalize the learned associations to new objects. These findings position rhesus monkeys as an ideal model for future research on the brain pathways of cross-modal associations between auditory and visual objects.</p>
</abstract>
<funding-group>
<award-group id="award001">
<funding-source>
<institution>Consejo Nacional de Humanidades Ciencias y Tecnologias</institution>
</funding-source>
<award-id>256767</award-id>
<principal-award-recipient>
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0002-6608-3684</contrib-id>
<name name-style="western">
<surname>Lemus</surname>
<given-names>Luis</given-names>
</name>
</principal-award-recipient>
</award-group>
<award-group id="award002">
<funding-source>
<institution>Consejo Nacional de Humanidades Ciencias y Tecnologías</institution>
</funding-source>
<award-id>245771</award-id>
<principal-award-recipient>
<name name-style="western">
<surname>Cabrera-Ruiz</surname>
<given-names>Elizabeth</given-names>
</name>
</principal-award-recipient>
</award-group>
<award-group id="award003">
<funding-source>
<institution-wrap>
<institution-id institution-id-type="funder-id">http://dx.doi.org/10.13039/501100006087</institution-id>
<institution>Dirección General de Asuntos del Personal Académico, Universidad Nacional Autónoma de México</institution>
</institution-wrap>
</funding-source>
<award-id>PAPIIT-IN229223</award-id>
<principal-award-recipient>
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0002-6608-3684</contrib-id>
<name name-style="western">
<surname>Lemus</surname>
<given-names>Luis</given-names>
</name>
</principal-award-recipient>
</award-group>
<award-group id="award004">
<funding-source>
<institution>Secretaría de Educación, Ciencia, Tecnología e Innovación de la Ciudad de México</institution>
</funding-source>
<award-id>SECTEI/103/2022</award-id>
<principal-award-recipient>
<name name-style="western">
<surname>Vergara</surname>
<given-names>José</given-names>
</name>
</principal-award-recipient>
</award-group>
<funding-statement>LL received funding from the Consejo Nacional de Humanidades, Ciencias y Tecnologías (CONAHCYT; Grant Number: 256767; <ext-link ext-link-type="uri" xlink:href="https://conahcyt.mx/" xlink:type="simple">https://conahcyt.mx/</ext-link>) and the Programa de Apoyo a Proyectos de Investigación e Innovación Tecnológica (PAPIIT; Grant Number: IN229323; <ext-link ext-link-type="uri" xlink:href="https://dgapa.unam.mx/index.php/impulso-a-la-investigacion/papiit" xlink:type="simple">https://dgapa.unam.mx/index.php/impulso-a-la-investigacion/papiit</ext-link>). JV was supported by the Secretaría de Educación, Ciencia, Tecnología e Innovación de la Ciudad de México (Grant Number: SECTEI/103/2022; <ext-link ext-link-type="uri" xlink:href="https://www.sectei.cdmx.gob.mx/" xlink:type="simple">https://www.sectei.cdmx.gob.mx/</ext-link>). Elizabeth Cabrera Ruiz conducted this study to fulfill the requirements of the Programa de Doctorado en Ciencias Biomédicas at Universidad Nacional Autónoma de México and received a doctoral scholarship from the Consejo Nacional de Humanidades, Ciencias y Tecnologías (Scholarship Number: 245771; <ext-link ext-link-type="uri" xlink:href="https://conahcyt.mx/" xlink:type="simple">https://conahcyt.mx/</ext-link>). The data presented in this work form part of her doctoral dissertation. The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</funding-statement>
</funding-group>
<counts>
<fig-count count="4"/>
<table-count count="0"/>
<page-count count="17"/>
</counts>
<custom-meta-group>
<custom-meta id="data-availability">
<meta-name>Data Availability</meta-name>
<meta-value>All relevant data for this study is fully available without restrictions from the Figshare repository. The dataset can be accessed at the following DOI: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.6084/m9.figshare.27111460" xlink:type="simple">https://doi.org/10.6084/m9.figshare.27111460</ext-link>.</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="sec001" sec-type="intro">
<title>Introduction</title>
<p>Humans form cross-modal associations (CMAs) between sounds and images, which play a vital role in integrating semantic representations within language [<xref ref-type="bibr" rid="pone.0317183.ref001">1</xref>]. Supporting this, fMRI studies have shown that the temporal lobe of the human brain is actively involved in CMAs [<xref ref-type="bibr" rid="pone.0317183.ref002">2</xref>, <xref ref-type="bibr" rid="pone.0317183.ref003">3</xref>] between words and visual objects [<xref ref-type="bibr" rid="pone.0317183.ref004">4</xref>]. It is believed that CMAs between phonological "templates"—developed in human infants by listening to caretakers—and observed objects are essential for creating semantic representations and aiding the production of a child’s first words [<xref ref-type="bibr" rid="pone.0317183.ref005">5</xref>–<xref ref-type="bibr" rid="pone.0317183.ref008">8</xref>]. Similarly, auditory templates have been proposed as a mechanism for vocal production in birds [<xref ref-type="bibr" rid="pone.0317183.ref009">9</xref>–<xref ref-type="bibr" rid="pone.0317183.ref013">13</xref>] and marmoset monkeys [<xref ref-type="bibr" rid="pone.0317183.ref014">14</xref>]. Recent studies, such as those by Carouso-Peck and Goldstein [<xref ref-type="bibr" rid="pone.0317183.ref015">15</xref>, <xref ref-type="bibr" rid="pone.0317183.ref016">16</xref>], have shown that visual signals during social interactions can also influence vocal production in birds. However, only a few ethological studies have suggested the existence of CMAs between vocal sounds and visual cues for semantic communication [<xref ref-type="bibr" rid="pone.0317183.ref017">17</xref>]. For instance, research has observed that vervet monkeys respond to calls signaling the presence of predators by looking upwards, downwards, or climbing into trees [<xref ref-type="bibr" rid="pone.0317183.ref018">18</xref>].</p>
<p>Neurophysiological recordings in monkeys have shown that the prefrontal cortex (PFC)—a brain area homologous to that in humans—utilizes working memory (WM) circuits [<xref ref-type="bibr" rid="pone.0317183.ref019">19</xref>] to perform CMAs between voices and faces [<xref ref-type="bibr" rid="pone.0317183.ref020">20</xref>–<xref ref-type="bibr" rid="pone.0317183.ref032">32</xref>], receiving inputs from various sensory regions [<xref ref-type="bibr" rid="pone.0317183.ref033">33</xref>–<xref ref-type="bibr" rid="pone.0317183.ref036">36</xref>]. CMAs have also been observed in the auditory and visual areas of the temporal lobe [<xref ref-type="bibr" rid="pone.0317183.ref037">37</xref>–<xref ref-type="bibr" rid="pone.0317183.ref045">45</xref>]. Notably, trained macaques have demonstrated the ability to perform cross-modal discriminations between visual and tactile objects [<xref ref-type="bibr" rid="pone.0317183.ref046">46</xref>, <xref ref-type="bibr" rid="pone.0317183.ref047">47</xref>], between stimuli that could be considered non-ethologically relevant (NER), such as pitch and color [<xref ref-type="bibr" rid="pone.0317183.ref048">48</xref>], and between amodal information (i.e., information that does not belong to a particular modality) [<xref ref-type="bibr" rid="pone.0317183.ref049">49</xref>] such as numerosity [<xref ref-type="bibr" rid="pone.0317183.ref050">50</xref>] and flutter frequencies [<xref ref-type="bibr" rid="pone.0317183.ref051">51</xref>–<xref ref-type="bibr" rid="pone.0317183.ref054">54</xref>]. However, it remains to be explored whether non-human primates can establish CMAs between NER stimuli that are important for human language, like words—which monkeys can discriminate phonetically [<xref ref-type="bibr" rid="pone.0317183.ref055">55</xref>, <xref ref-type="bibr" rid="pone.0317183.ref056">56</xref>]—and pictures.</p>
<p>Therefore, to assess whether monkeys can form CMAs between NER stimuli, we trained two rhesus macaques in a delayed crossmodal match-to-sample task (DCMMS). We specifically designed the task to temporally separate the auditory and visual stimuli, thus engaging WM circuits to retain one modality in mind while awaiting the corresponding cross-modal stimulus. Unlike prior studies, this task required the monkeys to retain auditory information during a 3-second WM period and then use this information to select the matching picture from a set of 2–4 pictures displayed simultaneously on a screen after the delay.</p>
<p>Our results show that rhesus monkeys can accurately identify sounds produced by various emitters and match them with images despite the temporal gap, highlighting the crucial role of WM circuits not only for storing information but also for actively evaluating the equivalence between stimuli of different sensory modalities. This finding suggests substantial similarities with human cognitive processing in analogous tasks [<xref ref-type="bibr" rid="pone.0317183.ref057">57</xref>, <xref ref-type="bibr" rid="pone.0317183.ref058">58</xref>] and paves the way for future neurophysiological studies focused on identifying the specific brain pathways and mechanisms involved in these cross-modal processes.</p>
</sec>
<sec id="sec002" sec-type="materials|methods">
<title>Materials and methods</title>
<sec id="sec003">
<title>Ethics statement</title>
<p>Animal welfare was a priority throughout the study, conducted in strict accordance with the recommendations of the Official Mexican Norm for the Care and Use of Laboratory Animals (NOM-062-ZOO-1999). The protocol was approved by UNAM’s IACUC (i.e., Comité Institucional para el Cuidado y Uso de Animales de Laboratorio; CICUAL; Protocol number: LLS200-22). Descriptions comply with the ARRIVE recommended guidelines [<xref ref-type="bibr" rid="pone.0317183.ref059">59</xref>]. The portrait of one of the authors of this manuscript was used in the experiments, and this author has given written informed consent to the publication of these details.</p>
</sec>
<sec id="sec004">
<title>Subjects</title>
<p>Two adult rhesus monkeys (<italic>Macaca mulatta</italic>), a 10-year-old female (monkey G, 7 kg) and a 12-year-old male (monkey M, 12 kg), participated in the experiments. The animals had no previous training in any other task and were not subjected to any surgery or head restraint for this behavioral study. We adhered to the 3R principles (Replacement, Reduction, Refinement) [<xref ref-type="bibr" rid="pone.0317183.ref060">60</xref>]; statistical significance was therefore achieved through the number of trials each monkey performed rather than through the number of animals employed. The monkeys were housed in cages in a temperature-controlled room (22°C) with filtered air and day/night light cycles. They had free access to a balanced diet of dry food (pellets) supplemented with nuts, fresh fruits, and vegetables. Regular weight monitoring and veterinary check-ups ensured their health and well-being. The monkeys also had access to an enriched environment with toys, a recreation area for climbing and socializing with other monkeys four days a week, and opportunities for grooming through mesh sliding doors. In addition, cartoons and wildlife videos with content unrelated to the experiments were presented on TV for no more than four hours a day. However, the face and voice of one of the researchers with whom the monkeys interacted were used during the experiments. To motivate participation in the experiments, the monkeys followed a water restriction protocol for 12–15 hours before experimental sessions (Monday to Friday, with water intake of 20–30 ml/kg achieved during the experimental sessions and ad libitum on weekends). After the 2–3-hour experimental sessions, they received 150 g rations of fruits and vegetables.</p>
</sec>
<sec id="sec005">
<title>Experimental setup</title>
<p>The monkeys were trained to leave their cages and sit in a primate chair (Crist Instrument, Inc.) for transfer to a soundproof booth adjacent to the vivarium for the experiments. The chair faced a touchscreen (ELO 2201L LED Display E107766, HD wide-aspect-ratio 22-in LCD) positioned 30 cm in front. A spring lever below the touchscreen (ENV-610M, Med Associates) allowed the monkeys to initiate the trials. Two speakers were mounted above the touchscreen: a Yamaha MSP5 Studio (40 W, 0.050–40 kHz) and a Logitech speaker (12 W, 0.01–20 kHz). These speakers delivered the sounds and background noise at 45 and 55 dB SPL, respectively. The monkeys received liquid rewards through a stainless-steel mouthpiece attached to the chair (Reward delivery system 5-RLD-E2-C Gravity feed dispenser, Crist Instrument, Inc.).</p>
</sec>
<sec id="sec006">
<title>Acoustic stimuli</title>
<p>The experiment utilized a variety of sounds, including laboratory recordings of words and monkey vocalizations, as well as free online sounds of cow vocalizations (<ext-link ext-link-type="uri" xlink:href="https://freesound.org/" xlink:type="simple">https://freesound.org/</ext-link>). The sounds were edited to a duration of 500 ms, resampled to 44.1 kHz (with cutoff frequencies of 0.1–20 kHz), and finally RMS-normalized with Adobe Audition® 6.0 software. The phonetic labels of the Spanish words in the text and figure legends were created using the Automatic Phonetic Transcription tool by Xavier López Morras (<ext-link ext-link-type="uri" xlink:href="http://aucel.com/pln/transbase.html" xlink:type="simple">http://aucel.com/pln/transbase.html</ext-link>).</p>
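<p>As an illustration, the following Python sketch reproduces a processing chain equivalent to the one described above; the study itself performed these steps in Adobe Audition 6.0, so the function name, file paths, and target RMS value below are hypothetical.</p>
<preformat>
# Hypothetical Python equivalent of the sound editing described above;
# the original preprocessing was performed in Adobe Audition 6.0.
import numpy as np
import soundfile as sf
from scipy.signal import butter, resample_poly, sosfiltfilt

TARGET_SR = 44100            # resampling rate (Hz)
DURATION_S = 0.5             # edited clip length (s)
BAND = (100.0, 20000.0)      # cutoff frequencies, 0.1-20 kHz

def preprocess(in_path, out_path, target_rms=0.1):
    """Trim to 500 ms, resample to 44.1 kHz, band-pass, RMS-normalize."""
    x, sr = sf.read(in_path)
    if x.ndim != 1:                        # mix down to mono
        x = x.mean(axis=1)
    x = resample_poly(x, TARGET_SR, sr)    # resample to 44.1 kHz
    x = x[: int(TARGET_SR * DURATION_S)]   # keep the first 500 ms
    sos = butter(4, BAND, btype="bandpass", fs=TARGET_SR, output="sos")
    x = sosfiltfilt(sos, x)                # zero-phase band-pass filter
    x *= target_rms / np.sqrt(np.mean(x ** 2))  # RMS normalization
    sf.write(out_path, x, TARGET_SR)
</preformat>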
</sec>
<sec id="sec007">
<title>Visual stimuli</title>
<p>The visual stimuli consisted of a red oval, grayscale cartoons of cows and monkeys, and pictures of human faces, monkey faces, and a cow, each circumscribed in an oval, at a resolution of 200 px per square inch. Animal pictures used in the experiment were downloaded from free online sites and customized. However, the pictures shown in the figures and supplementary information are similar but not identical to the original images used in the study; they were created for illustrative purposes only using an online AI image generator (<ext-link ext-link-type="uri" xlink:href="https://www.fotor.com/ai-art-generator" xlink:type="simple">https://www.fotor.com/ai-art-generator</ext-link>).</p>
</sec>
<sec id="sec008">
<title>Delayed crossmodal match-to-sample task</title>
<p>We trained two rhesus macaques in a DCMMS task to assess their ability to establish CMAs between temporally decoupled sounds and images. Each trial began with a 1° white cross appearing in the center of the touchscreen. In response to the cross, the monkeys had to press and hold down a lever so that a 0.5-second reference sound could be delivered. After hearing the sound, the animals had to wait through a 3-second delay until 2–4 pictures were presented simultaneously at random positions, equidistant on a 4° radius from the center of the touchscreen. The monkeys were then allowed to release the lever and select, within a 3-second response window, the picture that matched the sound (<bold><xref ref-type="supplementary-material" rid="pone.0317183.s001">S1 Video</xref></bold>). Correct selections were rewarded with a drop of liquid.</p>
<p>After the monkeys learned the task (see the monkeys’ training section below), they were able to perform different CMAs. Each CMA was established by associating a sound with a picture representing the same category of external stimulus (e.g., both corresponding to a human). For example, a CMA of the type ‘human’ consisted of the association between the word [si] and a human face. In this way, CMAs of different types were created (e.g., monkey, cow, human, and color). In some cases, the monkeys associated a single sound with several pictures of the same type; for example, four monkey faces were associated with one ‘coo’, resulting in four ‘monkey’ CMAs (<bold><xref ref-type="supplementary-material" rid="pone.0317183.s002">S1 Table</xref></bold>). Each CMA in <bold><xref ref-type="supplementary-material" rid="pone.0317183.s002">S1 Table</xref></bold> was established by the monkeys after many sessions of practice (see the following methods sections). However, in an experimental condition that we designated the ‘perceptual invariance experiment’, we explored the monkeys’ ability to recognize sounds uttered by different individuals that they had not heard before the experiment. For example, a ‘monkey’ CMA substitution set comprised ten different coos (i.e., auditory versions uttered by different individuals) delivered randomly in different trials, but all those trials presented the same monkey picture as a match. Finally, all experimental sessions consisted of blocks of ~300 trials of intermixed CMAs. The hit rate (HR) corresponds to the proportion of correct responses (i.e., audio-visual matches) in a session; false alarms (FA) indicate the proportion of incorrect responses. Reaction times (RT) are the times to release the lever in response to the appearance of the pictures on the touchscreen. Motor times are the intervals between the lever release and the touching of the screen. The task was programmed using LabVIEW 2014 (64-bit SP1, National Instruments®). The artwork in the task description was created using a free online platform (<ext-link ext-link-type="uri" xlink:href="https://www.fotor.com/ai-art-generator" xlink:type="simple">https://www.fotor.com/ai-art-generator</ext-link>).</p>
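<p>For clarity, the behavioral measures defined above can be summarized in a short Python sketch; the trial-log field names below are hypothetical and do not come from the original LabVIEW task code.</p>
<preformat>
# Sketch of the session measures defined above (HR, FA, RT, motor time).
# Trial-log field names are hypothetical, not from the LabVIEW task code.
import numpy as np

def session_metrics(trials):
    """trials: list of dicts, one per completed trial of a session."""
    hits = np.array([t["chosen_pic"] == t["match_pic"] for t in trials])
    hr = hits.mean()                  # proportion of audio-visual matches
    fa = 1.0 - hr                     # proportion of incorrect responses
    # RT: lever release relative to picture onset;
    # motor time: screen touch relative to lever release.
    rt = np.array([t["lever_release"] - t["pics_onset"] for t in trials])
    mt = np.array([t["screen_touch"] - t["lever_release"] for t in trials])
    return hr, fa, rt[hits], mt[hits]
</preformat>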
</sec>
<sec id="sec009">
<title>Monkeys training</title>
<p>To enhance the monkeys’ speed and efficiency in learning the DCMMS task, we tailored stimuli, durations, and rewards according to their ongoing performance. Initially, the animals were trained to produce the motor responses necessary for the task, such as pressing and releasing a lever and consistently activating the touchscreen. Rewards were given for holding down the lever when the cross appeared at the center of the touchscreen and for releasing the lever and touching the screen upon its disappearance. After the subjects completed more than 90% of the trials in consecutive sessions, we introduced a gray filled circle on the touchscreen that appeared at random positions, requiring the monkey to touch it to receive a reward. Within one or two weeks, the animals consistently reacted to the cross within a 500 ms window of appearance, maintained the lever pressed for 5–7 seconds, and released it upon the disappearance of the cross to touch the visual target.</p>
<p>In the subsequent training phase, the monkeys were required to respond to a tone (a 0.5-second, 440 Hz tone at 55 dB SPL) randomly emitted from speakers on either side of the screen. The goal was to indicate the direction of the sound by touching a right or left circle on the screen, which appeared simultaneously with the tone at first and then after a gradually increasing delay (1–3 seconds). Here, the objective was for the monkeys to associate the auditory and visual locations. However, after more than 35,000 trials (i.e., ~117 sessions), performance remained at chance level. Consequently, we adopted a new approach that involved helping the monkeys to directly associate audio cues with specific images.</p>
<p>We replaced one circle with a cartoon image of a cow and added a 0.5-second broadband noise, so each trial featured either the tone or the noise. Rewards were given for correctly associating the cow cartoon with the broadband noise and the gray circle with the 440 Hz tone. From then on, sounds were delivered exclusively from a central speaker above the screen, and pictures appeared at different positions but were consistently separated by 180° of visual angle from each other. With this new training method, it took only a few sessions for Monkey G to begin performing above chance in associating the broadband noise with the cow cartoon (<bold><xref ref-type="supplementary-material" rid="pone.0317183.s006">S1 Fig</xref></bold>, upper leftmost panel). With continued practice, performance improved well above chance, prompting us to gradually introduce new sounds and images to establish various CMAs. The initial CMAs involved only two different pictures on the touchscreen, while more complex associations involved the simultaneous presentation of three or four pictures.</p>
</sec>
<sec id="sec010">
<title>Learning measurements</title>
<p>Although the primary goal of our experiments was not to explore the learning process of macaques, we noted behavioral improvements across sessions, which we aimed to document. To quantify this, we fitted learning curves to the performance at each CMA across sessions, thereby assessing the monkeys’ learning progress. For this analysis, we applied the Rescorla-Wagner model, a well-established framework in associative learning [<xref ref-type="bibr" rid="pone.0317183.ref061">61</xref>], which explains learning as the formation of associations between conditioned and unconditioned stimuli. Deriving the learning curves required solving the following ordinary differential equation:
<disp-formula id="pone.0317183.e001">
<alternatives>
<graphic id="pone.0317183.e001g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pone.0317183.e001" xlink:type="simple"/>
<mml:math display="block" id="M1">
<mml:mfrac><mml:mrow><mml:mi>d</mml:mi><mml:mi>V</mml:mi></mml:mrow><mml:mrow><mml:mi>d</mml:mi><mml:mi>t</mml:mi></mml:mrow></mml:mfrac><mml:mo>=</mml:mo><mml:mi>α</mml:mi><mml:mi>β</mml:mi><mml:mo>(</mml:mo><mml:mrow><mml:mi>λ</mml:mi><mml:mo>−</mml:mo><mml:mi>V</mml:mi></mml:mrow><mml:mo>)</mml:mo>
</mml:math>
</alternatives>
<label>Eq (1)</label>
</disp-formula></p>
<p>This equation describes the progression of associative strength (<italic>V</italic>) in response to trained conditioned stimuli, dependent on the number of training trials (<italic>t</italic>). The model provided the parameters for this equation: <italic>αβ</italic>, which is the product of the salience of the conditioned stimuli and the strength of the unconditioned stimuli (assumed constant during training, though modifications are possible [<xref ref-type="bibr" rid="pone.0317183.ref062">62</xref>]), and λ, representing the maximum possible associative strength towards the unconditioned stimulus. From the learning curves derived from this model, we extracted three additional parameters. Y0 measured initial performance, representing the starting point of the curve along the Y-axis. Parameter γ, indicating statistical learning onset, was determined as the first session in which performance reliably exceeded chance, defined as surpassing two standard deviations from the mean probability of a correct response under a binomial distribution (where p = chance level, and n = average number of trials per session). Finally, the derivatives of these learning curves, coupled with predefined thresholds, allowed us to determine the ‘trend-to-optimal’ experimental session for each CMA (δ), marking the session where changes in performance from one session to the next did not exceed a designated minimal rate of improvement of y’ <italic>=</italic> 0.01, indicating an approach towards a learning plateau.</p>
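<p>The closed-form solution of Eq (1) with initial condition V(0) = Y0 is V(t) = λ - (λ - Y0)·exp(-αβ·t). The sketch below, a minimal Python illustration rather than the original MATLAB analysis, fits this curve to per-session hit rates and extracts γ and δ as defined above.</p>
<preformat>
# Minimal sketch of the learning-curve analysis: fit the closed-form
# solution of Eq (1) and extract gamma and delta as defined in the text.
# This is an illustration, not the original MATLAB implementation.
import numpy as np
from scipy.optimize import curve_fit

def rw_curve(t, ab, lam, y0):
    """V(t) = lam - (lam - y0) * exp(-ab * t), the solution of Eq (1)."""
    return lam - (lam - y0) * np.exp(-ab * t)

def fit_learning(sessions, hit_rates, chance, n_trials):
    t = np.asarray(sessions, dtype=float)
    (ab, lam, y0), _ = curve_fit(rw_curve, t, hit_rates,
                                 p0=[0.1, 0.9, chance],
                                 bounds=([0, 0, 0], [5, 1, 1]))
    v = rw_curve(t, ab, lam, y0)
    # gamma: first session reliably above chance (mean + 2 SD of a
    # binomial proportion with p = chance and n trials per session)
    sd = np.sqrt(chance * (1 - chance) / n_trials)
    gamma = t[np.argmax(v &gt; chance + 2 * sd)]
    # delta: first session where the slope falls to y' = 0.01,
    # i.e., the approach to the learning plateau
    slope = ab * (lam - y0) * np.exp(-ab * t)
    delta = t[np.argmax(slope &lt;= 0.01)]
    return ab, lam, y0, gamma, delta
</preformat>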
</sec>
<sec id="sec011">
<title>Statistical analysis</title>
<p>We focused most of our analyses on data collected post-training, after the monkeys’ performance reached an asymptotic level, with their choices consistently exceeding the chance level. We used various statistical tests to compare RTs across different conditions. These included Spearman rank correlations to test the relationship between reaction time distributions and the number of pictures on the touchscreen, and a Kruskal-Wallis test for differences between CMAs. If the Kruskal-Wallis test indicated a significant difference, we followed up with Mann-Whitney tests to compare conditions such as trials having 2 or 3 pictures when the same sound was presented. Finally, Bonferroni <italic>post hoc</italic> corrections were applied for multiple comparisons. The monkeys’ chance performance threshold depended on the number of pictures displayed; for Monkey M, chance was 0.5 because it performed only with two-picture sets, while for Monkey G it was 0.25 with four-picture sets. Analyses were performed using MATLAB R2022 (MathWorks).</p>
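<p>The following sketch mirrors this testing pipeline using SciPy equivalents of the MATLAB tests; it is an illustration under those assumptions, not the code used for the published analyses.</p>
<preformat>
# Sketch of the RT comparisons described above, using SciPy equivalents
# of the MATLAB tests (illustrative only).
from itertools import combinations
from scipy.stats import kruskal, mannwhitneyu, spearmanr

def compare_rt_groups(rt_groups, alpha=0.05):
    """rt_groups: dict mapping a condition label to an array of RTs."""
    _, p = kruskal(*rt_groups.values())    # omnibus test across CMAs
    results = {"kruskal_p": p, "pairwise": {}}
    if p &lt; alpha:                          # follow up only if significant
        pairs = list(combinations(rt_groups, 2))
        bonf = alpha / len(pairs)          # Bonferroni-corrected alpha
        for a, b in pairs:
            _, p_ab = mannwhitneyu(rt_groups[a], rt_groups[b])
            results["pairwise"][(a, b)] = (p_ab, p_ab &lt; bonf)
    return results

# Spearman rank correlation between RTs and the number of pictures shown:
# rho, p_rho = spearmanr(n_pictures_per_trial, reaction_times)
</preformat>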
</sec>
</sec>
<sec id="sec012" sec-type="results">
<title>Results</title>
<p>To investigate the ability of two rhesus monkeys to form CMAs between auditory and visual stimuli, we engaged them in a DCMMS task. Each trial commenced with the monkeys hearing a reference sound, followed by a 3-second delay, after which 2–4 pictures were displayed on the screen. Their task was to identify on the touchscreen the picture that corresponded to the sound (<bold><xref ref-type="fig" rid="pone.0317183.g001">Fig 1A</xref></bold>). The monkeys mastered fourteen CMAs after associating six distinct sounds—including broadband noise, animal vocalizations like a coo and a moo, and words such as [‘tsan. gi], [si], and [‘ro. xo]—with fourteen images (<bold><xref ref-type="supplementary-material" rid="pone.0317183.s002">S1 Table</xref></bold>). Trials presented 2–4 pictures for Monkey G and consistently 2 pictures for Monkey M. Illustrative examples of four CMAs are depicted in <bold><xref ref-type="fig" rid="pone.0317183.g001">Fig 1B</xref></bold>. <bold><xref ref-type="fig" rid="pone.0317183.g001">Fig 1C and 1D</xref></bold> show the monkeys’ hit rate (HR) and false alarm rate (FA) across these CMAs (<bold><xref ref-type="supplementary-material" rid="pone.0317183.s003">S2 Table</xref></bold>). For example, when a coo sound was used as the reference, Monkey G correctly matched it with the monkey face 87.43% of the time, while its most frequent incorrect choice was the cow face, selected 5.94% of the time (<bold><xref ref-type="fig" rid="pone.0317183.g001">Fig 1C</xref></bold>, open boxplots). Overall, Monkey G exhibited a HR of 85.12% ± 9.11 (mean ± SD), and Monkey M achieved a HR of 87.07% ± 5.71. Statistical analysis showed no bias in their selection of specific positions on the touchscreen (one-way ANOVA with multiple pairwise comparisons; Tukey’s HSD, p &lt; 0.05) (<bold><xref ref-type="supplementary-material" rid="pone.0317183.s007">S2 Fig</xref></bold>). These outcomes indicate that both monkeys proficiently learned to discriminate each sound against 2 to 4 pictures.</p>
<fig id="pone.0317183.g001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0317183.g001</object-id>
<label>Fig 1</label>
<caption>
<title>Delayed crossmodal match-to-sample task.</title>
<p><bold>(A)</bold> Task events. A trial begins with the monkey pressing a lever in response to a cross appearing in the center of the touchscreen. This is followed by a 0.5-second reference sound, succeeded by a 3-second delay. After the delay, 2–4 pictures are simultaneously presented on the touchscreen. The monkey must then release the lever and touch the picture that matches the sample sound to receive a reward. LP indicates lever press. <bold>(B)</bold> Examples of crossmodal associations. Each column displays a CMA between a sound, represented visually by its sonogram and spectrogram, and a picture. The sounds, marked in black, include two Spanish words (in IPA notation) and vocalizations of a monkey and a cow. <bold>(C)</bold> HRs (closed boxplots) and FAs (open boxplots) during the presentations of the CMAs shown in <bold>B</bold>. The dashed line indicates performance at chance level (i.e., 25% for sounds discriminated against four pictures). The reference sound is labeled in red at the top of the graph. <bold>(D)</bold> Same as in <bold>C</bold>, but for Monkey M. The dashed line is set at the 50% chance level (i.e., two pictures on the screen). The pictures are similar but not identical to the original images used in the study and are therefore for illustrative purposes only.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0317183.g001" xlink:type="simple"/>
</fig>
<sec id="sec013">
<title>Rhesus monkeys can learn cross-modal associations between stimuli of different types</title>
<p>The monkeys successfully established each CMA after several sessions of engaging in the DCMMS task, during which we initially presented two pictures simultaneously, only one of which corresponded to the played sound. To investigate the learning dynamics, we measured four learning parameters derived from fitting simple associative learning curves to the performance data across sessions. These parameters included the HR in the first session (Y0) and the sessions marking statistical learning (γ), increasing learning (δ), and the asymptote of learning (λ) (refer to Methods for detailed descriptions). The left panel in <bold><xref ref-type="fig" rid="pone.0317183.g002">Fig 2A</xref></bold> illustrates Monkey G’s performance for the CMA between the coo sound and the monkey cartoon across sessions. Initially, performance was at chance level (~300 trials; see <xref ref-type="sec" rid="sec002">methods</xref> sections on monkeys’ training and learning measurements), aligning with the intersection of the learning curve (black line) and the Y-axis, termed Y0. Subsequently, the γ performance level was reached eight sessions after Y0 (~2700 trials); this level is defined as the session when the HR was above chance, marked by the intersection between the left edge of the gray box and the learning curve. A consistent increase in HR continued until the 15th session, reaching δ performance (right edge of the gray box), and by approximately the 40th session, the performance stabilized at the λ level, where changes in performance from one session to the next were insignificant. Similarly, the middle and right panels in <bold><xref ref-type="fig" rid="pone.0317183.g002">Fig 2A</xref></bold> show two CMAs learned in trials ending with 3 and 4 pictures, respectively.</p>
<fig id="pone.0317183.g002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0317183.g002</object-id>
<label>Fig 2</label>
<caption>
<title>Learning CMAs in monkeys.</title>
<p><bold>(A)</bold> Monkey G’s learning progress for three CMAs across sessions with trials presenting 2, 3, or 4 pictures simultaneously on the screen. The black line represents the average performance across sessions, while the blue line maps the first derivative of performance over training sessions (y’ values), illustrating the rate of change at each session. The initial HR (Y0) was near chance level (indicated by the black line at the ordinates), followed by γ (the left edge of the gray box), where the HR statistically exceeded chance. The learning parameter δ marks a period when HR increased consistently above chance, culminating in a performance plateau at the session denoted by the asymptote of learning λ. <bold>(B)</bold> Sessions before δ for each CMA. <bold>(C)</bold> Average performance of Monkey G across all CMAs over the sessions. <bold>(D)</bold> Same as in <bold>C</bold>, but for Monkey M. The pictures are similar but not identical to the original images used in the study and are therefore for illustrative purposes only.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0317183.g002" xlink:type="simple"/>
</fig>
<p><bold><xref ref-type="supplementary-material" rid="pone.0317183.s006">S1 Fig</xref></bold> shows performance evolving at each CMA across sessions in monkey G. In addition, the number of sessions needed for reaching sustained performance (i.e., the δ parameter) decreased in most new CMAs as the monkeys learned the aim of the task (<bold><xref ref-type="fig" rid="pone.0317183.g002">Fig 2B</xref>)</bold>. However, for the ‘color’ CMA formed by the word [ro. xo] (Spanish for ’red’) and the red oval, Monkey G spent ~14 sessions to reach δ at the conditions where four pictures appeared on the screen. We interpret this increase in learning sessions as the result of introducing those stimuli for the first time in trials that presented four pictures on the screen. Finally, <bold><xref ref-type="fig" rid="pone.0317183.g002">Fig 2C and 2D</xref></bold> present the mean HR for all CMAs across sessions for both monkeys. We interpret the reduction in γ and δ as the monkeys solving the cognitive control of the motor behavior required for the task (procedure memory), e.g., pressing and releasing the lever and interacting correctly with the touchscreen, so that once this was done, the animals could focus only on learning the CMA associations.</p>
</sec>
<sec id="sec014">
<title>The RTs increased as a function of the selected picture and number of pictures on the touchscreen</title>
<p>To explore how different sounds and pictures influenced the monkeys’ ability to find a cross-modal match, we analyzed the RTs during hits across various CMAs. <bold><xref ref-type="fig" rid="pone.0317183.g003">Fig 3A</xref></bold> displays Monkey G’s RT and motor time (MT) distributions across four CMAs. Notable differences are observed between the RT distributions, which pertain to the decision-making period (i.e., the time taken to decide which picture on the touchscreen matches the sound before releasing the lever). In contrast, the MT distributions, which relate to the stereotyped arm movement toward the chosen picture, showed no differences.</p>
<fig id="pone.0317183.g003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0317183.g003</object-id>
<label>Fig 3</label>
<caption>
<title>Crossmodal associations influenced the monkeys’ reaction times.</title>
<p><bold>(A)</bold> Cumulative probabilities of reaction and motor times across four CMAs. <bold>(B) Left panel,</bold> pie charts displaying hit rates in sets presenting three CMAs. In all trials, the reference sound was consistently a "coo," but the match in each session was one of the four monkey pictures. Hits are depicted in colors, while false alarms (FAs), which occurred when the monkey chose a non-matching picture, are shown in gray or white. <bold>Right panel,</bold> reaction time (RT) distributions of hits are illustrated with the same color coding as in the left panel. <bold>Inset,</bold> FA distributions produced in trials where one of the four monkey pictures was presented as a match, but a picture of a ‘human’ or a ‘cow’ was selected. <bold>(C)</bold> Same format as <bold>B</bold> but for ’cow’ CMAs. <bold>(D)</bold> The standard deviations (STDs) of the RT distributions increased as a function of their means during hits, false alarms (FAs), and in trials with two, three, or four pictures on the screen. <bold>(E)</bold> Plot of the monkeys’ HRs as a function of the mean RTs of hit distributions in <bold>D</bold>.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0317183.g003" xlink:type="simple"/>
</fig>
<p>To assess whether acoustic or visual information primarily influenced the monkeys’ RT distributions, we analyzed RTs to different pictures associated with a single sound (<bold><xref ref-type="supplementary-material" rid="pone.0317183.s004">S3 Table</xref></bold>), in trials presenting 3 pictures simultaneously. For instance, <bold><xref ref-type="fig" rid="pone.0317183.g003">Fig 3B</xref></bold> shows Monkey G’s RT distributions (right panel) during correct responses to various pictures of the type ‘monkey’ (left panel) associated with a single ‘coo’. <bold><xref ref-type="fig" rid="pone.0317183.g003">Fig 3C</xref></bold> shows the same for pictures of the type ‘cow’ associated with a single ‘moo’ sound. The RT distributions differed significantly in both instances (p &lt; 0.001, Kruskal-Wallis test), indicating that since the sounds were constant, the differences in RTs must have stemmed from variations among the pictures. This trend continued across all CMAs where different pictures were associated with the same sound (p &lt; 0.001 for all comparisons, post hoc Mann-Whitney U tests with Bonferroni correction); pairwise comparisons between all pictures for each sound revealed significant differences in RT distributions (p &lt; 0.01 for 71.43% of coo comparisons, 76.19% of moo comparisons, and 82.14% of [si] comparisons; Mann-Whitney U tests with Bonferroni correction). A similar effect is observed for FAs, as shown in <bold><xref ref-type="fig" rid="pone.0317183.g003">Fig 3B</xref> (inset)</bold>, where the differences in RTs resulted from incorrect matchings (p &lt; 0.001, Kruskal-Wallis test).</p>
<p>Furthermore, <bold><xref ref-type="fig" rid="pone.0317183.g003">Fig 3D</xref></bold> shows that both the mean and the standard deviation (STD) of the RT distributions increased with the number of pictures displayed on the screen (2–4 pictures), indicating that locating the crossmodal match took longer as the number of distractor pictures increased. This tendency aligns with Weber’s law and with studies of time processing [<xref ref-type="bibr" rid="pone.0317183.ref063">63</xref>]. We interpret the variation in STDs as suggesting that the fastest RTs occurred when the matching picture was found first among the pool of pictures on the screen, and the longest RTs when the match was found last. Notably, these variations in RTs did not impact accuracy across the different CMAs (<bold><xref ref-type="fig" rid="pone.0317183.g003">Fig 3E</xref></bold>). These findings imply that RT was more heavily influenced by the amount of visual information processed than by differences in sounds.</p>
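<p>One simple way to quantify the Weber-like scalar relationship reported here is to regress the STD of each RT distribution on its mean; the sketch below illustrates this under the assumption that per-condition RT arrays are available, and is not part of the published analysis code.</p>
<preformat>
# Illustrative test of the Weber-like scalar property: does the STD of
# the RT distributions grow linearly with their mean?
import numpy as np
from scipy.stats import linregress

def weber_fit(rt_distributions):
    """rt_distributions: list of RT arrays, one per CMA/condition."""
    means = np.array([rt.mean() for rt in rt_distributions])
    stds = np.array([rt.std(ddof=1) for rt in rt_distributions])
    fit = linregress(means, stds)   # slope ~ a Weber-like fraction
    return fit.slope, fit.rvalue ** 2
</preformat>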
</sec>
<sec id="sec015">
<title>The monkeys recognized sounds uttered by different speakers</title>
<p>We explored whether the monkeys could recognize sounds of the same types they had learned but uttered by individuals they had not heard before (<bold><xref ref-type="fig" rid="pone.0317183.g004">Fig 4A</xref>, <xref ref-type="supplementary-material" rid="pone.0317183.s005">S4 Table</xref></bold>). <bold><xref ref-type="fig" rid="pone.0317183.g004">Fig 4B</xref></bold> shows that Monkey G performed above the 25% chance level in 98.33% of cases (paired-sample t-test, p &lt; 0.05). Notably, the RTs during correct responses grouped by the four CMAs (i.e., pictures) used in this experiment rather than by the new sounds (<bold><xref ref-type="fig" rid="pone.0317183.g004">Fig 4C</xref></bold>). <bold><xref ref-type="fig" rid="pone.0317183.g004">Fig 4D</xref></bold> shows that Monkey M presented a similar effect in three CMAs, performing above the 50% chance level in trials with only two pictures on the touchscreen (i.e., in 72.22% of the versions; paired-sample t-test, p &lt; 0.05) and likewise distributing RTs by picture category (<bold><xref ref-type="fig" rid="pone.0317183.g004">Fig 4E</xref></bold>), further supporting the notion of auditory invariance. In other words, the animals recognized the sounds regardless of acoustic variations. Altogether, our findings suggest that monkeys can perform CMAs based on the ability to perceive equivalences among different sounds of the same type.</p>
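<p>One way to formalize the above-chance comparison is sketched below (illustrative only, not the authors’ code): per-session hit rates are tested against the nominal chance level, 25% with four pictures on the touchscreen or 50% with two; testing against a constant chance level with a one-sample t-test is equivalent to pairing each session’s hit rate with that constant:</p>
<preformat>
# Hedged sketch of the above-chance test (placeholder data).
from scipy.stats import ttest_1samp

session_hit_rates = [0.62, 0.71, 0.58, 0.66, 0.69]  # illustrative values
chance = 0.25  # four pictures on the touchscreen; 0.50 with two pictures

t, p_two_sided = ttest_1samp(session_hit_rates, popmean=chance)
# One-sided p-value for the directional question "HR &gt; chance".
p_one_sided = p_two_sided / 2 if t &gt; 0 else 1 - p_two_sided / 2
print(f"t = {t:.2f}, one-sided p = {p_one_sided:.4f}")
</preformat>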
<fig id="pone.0317183.g004" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0317183.g004</object-id>
<label>Fig 4</label>
<caption>
<title>Monkeys recognized sounds uttered by different individuals.</title>
<p><bold>(A)</bold> Spectrograms from various speakers depicting the Spanish word [ˈro.xo] (red). The spectrogram of the learned sound is on the left. <bold>(B)</bold> Hit rates of Monkey G for all versions of the sounds. Closed boxes on the left represent the HR for the learned sounds (L). Open boxes, HRs for the different versions. Closed boxes on the right of each group correspond to the HR for versions comprising double repetitions of some sounds, including L. <bold>(C)</bold> Cumulative distribution functions of Monkey G’s RTs for the learned sounds (bold lines) and their versions. Notice how the distributions group by picture category rather than by sound. <bold>(D)</bold> Same as <bold>B</bold>, but for Monkey M. <bold>(E)</bold> Same as <bold>C</bold>, but for Monkey M. The pictures are similar but not identical to the original images used in the study and are therefore for illustrative purposes only.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0317183.g004" xlink:type="simple"/>
</fig>
</sec>
</sec>
<sec id="sec016" sec-type="conclusions">
<title>Discussion</title>
<p>To investigate whether rhesus monkeys can associate sounds with images regardless of their ethological relevance, we engaged two of these primates in a DCMMS task. To solve the task, the monkeys had to retain in WM either an auditory replay or a crossmodal equivalent of the sounds (i.e., a face) and compare that memory against different pictures to find the match. Evaluation of their performance across various tests yielded two main outcomes: 1) the monkeys adeptly formed associations between sounds (e.g., animal vocalizations, words) and pictures (e.g., faces, cartoons), demonstrating human-like word-object associations of the kind that forms the basis of language (<bold>Figs <xref ref-type="fig" rid="pone.0317183.g001">1</xref> and <xref ref-type="fig" rid="pone.0317183.g002">2</xref>, <xref ref-type="supplementary-material" rid="pone.0317183.s002">S1 Table</xref></bold>), and 2) these associations generalized even when the vocalizations and words they had learned were uttered by different voices (<bold><xref ref-type="fig" rid="pone.0317183.g004">Fig 4</xref>, <xref ref-type="supplementary-material" rid="pone.0317183.s005">S4 Table</xref></bold>). The following sections detail these findings and explore potential mechanisms for establishing CMAs.</p>
<sec id="sec017">
<title>Rhesus macaques create crossmodal associations between sounds and images of different types</title>
<p>Previous studies demonstrated that monkeys could perform crossmodal discriminations of supramodal information such as numerosity and flutter frequencies [<xref ref-type="bibr" rid="pone.0317183.ref050">50</xref>–<xref ref-type="bibr" rid="pone.0317183.ref054">54</xref>] and learn and group numerous sounds into categories irrelevant to their ethology [<xref ref-type="bibr" rid="pone.0317183.ref055">55</xref>, <xref ref-type="bibr" rid="pone.0317183.ref056">56</xref>]. However, establishing cross-modal associations between NER categories in monkeys has proved to be challenging [<xref ref-type="bibr" rid="pone.0317183.ref037">37</xref>, <xref ref-type="bibr" rid="pone.0317183.ref046">46</xref>–<xref ref-type="bibr" rid="pone.0317183.ref048">48</xref>]. In training two rhesus monkeys in the DCMMS task, we initially encountered hurdles as the monkeys tended to disregard sounds [<xref ref-type="bibr" rid="pone.0317183.ref064">64</xref>, <xref ref-type="bibr" rid="pone.0317183.ref065">65</xref>]. To counter this, training began with sound detection and progressively moved to crossmodal associations. We obtained different learning parameters from the monkeys’ performances in each CMA across sessions (<bold><xref ref-type="fig" rid="pone.0317183.g002">Fig 2</xref></bold>).</p>
<p>During the initial training phase, the monkeys learned to interact with the task’s apparatus (i.e., pressing the lever and touching the screen), achieving controlled motor responses within one or two weeks. Learning the first CMA (i.e., a broadband noise paired with a cow cartoon) required many sessions. Subsequent CMAs reached above-chance performance in just a few sessions; however, the animals excelled at the task only after many practice sessions. We found no clear evidence that learning CMAs that included potentially ethologically relevant stimuli, such as human and monkey faces or coos [<xref ref-type="bibr" rid="pone.0317183.ref020">20</xref>–<xref ref-type="bibr" rid="pone.0317183.ref031">31</xref>], was facilitated more than learning CMAs to which the animals had no previous exposure. In other words, the animals learned all CMAs at similar rates, providing behavioral data that could be highly informative regarding the brain responses underlying CMAs. Future neurophysiological evidence could build on these behavioral findings.</p>
<p>Three of our results align with the idea that CMAs could be created from templates [<xref ref-type="bibr" rid="pone.0317183.ref005">5</xref>–<xref ref-type="bibr" rid="pone.0317183.ref012">12</xref>]: 1) the monkeys learned each new CMA faster than the preceding ones; 2) mastering a CMA required a prolonged period, akin to humans learning to speak; 3) the animals’ performance remained consistently high when the same vocalizations or words were presented in different voices, suggesting that the acoustic variations activated auditory templates, similar to how formants in words trigger acoustic recognition in monkeys [<xref ref-type="bibr" rid="pone.0317183.ref055">55</xref>]. Similarly, our results suggest that visual templates could create perceptual equivalence among different faces of the same type (<bold><xref ref-type="fig" rid="pone.0317183.g003">Fig 3B</xref></bold>). This is the strongest evidence to date supporting the possibility that monkeys can connect auditory and visual templates as humans do.</p>
<p>The existence of supramodal circuits linking vocalizations with other motor behaviors [<xref ref-type="bibr" rid="pone.0317183.ref012">12</xref>, <xref ref-type="bibr" rid="pone.0317183.ref013">13</xref>] suggests that the integration process in NHPs might similarly involve motor and spatial associations across sensory modalities [<xref ref-type="bibr" rid="pone.0317183.ref066">66</xref>, <xref ref-type="bibr" rid="pone.0317183.ref067">67</xref>]. In our task, such associations were unnecessary, since the animals had to match a sound with the corresponding picture, which was presented at a different location on every trial. Moreover, studies exploring the convergence of crossmodal information in WM [<xref ref-type="bibr" rid="pone.0317183.ref022">22</xref>–<xref ref-type="bibr" rid="pone.0317183.ref027">27</xref>, <xref ref-type="bibr" rid="pone.0317183.ref048">48</xref>] indicate that while motor or spatial associations may facilitate initial learning, more abstract associations, such as numerosity or flutter [<xref ref-type="bibr" rid="pone.0317183.ref050">50</xref>, <xref ref-type="bibr" rid="pone.0317183.ref054">54</xref>], which extend beyond immediate and innate categories, can develop through direct CMAs. Therefore, the monkeys performing our task could have created direct connections between auditory and visual templates.</p>
</sec>
<sec id="sec018">
<title>Working memory mechanisms for crossmodal matching</title>
<p>In contrast to other tasks [<xref ref-type="bibr" rid="pone.0317183.ref021">21</xref>, <xref ref-type="bibr" rid="pone.0317183.ref037">37</xref>], our monkeys had to retain information about sounds over a 3-second delay and use it to compare against different pictures until they found a match, similar to previous work on the intra- and cross-modal discrimination of flutter [<xref ref-type="bibr" rid="pone.0317183.ref051">51</xref>–<xref ref-type="bibr" rid="pone.0317183.ref054">54</xref>]. Given that the animals performed above chance in all CMAs, and that strategies such as selecting a particular picture or location cannot explain their performance (<bold><xref ref-type="supplementary-material" rid="pone.0317183.s007">S2 Fig</xref></bold>), we conclude that the most parsimonious explanation is the cross-modal matching of sounds and pictures. In other words, the monkeys must have retained information about the sounds in WM to find the cross-modal match presented 3 seconds later. A candidate brain region for the type of WM involved in our task is the PFC [<xref ref-type="bibr" rid="pone.0317183.ref019">19</xref>], which participates in retaining parametric and nonparametric information of different sensory modalities compared intra- or cross-modally [<xref ref-type="bibr" rid="pone.0317183.ref002">2</xref>, <xref ref-type="bibr" rid="pone.0317183.ref003">3</xref>, <xref ref-type="bibr" rid="pone.0317183.ref020">20</xref>–<xref ref-type="bibr" rid="pone.0317183.ref031">31</xref>]. Notably, the PFC is also responsible for intramodal associations of stimuli separated in space and time [<xref ref-type="bibr" rid="pone.0317183.ref050">50</xref>]. Therefore, it is probably capable of translating information cross-modally; in our task, this could involve invoking visual representations after hearing sounds, thus retaining visual information in WM for later comparison with the pictures, rather than keeping the reference sound in WM until the pictures appeared.</p>
<p>On the other hand, it is well documented that the PFC is activated, in the context of CMAs, by ethologically relevant stimuli such as conspecific faces and voices, even in monkeys not engaged in their active recognition [<xref ref-type="bibr" rid="pone.0317183.ref026">26</xref>, <xref ref-type="bibr" rid="pone.0317183.ref042">42</xref>]. This suggests that ethologically relevant circuits could be established there from birth [<xref ref-type="bibr" rid="pone.0317183.ref026">26</xref>, <xref ref-type="bibr" rid="pone.0317183.ref031">31</xref>]. Therefore, active cross-modal discrimination and the learning of CMAs between non-ethological stimuli may occur in other areas of the temporal lobe, which are known to represent and integrate auditory and visual objects [<xref ref-type="bibr" rid="pone.0317183.ref037">37</xref>–<xref ref-type="bibr" rid="pone.0317183.ref045">45</xref>] and show activations to superimposed audiovisual stimuli [<xref ref-type="bibr" rid="pone.0317183.ref037">37</xref>], perhaps facilitating the recognition of individuals within a social group [<xref ref-type="bibr" rid="pone.0317183.ref026">26</xref>]. However, only future neurophysiological experiments in monkeys trained in the DCMMS task will reveal not only how and where in the brain non-ethological auditory and visual categories are learned, stored, and associated cross-modally, but also whether auditory representations or visual images invoked by sounds are retained in WM during the resolution of the task.</p>
</sec>
</sec>
<sec id="sec019" sec-type="supplementary-material">
<title>Supporting information</title>
<supplementary-material id="pone.0317183.s001" mimetype="video/mp4" position="float" xlink:href="info:doi/10.1371/journal.pone.0317183.s001" xlink:type="simple">
<label>S1 Video</label>
<caption>
<title>Monkey G performing the DCMMS task.</title>
<p>(MP4)</p>
</caption>
</supplementary-material>
<supplementary-material id="pone.0317183.s002" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pone.0317183.s002" xlink:type="simple">
<label>S1 Table</label>
<caption>
<title>Monkeys’ learning parameters and hit rate.</title>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pone.0317183.s003" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pone.0317183.s003" xlink:type="simple">
<label>S2 Table</label>
<caption>
<title>Overall hit rate (mean ± STD) in four CMAs.</title>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pone.0317183.s004" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pone.0317183.s004" xlink:type="simple">
<label>S3 Table</label>
<caption>
<title>The proportion (mean ± STD) of pictures selected.</title>
<p>Selections of pictures during hits and FAs in the condition when one sound was associated with different pictures of the same type.</p>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pone.0317183.s005" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pone.0317183.s005" xlink:type="simple">
<label>S4 Table</label>
<caption>
<title>Hit rate (mean ± STD) in different versions of the learned sounds.</title>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pone.0317183.s006" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pone.0317183.s006" xlink:type="simple">
<label>S1 Fig</label>
<caption>
<title>Learning of CMAs.</title>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pone.0317183.s007" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pone.0317183.s007" xlink:type="simple">
<label>S2 Fig</label>
<caption>
<title>Hit rate and reaction times at different picture locations.</title>
<p>To analyze biases toward selecting a picture at any angle from the center of the touchscreen, we performed a one-way ANOVA with False Discovery Rate (FDR) correction for multiple pairwise comparisons. Monkey M showed no location bias (p-values &gt; 0.034). Monkey G, however, exhibited a significant effect for the monkey face position (F [15, 160.67] = 1.97; p = 0.014) and the cow face (F [15, 150.619] = 2.51; p = 0.001), but not for the human face (p = 0.988). Post hoc analysis (Tukey’s HSD) revealed that these differences occurred at angles &lt; 90° within each screen quadrant. In other words, while there were biases toward particular angles, there was no consistent preference for a specific quadrant. Based on these findings, the behavioral results presented here correspond to subsequent experiments presenting pictures in only four quadrants.</p>
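<p>For concreteness, the Python sketch below (illustrative only, not the authors’ code) mirrors this analysis for a single picture: a one-way ANOVA on hit rates across angular positions, FDR (Benjamini-Hochberg) correction of the resulting p-values across pictures (a single p-value stands in here), and Tukey’s HSD as the post hoc test:</p>
<preformat>
# Illustrative sketch of the location-bias analysis (placeholder data).
from scipy.stats import f_oneway, tukey_hsd
from statsmodels.stats.multitest import multipletests

# angle (deg) -&gt; per-session hit rates for one picture (placeholders).
hr = {0: [0.70, 0.66, 0.72], 90: [0.64, 0.61, 0.67],
      180: [0.69, 0.73, 0.68], 270: [0.62, 0.60, 0.66]}

f, p = f_oneway(*hr.values())
print(f"one-way ANOVA: F = {f:.2f}, p = {p:.4f}")

# FDR correction across the per-picture ANOVAs (one p-value shown here).
reject, p_corr, _, _ = multipletests([p], alpha=0.05, method="fdr_bh")
print(f"FDR-corrected p = {p_corr[0]:.4f}, significant = {reject[0]}")

# Post hoc pairwise comparisons (Tukey's HSD).
print(tukey_hsd(*hr.values()))
</preformat>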
<p>(PDF)</p>
</caption>
</supplementary-material>
</sec>
</body>
<back>
<ack>
<p>We extend our gratitude to Vani Rajendran for valuable feedback; Francisco Pérez, Gerardo Coello, and Ana María Escalante from the Computing Department of the IFC; Aurey Galván and Manuel Ortínez of the IFC workshop; and Claudia Rivera for veterinary assistance. Additionally, we thank Centenario 107 for their hospitality.</p>
</ack>
<ref-list>
<title>References</title>
<ref id="pone.0317183.ref001"><label>1</label><mixed-citation publication-type="other" xlink:type="simple">Bowerman M, Choi S. Shaping meanings for language: universal and language–specific in the acquisition of spatial semantic categories. In: Bowerman M, Levinson S, editors. Language acquisition and conceptual development. Cambridge, UK; 2001. p. 475–511.</mixed-citation></ref>
<ref id="pone.0317183.ref002"><label>2</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Beauchamp</surname> <given-names>MS</given-names></name>, <name name-style="western"><surname>Lee</surname> <given-names>KE</given-names></name>, <name name-style="western"><surname>Argall</surname> <given-names>BD</given-names></name>, <name name-style="western"><surname>Martin</surname> <given-names>A</given-names></name>. <article-title>Integration of auditory and visual information about objects in superior temporal sulcus</article-title>. <source>Neuron</source>. <year>2004</year>;<volume>41</volume>(<issue>5</issue>):<fpage>809</fpage>–<lpage>23</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/s0896-6273%2804%2900070-4" xlink:type="simple">10.1016/s0896-6273(04)00070-4</ext-link></comment> <object-id pub-id-type="pmid">15003179</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref003"><label>3</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Noesselt</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Rieger</surname> <given-names>JW</given-names></name>, <name name-style="western"><surname>Schoenfeld</surname> <given-names>MA</given-names></name>, <name name-style="western"><surname>Kanowski</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Hinrichs</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Heinze</surname> <given-names>HJ</given-names></name>, <etal>et al</etal>. <article-title>Audiovisual temporal correspondence modulates human multisensory superior temporal sulcus plus primary sensory cortices</article-title>. <source>Journal of Neuroscience</source>. <year>2007</year> <month>Oct</month> <day>17</day>;<volume>27</volume>(<issue>42</issue>):<fpage>11431</fpage>–<lpage>41</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1523/JNEUROSCI.2252-07.2007" xlink:type="simple">10.1523/JNEUROSCI.2252-07.2007</ext-link></comment> <object-id pub-id-type="pmid">17942738</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref004"><label>4</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Mesulam</surname> <given-names>MM</given-names></name>, <name name-style="western"><surname>Wieneke</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Hurley</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Rademaker</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Thompson</surname> <given-names>CK</given-names></name>, <name name-style="western"><surname>Weintraub</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Rogalski</surname> <given-names>EJ</given-names></name>. <article-title>Words and objects at the tip of the left temporal lobe in primary progressive aphasia</article-title>. <source>Brain</source>. <year>2013</year> <month>Feb</month>;<volume>136</volume>(<issue>Pt 2</issue>):<fpage>601</fpage>–<lpage>18</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/brain/aws336" xlink:type="simple">10.1093/brain/aws336</ext-link></comment> Epub 2013 Jan 29. <object-id pub-id-type="pmid">23361063</object-id>; PMCID: PMC3572925.</mixed-citation></ref>
<ref id="pone.0317183.ref005"><label>5</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Vihman</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Croft</surname> <given-names>W</given-names></name>. <article-title>Phonological development: Toward a “radical” templatic phonology.</article-title> <source>Linguistics</source>. <year>2007</year> <month>Jul</month> <day>20</day>;<volume>45</volume>(<issue>4</issue>):<fpage>683</fpage>–<lpage>725</lpage>.</mixed-citation></ref>
<ref id="pone.0317183.ref006"><label>6</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Coffey</surname> <given-names>JR</given-names></name>, <name name-style="western"><surname>Shafto</surname> <given-names>CL</given-names></name>, <name name-style="western"><surname>Geren</surname> <given-names>JC</given-names></name>, <name name-style="western"><surname>Snedeker</surname> <given-names>J</given-names></name>. <article-title>The effects of maternal input on language in the absence of genetic confounds: Vocabulary development in internationally adopted children.</article-title> <source>Child Dev</source>. <year>2022</year> <month>Jan</month> <day>1</day>;<volume>93</volume>(<issue>1</issue>):<fpage>237</fpage>–<lpage>53</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1111/cdev.13688" xlink:type="simple">10.1111/cdev.13688</ext-link></comment> <object-id pub-id-type="pmid">34882780</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref007"><label>7</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Bloom</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Tinker</surname> <given-names>E</given-names></name>. <article-title>The intentionality model and language acquisition: engagement, effort, and the essential tension in development</article-title>. <source>Monogr Soc Res Child Dev</source>. <year>2001</year>;<volume>66</volume>(<issue>4</issue>):<fpage>1</fpage>–<lpage>91</lpage>. <object-id pub-id-type="pmid">11799833</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref008"><label>8</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Locke</surname> <given-names>JL</given-names></name>. <article-title>Movement patterns in spoken language</article-title>. <source>Science</source>. <year>2000</year> <month>Apr</month> <day>21</day>;<volume>288</volume>(<issue>5465</issue>):<fpage>449</fpage>–<lpage>51</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1126/science.288.5465.449" xlink:type="simple">10.1126/science.288.5465.449</ext-link></comment> <object-id pub-id-type="pmid">10798981</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref009"><label>9</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Goldstein</surname> <given-names>MH</given-names></name>, <name name-style="western"><surname>King</surname> <given-names>AP</given-names></name>, <name name-style="western"><surname>West</surname> <given-names>MJ</given-names></name>. <article-title>Social interaction shapes babbling: testing parallels between birdsong and speech</article-title>. <source>Proc Natl Acad Sci U S A</source>. <year>2003</year> <month>Jun</month> <day>24</day>;<volume>100</volume>(<issue>13</issue>):<fpage>8030</fpage>–<lpage>5</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1073/pnas.1332441100" xlink:type="simple">10.1073/pnas.1332441100</ext-link></comment> <object-id pub-id-type="pmid">12808137</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref010"><label>10</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Mooney</surname> <given-names>R.</given-names></name> <article-title>Neurobiology of song learning</article-title>. <source>Curr Opin Neurobiol</source>. <year>2009</year> <month>Dec</month>;<volume>19</volume>(<issue>6</issue>):<fpage>654</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.conb.2009.10.004" xlink:type="simple">10.1016/j.conb.2009.10.004</ext-link></comment> <object-id pub-id-type="pmid">19892546</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref011"><label>11</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Margoliash</surname> <given-names>D.</given-names></name> <article-title>Evaluating theories of bird song learning: implications for future directions</article-title>. <source>J Comp Physiol A Neuroethol Sens Neural Behav Physiol</source>. <year>2002</year> <month>Dec</month> <day>1</day>;<volume>188</volume>(<issue>11–12</issue>):<fpage>851</fpage>–<lpage>66</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1007/s00359-002-0351-5" xlink:type="simple">10.1007/s00359-002-0351-5</ext-link></comment> <object-id pub-id-type="pmid">12471486</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref012"><label>12</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Chen</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Matheson</surname> <given-names>LE</given-names></name>, <name name-style="western"><surname>Sakata</surname> <given-names>JT</given-names></name>. <article-title>Mechanisms underlying the social enhancement of vocal learning in songbirds</article-title>. <source>Proc Natl Acad Sci U S A</source>. <year>2016</year> <month>Jun</month> <day>14</day>;<volume>113</volume>(<issue>24</issue>):<fpage>6641</fpage>–<lpage>6</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1073/pnas.1522306113" xlink:type="simple">10.1073/pnas.1522306113</ext-link></comment> <object-id pub-id-type="pmid">27247385</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref013"><label>13</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hisey</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Kearney</surname> <given-names>MG</given-names></name>, <name name-style="western"><surname>Mooney</surname> <given-names>R</given-names></name>. <article-title>A common neural circuit mechanism for internally guided and externally reinforced forms of motor learning</article-title>. <source>Nat Neurosci</source>. <year>2018</year> <month>Apr</month> <day>1</day>;<volume>21</volume>(<issue>4</issue>):<fpage>589</fpage>–<lpage>97</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/s41593-018-0092-6" xlink:type="simple">10.1038/s41593-018-0092-6</ext-link></comment> <object-id pub-id-type="pmid">29483664</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref014"><label>14</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Takahashi</surname> <given-names>DY</given-names></name>, <name name-style="western"><surname>Fenley</surname> <given-names>AR</given-names></name>, <name name-style="western"><surname>Teramoto</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Narayanan</surname> <given-names>DZ</given-names></name>, <name name-style="western"><surname>Borjon</surname> <given-names>JI</given-names></name>, <name name-style="western"><surname>Holmes</surname> <given-names>P</given-names></name>, <etal>et al</etal>. <article-title>Language Development. The developmental dynamics of marmoset monkey vocal production</article-title>. <source>Science</source>. <year>2015</year> <month>Aug</month> <day>14</day>;<volume>349</volume>(<issue>6249</issue>):<fpage>734</fpage>–<lpage>8</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1126/science.aab1058" xlink:type="simple">10.1126/science.aab1058</ext-link></comment> <object-id pub-id-type="pmid">26273055</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref015"><label>15</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Carouso-Peck</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Goldstein</surname> <given-names>MH</given-names></name>. <article-title>Female Social Feedback Reveals Non-imitative Mechanisms of Vocal Learning in Zebra Finches</article-title>. <source>Curr Biol</source>. <year>2019</year> <month>Feb</month> <day>18</day>;<volume>29</volume>(<issue>4</issue>):<fpage>631</fpage>–<lpage>636.e3</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.cub.2018.12.026" xlink:type="simple">10.1016/j.cub.2018.12.026</ext-link></comment> <object-id pub-id-type="pmid">30713105</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref016"><label>16</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Takahashi</surname> <given-names>DY</given-names></name>. <article-title>Vocal Learning: Shaping by Social Reinforcement</article-title>. <source>Curr Biol</source>. <year>2019</year> <month>Feb</month> <day>18</day>;<volume>29</volume>(<issue>4</issue>): <fpage>R125</fpage>–<lpage>7</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.cub.2019.01.001" xlink:type="simple">10.1016/j.cub.2019.01.001</ext-link></comment> <object-id pub-id-type="pmid">30779900</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref017"><label>17</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ratcliffe</surname> <given-names>VF</given-names></name>, <name name-style="western"><surname>Taylor</surname> <given-names>AM</given-names></name>, <name name-style="western"><surname>Reby</surname> <given-names>D</given-names></name>. <article-title>Cross-modal correspondences in non-human mammal communication.</article-title> <source>Multisens Res</source>. <year>2016</year>;<volume>74</volume>(<issue>5657</issue>):<fpage>49</fpage>–<lpage>91</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1163/22134808-00002509" xlink:type="simple">10.1163/22134808-00002509</ext-link></comment> <object-id pub-id-type="pmid">27311291</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref018"><label>18</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Seyfarth</surname> <given-names>RM</given-names></name>, <name name-style="western"><surname>Cheney</surname> <given-names>DL</given-names></name>, <name name-style="western"><surname>Marler</surname> <given-names>P</given-names></name>. <article-title>Vervet monkey alarm calls: Semantic communication in a free-ranging primate</article-title>. <source>Anim Behav</source>. <year>1980</year>;<volume>28</volume>(<issue>4</issue>):<fpage>1070</fpage>–<lpage>94</lpage>.</mixed-citation></ref>
<ref id="pone.0317183.ref019"><label>19</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Romo</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Brody</surname> <given-names>CD</given-names></name>, <name name-style="western"><surname>Hernández</surname> <given-names>a</given-names></name>, <name name-style="western"><surname>Lemus</surname> <given-names>L</given-names></name>. <article-title>Neuronal correlates of parametric working memory in the prefrontal cortex</article-title>. <source>Nature</source>. <year>1999</year>;<volume>399</volume>(<issue>June</issue>):<fpage>470</fpage>–<lpage>3</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/20939" xlink:type="simple">10.1038/20939</ext-link></comment> <object-id pub-id-type="pmid">10365959</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref020"><label>20</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Plakke</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Hwang</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Romanski</surname> <given-names>LM</given-names></name>. <source>Inactivation of Primate Prefrontal Cortex Impairs Auditory and Audiovisual Working Memory</source>. <year>2015</year>;<volume>35</volume>(<issue>26</issue>):<fpage>9666</fpage>–<lpage>75</lpage>.</mixed-citation></ref>
<ref id="pone.0317183.ref021"><label>21</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Diehl</surname> <given-names>MM</given-names></name>, <name name-style="western"><surname>Plakke</surname> <given-names>BA</given-names></name>, <name name-style="western"><surname>Albuquerque</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Romanski</surname> <given-names>LM</given-names></name>. <article-title>Representation of expression and identity by ventral prefrontal neurons</article-title>. <source>Neuroscience</source>. <year>2022</year>;<volume>496</volume>(<issue>2022</issue>):<fpage>243</fpage>–<lpage>60</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.neuroscience.2022.05.033" xlink:type="simple">10.1016/j.neuroscience.2022.05.033</ext-link></comment> <object-id pub-id-type="pmid">35654293</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref022"><label>22</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hwang</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Romanski</surname> <given-names>LM</given-names></name>. <article-title>Prefrontal neuronal responses during audiovisual mnemonic processing</article-title>. <source>J Neurosci</source>. <year>2015</year> <month>Jan</month> <day>21</day>;<volume>35</volume>(<issue>3</issue>):<fpage>960</fpage>–<lpage>71</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1523/JNEUROSCI.1328-14.2015" xlink:type="simple">10.1523/JNEUROSCI.1328-14.2015</ext-link></comment> <object-id pub-id-type="pmid">25609614</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref023"><label>23</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Sugihara</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Diltz</surname> <given-names>MD</given-names></name>, <name name-style="western"><surname>Averbeck</surname> <given-names>BB</given-names></name>, <name name-style="western"><surname>Romanski</surname> <given-names>LM</given-names></name>. <article-title>Integration of auditory and visual communication information in the primate ventrolateral prefrontal cortex</article-title>. <source>J Neurosci</source>. <year>2006</year>;<volume>26</volume>(<issue>43</issue>):<fpage>11138</fpage>–<lpage>47</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1523/JNEUROSCI.3550-06.2006" xlink:type="simple">10.1523/JNEUROSCI.3550-06.2006</ext-link></comment> <object-id pub-id-type="pmid">17065454</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref024"><label>24</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Romanski</surname> <given-names>LM</given-names></name>, <name name-style="western"><surname>Sharma</surname> <given-names>KK</given-names></name>. <article-title>Multisensory interactions of face and vocal information during perception and memory in ventrolateral prefrontal cortex</article-title>. <source>Philos Trans R Soc Lond B Biol Sci</source>. <year>2023</year> <month>Sep</month> <day>25</day>;<volume>378</volume>(<issue>1886</issue>). <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1098/rstb.2022.0343" xlink:type="simple">10.1098/rstb.2022.0343</ext-link></comment> <object-id pub-id-type="pmid">37545305</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref025"><label>25</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Romanski</surname> <given-names>LM</given-names></name>. <article-title>Representation and integration of auditory and visual stimuli in the primate ventral lateral prefrontal cortex</article-title>. <source>Cereb Cortex</source>. <year>2007</year> <month>Sep</month>;<volume>17</volume>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/cercor/bhm099" xlink:type="simple">10.1093/cercor/bhm099</ext-link></comment> <object-id pub-id-type="pmid">17634387</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref026"><label>26</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Adachi</surname> <given-names>I</given-names></name>, <name name-style="western"><surname>Hampton</surname> <given-names>RR</given-names></name>. <article-title>Rhesus monkeys see who they hear: Spontaneous cross-modal memory for familiar conspecifics.</article-title> <source>PLoS One.</source> <year>2011</year>;<volume>6</volume>(<issue>8</issue>). <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pone.0023345" xlink:type="simple">10.1371/journal.pone.0023345</ext-link></comment> <object-id pub-id-type="pmid">21887244</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref027"><label>27</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Diehl</surname> <given-names>MM</given-names></name>, <name name-style="western"><surname>Romanski</surname> <given-names>LM</given-names></name>. <source>Responses of Prefrontal Multisensory Neurons to Mismatching Faces and Vocalizations</source>. <year>2014</year>;<volume>34</volume>(<issue>34</issue>):<fpage>11233</fpage>–<lpage>43</lpage>.</mixed-citation></ref>
<ref id="pone.0317183.ref028"><label>28</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Romanski</surname> <given-names>LM</given-names></name>, <name name-style="western"><surname>Averbeck</surname> <given-names>BB</given-names></name>, <name name-style="western"><surname>Diltz</surname> <given-names>M</given-names></name>. <article-title>Neural representation of vocalizations in the primate ventrolateral prefrontal cortex</article-title>. <source>J Neurophysiol</source>. <year>2005</year> <month>Feb</month>;<volume>93</volume>(<issue>2</issue>):<fpage>734</fpage>–<lpage>47</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1152/jn.00675.2004" xlink:type="simple">10.1152/jn.00675.2004</ext-link></comment> <object-id pub-id-type="pmid">15371495</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref029"><label>29</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Romanski</surname> <given-names>LM</given-names></name>, <name name-style="western"><surname>Goldman-Rakic</surname> <given-names>PS</given-names></name>. <article-title>An auditory domain in primate prefrontal cortex</article-title>. <source>Nat Neurosci</source>. <year>2002</year>;<volume>5</volume>(<issue>1</issue>):<fpage>15</fpage>–<lpage>6</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/nn781" xlink:type="simple">10.1038/nn781</ext-link></comment> <object-id pub-id-type="pmid">11753413</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref030"><label>30</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Cohen</surname> <given-names>YE</given-names></name>, <name name-style="western"><surname>Theunissen</surname> <given-names>F</given-names></name>, <name name-style="western"><surname>Russ</surname> <given-names>BE</given-names></name>, <name name-style="western"><surname>Gill</surname> <given-names>P</given-names></name>. <article-title>Acoustic features of rhesus vocalizations and their representation in the ventrolateral prefrontal cortex</article-title>. <source>J Neurophysiol</source>. <year>2007</year>;<volume>97</volume>(<issue>2</issue>):<fpage>1470</fpage>–<lpage>84</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1152/jn.00769.2006" xlink:type="simple">10.1152/jn.00769.2006</ext-link></comment> <object-id pub-id-type="pmid">17135477</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref031"><label>31</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Gifford</surname> <given-names>GW</given-names></name>, <name name-style="western"><surname>MacLean</surname> <given-names>K a</given-names></name>, <name name-style="western"><surname>Hauser</surname> <given-names>MD</given-names></name>, <name name-style="western"><surname>Cohen</surname> <given-names>YE</given-names></name>. <article-title>The neurophysiology of functionally meaningful categories: macaque ventrolateral prefrontal cortex plays a critical role in spontaneous categorization of species-specific vocalizations.</article-title> <source>J Cogn Neurosci.</source> <year>2005</year>;<volume>17</volume>(<issue>9</issue>):<fpage>1471</fpage>–<lpage>82</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1162/0898929054985464" xlink:type="simple">10.1162/0898929054985464</ext-link></comment> <object-id pub-id-type="pmid">16197700</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref032"><label>32</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Huang</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Brosch</surname> <given-names>M</given-names></name>. <article-title>Neuronal activity in primate prefrontal cortex related to goal-directed behavior during auditory working memory tasks</article-title>. <source>Brain Res</source>. <year>2016</year> <month>Jun</month> <day>1</day>;<volume>1640</volume>(<issue>Pt B</issue>):<fpage>314</fpage>–<lpage>27</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.brainres.2016.02.010" xlink:type="simple">10.1016/j.brainres.2016.02.010</ext-link></comment> <object-id pub-id-type="pmid">26874071</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref033"><label>33</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Eacott</surname> <given-names>MJ</given-names></name>, <name name-style="western"><surname>Gaffan</surname> <given-names>D</given-names></name>. <article-title>Inferotemporal-frontal Disconnection: The Uncinate Fascicle and Visual Associative Learning in Monkeys</article-title>. <source>Eur J Neurosci</source>. <year>1992</year>;<volume>4</volume>(<issue>12</issue>):<fpage>1320</fpage>–<lpage>32</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1111/j.1460-9568.1992.tb00157.x" xlink:type="simple">10.1111/j.1460-9568.1992.tb00157.x</ext-link></comment> <object-id pub-id-type="pmid">12106395</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref034"><label>34</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Romanski</surname> <given-names>LM</given-names></name>, <name name-style="western"><surname>Bates</surname> <given-names>JF</given-names></name>, <name name-style="western"><surname>Goldman-Rakic</surname> <given-names>PS</given-names></name>. <article-title>Auditory belt and parabelt projections to the prefrontal cortex in the rhesus monkey</article-title>. <source>Journal of Comparative Neurology</source>. <year>1999</year>;<volume>403</volume>(<collab>April 1998</collab>):<fpage>141</fpage>–<lpage>57</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1002/%28sici%291096-9861%2819990111%29403%3A2%26lt%3B141%3A%3Aaid-cne1%26gt%3B3.0.co%3B2-v" xlink:type="simple">10.1002/(sici)1096-9861(19990111)403:2&lt;141::aid-cne1&gt;3.0.co;2-v</ext-link></comment> <object-id pub-id-type="pmid">9886040</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref035"><label>35</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Romanski</surname> <given-names>LM</given-names></name>, <name name-style="western"><surname>Tian</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Fritz</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Mishkin</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Goldman-Rakic</surname> <given-names>PS</given-names></name>, <name name-style="western"><surname>Rauschecker</surname> <given-names>JP</given-names></name>. <article-title>Dual streams of auditory afferents target multiple domains in the primate prefrontal cortex</article-title>. <source>Nat Neurosci</source>. <year>1999</year>; <volume>2</volume>:<fpage>1131</fpage>–<lpage>6</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/16056" xlink:type="simple">10.1038/16056</ext-link></comment> <object-id pub-id-type="pmid">10570492</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref036"><label>36</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Gaffan</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Harrison</surname> <given-names>S</given-names></name>. <article-title>Auditory-visual associations, hemispheric specialization and temporal-frontal interaction in the rhesus monkey</article-title>. <source>Brain</source>. <year>1991</year>; <volume>114</volume>:<fpage>2133</fpage>–<lpage>44</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/brain/114.5.2133" xlink:type="simple">10.1093/brain/114.5.2133</ext-link></comment> <object-id pub-id-type="pmid">1933238</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref037"><label>37</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Chandrasekaran</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Lemus</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Ghazanfar</surname> <given-names>AA</given-names></name>. <article-title>Dynamic faces speed up the onset of auditory cortical spiking responses during vocal detection</article-title>. <source>Proc Natl Acad Sci U S A</source>. <year>2013</year>;<volume>110</volume>: <fpage>E4668</fpage>–<lpage>77</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1073/pnas.1312518110" xlink:type="simple">10.1073/pnas.1312518110</ext-link></comment> <object-id pub-id-type="pmid">24218574</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref038"><label>38</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ghazanfar</surname> <given-names>AA</given-names></name>, <name name-style="western"><surname>Chandrasekaran</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Logothetis</surname> <given-names>NK</given-names></name>. <article-title>Interactions between the superior temporal sulcus and auditory cortex mediate dynamic face/voice integration in rhesus monkeys</article-title>. <source>J Neurosci</source>. <year>2008</year>;<volume>28</volume>(<issue>17</issue>):<fpage>4457</fpage>–<lpage>69</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1523/JNEUROSCI.0541-08.2008" xlink:type="simple">10.1523/JNEUROSCI.0541-08.2008</ext-link></comment> <object-id pub-id-type="pmid">18434524</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref039"><label>39</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Foxe</surname> <given-names>JJ</given-names></name>, <name name-style="western"><surname>Schroeder</surname> <given-names>CE</given-names></name>. <article-title>The case for feedforward multisensory convergence during early cortical processing</article-title>. <source>Neuroreport</source>. <year>2005</year> <month>Apr</month> <day>4</day>;<volume>16</volume>(<issue>5</issue>):<fpage>419</fpage>–<lpage>23</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1097/00001756-200504040-00001" xlink:type="simple">10.1097/00001756-200504040-00001</ext-link></comment> <object-id pub-id-type="pmid">15770144</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref040"><label>40</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ghazanfar</surname> <given-names>AA</given-names></name>, <name name-style="western"><surname>Schroeder</surname> <given-names>CE</given-names></name>. <article-title>Is neocortex essentially multisensory?</article-title> <year>2006</year>;<volume>10</volume>(<issue>6</issue>). <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.tics.2006.04.008" xlink:type="simple">10.1016/j.tics.2006.04.008</ext-link></comment> <object-id pub-id-type="pmid">16713325</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref041"><label>41</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Huang</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Brosch</surname> <given-names>M</given-names></name>. <article-title>Behavior-related visual activations in the auditory cortex of nonhuman primates</article-title>. <source>Prog Neurobiol</source>. <year>2024</year> <month>Sep</month> <day>1</day>;<volume>240</volume>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.pneurobio.2024.102637" xlink:type="simple">10.1016/j.pneurobio.2024.102637</ext-link></comment> <object-id pub-id-type="pmid">38879074</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref042"><label>42</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Perrodin</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Kayser</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Logothetis</surname> <given-names>NK</given-names></name>, <name name-style="western"><surname>Petkov</surname> <given-names>CI</given-names></name>. <article-title>Auditory and visual modulation of temporal lobe neurons in voice-sensitive and association cortices</article-title>. <source>Journal of Neuroscience</source>. <year>2014</year>;<volume>34</volume>(<issue>7</issue>):<fpage>2524</fpage>–<lpage>37</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1523/JNEUROSCI.2805-13.2014" xlink:type="simple">10.1523/JNEUROSCI.2805-13.2014</ext-link></comment> <object-id pub-id-type="pmid">24523543</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref043"><label>43</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Chandrasekaran</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Ghazanfar</surname> <given-names>AA</given-names></name>. <article-title>Different neural frequency bands integrate faces and voices differently in the superior temporal sulcus</article-title>. <source>J Neurophysiol</source>. <year>2009</year> <month>Feb</month>;<volume>101</volume>(<issue>2</issue>):<fpage>773</fpage>–<lpage>88</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1152/jn.90843.2008" xlink:type="simple">10.1152/jn.90843.2008</ext-link></comment> <object-id pub-id-type="pmid">19036867</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref044"><label>44</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Dahl</surname> <given-names>CD</given-names></name>, <name name-style="western"><surname>Logothetis</surname> <given-names>NK</given-names></name>, <name name-style="western"><surname>Kayser</surname> <given-names>C</given-names></name>. <article-title>Modulation of visual responses in the superior temporal sulcus by audio-visual congruency.</article-title> <source>Front Integr Neurosci.</source> <year>2010</year> <month>Apr</month>;<volume>4</volume>(<issue>APRIL 2010</issue>). <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fnint.2010.00010" xlink:type="simple">10.3389/fnint.2010.00010</ext-link></comment> <object-id pub-id-type="pmid">20428507</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref045"><label>45</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Tyree</surname> <given-names>TJ</given-names></name>, <name name-style="western"><surname>Metke</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Miller</surname> <given-names>CT</given-names></name>. <article-title>Cross-modal representation of identity in the primate hippocampus</article-title>. <source>Science</source>. <year>2023</year>;<volume>382</volume>(<issue>6669</issue>):<fpage>417</fpage>–<lpage>23</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1126/science.adf0460" xlink:type="simple">10.1126/science.adf0460</ext-link></comment> <object-id pub-id-type="pmid">37883535</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref046"><label>46</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Zhou</surname> <given-names>YD</given-names></name>, <name name-style="western"><surname>Fuster</surname> <given-names>JM</given-names></name>. <article-title>Visuo-tactile cross-modal associations in cortical somatosensory cells</article-title>. <source>Proc Natl Acad Sci U S A</source>. <year>2000</year>;<volume>97</volume>(<issue>17</issue>):<fpage>9777</fpage>–<lpage>82</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1073/pnas.97.17.9777" xlink:type="simple">10.1073/pnas.97.17.9777</ext-link></comment> <object-id pub-id-type="pmid">10944237</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref047"><label>47</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Weiskrantz</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Cowey</surname> <given-names>A</given-names></name>. <article-title>Cross modal matching in the rhesus monkey using a single pair of stimuli.</article-title> <source>Neuropsychologia</source>. <year>1975</year>;<volume>13</volume>(<issue>3</issue>):<fpage>257</fpage>–<lpage>61</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/0028-3932%2875%2990001-9" xlink:type="simple">10.1016/0028-3932(75)90001-9</ext-link></comment> <object-id pub-id-type="pmid">808744</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref048"><label>48</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Fuster</surname> <given-names>JM</given-names></name>, <name name-style="western"><surname>Bodner</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Kroger</surname> <given-names>JK</given-names></name>. <article-title>Cross-modal and cross-temporal association in neurons of frontal cortex</article-title>. <source>Nature</source>. <year>2000</year>;<volume>405</volume>(<issue>6784</issue>):<fpage>347</fpage>–<lpage>51</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1038/35012613" xlink:type="simple">10.1038/35012613</ext-link></comment> <object-id pub-id-type="pmid">10830963</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref049"><label>49</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Stein</surname> <given-names>BE</given-names></name>, <name name-style="western"><surname>Meredith</surname> <given-names>MA</given-names></name>. <source>The merging of the senses</source>. <publisher-loc>Cambridge, Massachusetts</publisher-loc>: <publisher-name>MIT Press</publisher-name>; <year>1993</year>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1162/jocn.1993.5.3.373" xlink:type="simple">10.1162/jocn.1993.5.3.373</ext-link></comment></mixed-citation></ref>
<ref id="pone.0317183.ref050"><label>50</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Nieder</surname> <given-names>A.</given-names></name> <article-title>Supramodal numerosity selectivity of neurons in primate prefrontal and posterior parietal cortices</article-title>. <source>Proceedings of the National Academy of Sciences</source>. <year>2012</year>;<volume>109</volume>(<issue>29</issue>):<fpage>11860</fpage>–<lpage>5</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1073/pnas.1204580109" xlink:type="simple">10.1073/pnas.1204580109</ext-link></comment> <object-id pub-id-type="pmid">22761312</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref051"><label>51</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Lemus</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Hernández</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Luna</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Zainos</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Romo</surname> <given-names>R</given-names></name>. <article-title>Do sensory cortices process more than one sensory modality during perceptual judgments?</article-title> <source>Neuron</source>. <year>2010</year>; <volume>67</volume>:<fpage>335</fpage>–<lpage>48</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.neuron.2010.06.015" xlink:type="simple">10.1016/j.neuron.2010.06.015</ext-link></comment> <object-id pub-id-type="pmid">20670839</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref052"><label>52</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Lemus</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Hernandez</surname> <given-names>a</given-names></name>, <name name-style="western"><surname>Romo</surname> <given-names>R</given-names></name>. <article-title>Neural encoding of auditory discrimination in ventral premotor cortex</article-title>. <source>Proc Natl Acad Sci U S A</source>. <year>2009</year>; <volume>106</volume>:<fpage>14640</fpage>–<lpage>5</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1073/pnas.0907505106" xlink:type="simple">10.1073/pnas.0907505106</ext-link></comment> <object-id pub-id-type="pmid">19667191</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref053"><label>53</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Lemus</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Hernández</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Romo</surname> <given-names>R</given-names></name>. <article-title>Neural codes for perceptual discrimination of acoustic flutter in the primate auditory cortex</article-title>. <source>Proc Natl Acad Sci U S A</source>. <year>2009</year>;<volume>106</volume>(<issue>23</issue>):<fpage>9471</fpage>–<lpage>6</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1073/pnas.0904066106" xlink:type="simple">10.1073/pnas.0904066106</ext-link></comment> <object-id pub-id-type="pmid">19458263</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref054"><label>54</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Vergara</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Rivera</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Rossi-Pool</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Romo</surname> <given-names>R</given-names></name>. <article-title>A Neural Parametric Code for Storing Information of More than One Sensory Modality in Working Memory</article-title>. <source>Neuron</source>. <year>2016</year>;<volume>89</volume>(<issue>1</issue>):<fpage>54</fpage>–<lpage>62</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.neuron.2015.11.026" xlink:type="simple">10.1016/j.neuron.2015.11.026</ext-link></comment> <object-id pub-id-type="pmid">26711117</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref055"><label>55</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Melchor</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Vergara</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Figueroa</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Morán</surname> <given-names>I</given-names></name>, <name name-style="western"><surname>Lemus</surname> <given-names>L</given-names></name>. <article-title>Formant-based recognition of words and other naturalistic sounds in rhesus monkeys.</article-title> <source>Front Neurosci</source>. <year>2021</year>; <volume>15</volume>:<fpage>1</fpage>–<lpage>10</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fnins.2021.728686" xlink:type="simple">10.3389/fnins.2021.728686</ext-link></comment> <object-id pub-id-type="pmid">34776842</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref056"><label>56</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Morán</surname> <given-names>I</given-names></name>, <name name-style="western"><surname>Perez-Orive</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Melchor</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Figueroa</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Lemus</surname> <given-names>L</given-names></name>. <article-title>Auditory decisions in the supplementary motor area</article-title>. <source>Prog Neurobiol</source>. <year>2021</year>;<volume>202</volume>:<fpage>1</fpage>–<lpage>11</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.pneurobio.2021.102053" xlink:type="simple">10.1016/j.pneurobio.2021.102053</ext-link></comment> <object-id pub-id-type="pmid">33957182</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref057"><label>57</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Chandrasekaran</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Lemus</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Trubanova</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Gondan</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Ghazanfar</surname> <given-names>A a</given-names></name>. <article-title>Monkeys and humans share a common computation for face/voice integration.</article-title> <source>PLoS Comput Biol.</source> <year>2011</year>;<volume>7</volume>(<issue>9</issue>).</mixed-citation></ref>
<ref id="pone.0317183.ref058"><label>58</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Majerus</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Cowan</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Péters</surname> <given-names>F</given-names></name>, <name name-style="western"><surname>Van Calster</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Phillips</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Schrouff</surname> <given-names>J</given-names></name>. <article-title>Cross-Modal Decoding of Neural Patterns Associated with Working Memory: Evidence for Attention-Based Accounts of Working Memory</article-title>. <source>Cerebral Cortex</source>. <year>2016</year> <month>Jan</month> <day>1</day>;<volume>26</volume>(<issue>1</issue>):<fpage>166</fpage>–<lpage>79</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1093/cercor/bhu189" xlink:type="simple">10.1093/cercor/bhu189</ext-link></comment> <object-id pub-id-type="pmid">25146374</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref059"><label>59</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>du Sert</surname> <given-names>NP</given-names></name>, <name name-style="western"><surname>Hurst</surname> <given-names>V</given-names></name>, <name name-style="western"><surname>Ahluwalia</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Alam</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Avey</surname> <given-names>MT</given-names></name>, <name name-style="western"><surname>Baker</surname> <given-names>M</given-names></name>, <etal>et al</etal>. <article-title>The ARRIVE guidelines 2.0: Updated guidelines for reporting animal research</article-title>. <source>PLoS Biol</source>. <year>2020</year> <month>Jul</month> <day>1</day>;<volume>18</volume>(<issue>7</issue>).</mixed-citation></ref>
<ref id="pone.0317183.ref060"><label>60</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Russell</surname> <given-names>WMS</given-names></name>, <name name-style="western"><surname>Burch</surname> <given-names>RL</given-names></name>. <source>The Principles of Humane Experimental Technique</source>. <publisher-loc>London</publisher-loc>: <publisher-name>The Universities Federation for Animal Welfare</publisher-name>; <year>1959</year>.</mixed-citation></ref>
<ref id="pone.0317183.ref061"><label>61</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Rescorla</surname> <given-names>RA</given-names></name>, <name name-style="western"><surname>Wagner</surname> <given-names>AR</given-names></name>. <chapter-title>A theory of Pavlovian conditioning: Variations in the effectiveness of reinforcement and non-reinforcement.</chapter-title> In: <name name-style="western"><surname>Black</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Prokasy</surname> <given-names>W</given-names></name>, editors. <source>In Classical conditioning II.</source> <publisher-loc>New York</publisher-loc>: <publisher-name>Appleton-Century Crofts</publisher-name>; <year>1972</year>. p. <fpage>64</fpage>–<lpage>99</lpage>.</mixed-citation></ref>
<ref id="pone.0317183.ref062"><label>62</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Treviño</surname> <given-names>M.</given-names></name> <article-title>Associative learning through acquired salience</article-title>. <source>Front Behav Neurosci.</source> <year>2016</year> <month>Jan</month> <day>11</day>; <volume>9</volume>:<fpage>168673</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3389/fnbeh.2015.00353" xlink:type="simple">10.3389/fnbeh.2015.00353</ext-link></comment> <object-id pub-id-type="pmid">26793078</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref063"><label>63</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Merchant</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Zarco</surname> <given-names>W</given-names></name>, <name name-style="western"><surname>Prado</surname> <given-names>L</given-names></name>. <article-title>Do we have a common mechanism for measuring time in the hundreds of millisecond range? Evidence from multiple-interval timing tasks</article-title>. <source>J Neurophysiol</source>. <year>2008</year> <month>Feb</month>;<volume>99</volume>(<issue>2</issue>):<fpage>939</fpage>–<lpage>49</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1152/jn.01225.2007" xlink:type="simple">10.1152/jn.01225.2007</ext-link></comment> <object-id pub-id-type="pmid">18094101</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref064"><label>64</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ng</surname> <given-names>CW</given-names></name>, <name name-style="western"><surname>Plakke</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Poremba</surname> <given-names>A</given-names></name>. <article-title>Primate auditory recognition memory performance varies with sound type</article-title>. <source>Hear Res</source>. <year>2009</year> <month>Oct</month> <day>1</day>;<volume>256</volume>(<issue>1–2</issue>):<fpage>64</fpage>–<lpage>74</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.heares.2009.06.014" xlink:type="simple">10.1016/j.heares.2009.06.014</ext-link></comment> <object-id pub-id-type="pmid">19567264</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref065"><label>65</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Scott</surname> <given-names>BH</given-names></name>, <name name-style="western"><surname>Mishkin</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Yin</surname> <given-names>P</given-names></name>. <article-title>Monkeys have a limited form of short-term memory in audition</article-title>. <source>Proc Natl Acad Sci U S A</source>. <year>2012</year> <month>Jul</month> <day>24</day>;<volume>109</volume>(<issue>30</issue>):<fpage>12237</fpage>–<lpage>41</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1073/pnas.1209685109" xlink:type="simple">10.1073/pnas.1209685109</ext-link></comment> <object-id pub-id-type="pmid">22778411</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref066"><label>66</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Shushruth</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Zylberberg</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Shadlen</surname> <given-names>MN</given-names></name>. <article-title>Sequential sampling from memory underlies action selection during abstract decision-making</article-title>. <source>Curr Biol</source>. <year>2022</year> <month>May</month> <day>9</day>;<volume>32</volume>(<issue>9</issue>):<fpage>1949</fpage>–<lpage>1960.e5</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.cub.2022.03.014" xlink:type="simple">10.1016/j.cub.2022.03.014</ext-link></comment> <object-id pub-id-type="pmid">35354066</object-id></mixed-citation></ref>
<ref id="pone.0317183.ref067"><label>67</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Bennur</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Gold</surname> <given-names>JI</given-names></name>. <article-title>Distinct representations of a perceptual decision and the associated oculomotor plan in the monkey lateral intraparietal area</article-title>. <source>J Neurosci</source>. <year>2011</year> <month>Jan</month> <day>19</day>;<volume>31</volume>(<issue>3</issue>):<fpage>913</fpage>–<lpage>21</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1523/JNEUROSCI.4417-10.2011" xlink:type="simple">10.1523/JNEUROSCI.4417-10.2011</ext-link></comment> <object-id pub-id-type="pmid">21248116</object-id></mixed-citation></ref>
</ref-list>
</back>
<sub-article article-type="aggregated-review-documents" id="pone.0317183.r001" specific-use="decision-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pone.0317183.r001</article-id>
<title-group>
<article-title>Decision Letter 0</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Vatakis</surname>
<given-names>Argiro</given-names>
</name>
<role>Academic Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2025</copyright-year>
<copyright-holder>Argiro Vatakis</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pone.0317183" document-id-type="doi" document-type="article" id="rel-obj001" link-type="peer-reviewed-article"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>0</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">19 Aug 2024</named-content>
</p>
<p>PONE-D-24-30219</p>
<p>Crossmodal association between auditory and visual objects in rhesus monkeys</p>
<p>PLOS ONE</p>
<p>Dear Dr. Lemus,</p>
<p>Thank you for submitting your manuscript to PLOS ONE. After careful consideration, we feel that it has merit but does not fully meet PLOS ONE’s publication criteria as it currently stands. Therefore, we invite you to submit a revised version of the manuscript that addresses the points raised during the review process.</p>
<p>Please read carefully the comments posed by the reviewers; addressing them will also resolve any specific concerns from my side.</p>
<p>Please submit your revised manuscript by Oct 03 2024 11:59PM. If you will need more time than this to complete your revisions, please reply to this message or contact the journal office at <email xlink:type="simple">plosone@plos.org</email>. When you're ready to submit your revision, log on to <ext-link ext-link-type="uri" xlink:href="https://www.editorialmanager.com/pone/" xlink:type="simple">https://www.editorialmanager.com/pone/</ext-link> and select the 'Submissions Needing Revision' folder to locate your manuscript file.</p>
<p>Please include the following items when submitting your revised manuscript:</p>
<p><list list-type="bullet"><list-item><p>A rebuttal letter that responds to each point raised by the academic editor and reviewer(s). You should upload this letter as a separate file labeled 'Response to Reviewers'.</p></list-item><list-item><p>A marked-up copy of your manuscript that highlights changes made to the original version. You should upload this as a separate file labeled 'Revised Manuscript with Track Changes'.</p></list-item><list-item><p>An unmarked version of your revised paper without tracked changes. You should upload this as a separate file labeled 'Manuscript'.</p></list-item></list></p>
<p>If you would like to make changes to your financial disclosure, please include your updated statement in your cover letter. Guidelines for resubmitting your figure files are available below the reviewer comments at the end of this letter.</p>
<p>If applicable, we recommend that you deposit your laboratory protocols in protocols.io to enhance the reproducibility of your results. Protocols.io assigns your protocol its own identifier (DOI) so that it can be cited independently in the future. For instructions see: <ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/plosone/s/submission-guidelines#loc-laboratory-protocols" xlink:type="simple">https://journals.plos.org/plosone/s/submission-guidelines#loc-laboratory-protocols</ext-link>. Additionally, PLOS ONE offers an option for publishing peer-reviewed Lab Protocol articles, which describe protocols hosted on protocols.io. Read more information on sharing protocols at <ext-link ext-link-type="uri" xlink:href="https://plos.org/protocols?utm_medium=editorial-email&amp;utm_source=authorletters&amp;utm_campaign=protocols" xlink:type="simple">https://plos.org/protocols?utm_medium=editorial-email&amp;utm_source=authorletters&amp;utm_campaign=protocols</ext-link>.</p>
<p>We look forward to receiving your revised manuscript.</p>
<p>Kind regards,</p>
<p>Argiro Vatakis</p>
<p>Academic Editor</p>
<p>PLOS ONE</p>
<p>Journal Requirements:</p>
<p>When submitting your revision, we need you to address these additional requirements.</p>
<p>1. Please ensure that your manuscript meets PLOS ONE's style requirements, including those for file naming. The PLOS ONE style templates can be found at </p>
<p><ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/plosone/s/file?id=wjVg/PLOSOne_formatting_sample_main_body.pdf" xlink:type="simple">https://journals.plos.org/plosone/s/file?id=wjVg/PLOSOne_formatting_sample_main_body.pdf</ext-link> and </p>
<p><ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/plosone/s/file?id=ba62/PLOSOne_formatting_sample_title_authors_affiliations.pdf" xlink:type="simple">https://journals.plos.org/plosone/s/file?id=ba62/PLOSOne_formatting_sample_title_authors_affiliations.pdf</ext-link></p>
<p>2. We note that the grant information you provided in the ‘Funding Information’ and ‘Financial Disclosure’ sections does not match.</p>
<p>When you resubmit, please ensure that you provide the correct grant numbers for the awards you received for your study in the ‘Funding Information’ section.</p>
<p>3. When completing the data availability statement of the submission form, you indicated that you will make your data available on acceptance. We strongly recommend all authors decide on a data sharing plan before acceptance, as the process can be lengthy and hold up publication timelines. Please note that, though access restrictions are acceptable now, your entire data will need to be made freely accessible if your manuscript is accepted for publication. This policy applies to all data except where public deposition would breach compliance with the protocol approved by your research ethics board. If you are unable to adhere to our open data policy, please kindly revise your statement to explain your reasoning and we will seek the editor's input on an exemption. Please be assured that, once you have provided your new statement, the assessment of your exemption will not hold up the peer review process.</p>
<p>4. We note that Figures 1-4 and S3 include an image of a participant in the study.</p>
<p>As per the PLOS ONE policy (<ext-link ext-link-type="uri" xlink:href="http://journals.plos.org/plosone/s/submission-guidelines#loc-human-subjects-research" xlink:type="simple">http://journals.plos.org/plosone/s/submission-guidelines#loc-human-subjects-research</ext-link>) on papers that include identifying, or potentially identifying, information, the individual(s) or parent(s)/guardian(s) must be informed of the terms of the PLOS open-access (CC-BY) license and provide specific permission for publication of these details under the terms of this license. Please download the Consent Form for Publication in a PLOS Journal (<ext-link ext-link-type="uri" xlink:href="http://journals.plos.org/plosone/s/file?id=8ce6/plos-consent-form-english.pdf" xlink:type="simple">http://journals.plos.org/plosone/s/file?id=8ce6/plos-consent-form-english.pdf</ext-link>). The signed consent form should not be submitted with the manuscript, but should be securely filed in the individual's case notes. Please amend the methods section and ethics statement of the manuscript to explicitly state that the patient/participant has provided consent for publication: “The individual in this manuscript has given written informed consent (as outlined in PLOS consent form) to publish these case details”. </p>
<p>If you are unable to obtain consent from the subject of the photograph, you will need to remove the figure and any other textual identifying information or case descriptions for this individual.</p>
<p>5. Please review your reference list to ensure that it is complete and correct. If you have cited papers that have been retracted, please include the rationale for doing so in the manuscript text, or remove these references and replace them with relevant current references. Any changes to the reference list should be mentioned in the rebuttal letter that accompanies your revised manuscript. If you need to cite a retracted article, indicate the article’s retracted status in the References list and also include a citation and full reference for the retraction notice.</p>
<p>Reviewers' comments:</p>
<p>Reviewer's Responses to Questions</p>
<p><!-- <font color="black"> --><bold>Comments to the Author</bold></p>
<p>1. Is the manuscript technically sound, and do the data support the conclusions?</p>
<p>The manuscript must describe a technically sound piece of scientific research with data that supports the conclusions. Experiments must have been conducted rigorously, with appropriate controls, replication, and sample sizes. The conclusions must be drawn appropriately based on the data presented.</p>
<p>Reviewer #1: Partly</p>
<p>Reviewer #2: Yes</p>
<p>**********</p>
<p><!-- <font color="black"> -->2. Has the statistical analysis been performed appropriately and rigorously? <!-- </font> --></p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: Yes</p>
<p>**********</p>
<p><!-- <font color="black"> -->3. Have the authors made all data underlying the findings in their manuscript fully available?</p>
<p>The <ext-link ext-link-type="uri" xlink:href="http://www.plosone.org/static/policies.action#sharing" xlink:type="simple">PLOS Data policy</ext-link> requires authors to make all data underlying the findings described in their manuscript fully available without restriction, with rare exception (please refer to the Data Availability Statement in the manuscript PDF file). The data should be provided as part of the manuscript or its supporting information, or deposited to a public repository. For example, in addition to summary statistics, the data points behind means, medians and variance measures should be available. If there are restrictions on publicly sharing data—e.g. participant privacy or use of data from a third party—those must be specified.</p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: Yes</p>
<p>**********</p>
<p><!-- <font color="black"> -->4. Is the manuscript presented in an intelligible fashion and written in standard English?</p>
<p>PLOS ONE does not copyedit accepted manuscripts, so the language in submitted articles must be clear, correct, and unambiguous. Any typographical or grammatical errors should be corrected at revision, so please note any specific errors here.</p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: Yes</p>
<p>**********</p>
<p><!-- <font color="black"> -->5. Review Comments to the Author</p>
<p>Please use the space provided to explain your answers to the questions above. You may also include additional comments for the author, including concerns about dual publication, research ethics, or publication ethics. (Please upload your review as an attachment if it exceeds 20,000 characters)</p>
<p>Reviewer #1: The manuscript of Cabrera-Ruiz is focused on macaques’ abilities to learn audio-visual crossmodal associations in a delayed match-to-sample task.</p>
<p>Although the text is generally very well written, in my opinion the main focus/emphasis of the paper is not stated clearly enough. As far as I understand, one of the main achievements of the study is the new procedure for training macaques to perform the audio-visual cross-modal task. However, instead of emphasizing that achievement and discussing it properly, the authors extensively comment on (arbitrary) proposed models of cross-modal associations, which cannot be tested without application of electrophysiological methods.</p>
<p>Major comments:</p>
<p>Introduction: More information (including existing models) about cross-modal association in macaques should be provided.</p>
<p>Line 42: The authors mention that the subjects are relatively old, which makes me think that they have experience in many other tasks. It would be good to comment whether and in what way the previous tasks, which the subjects were trained in, could affect their ability to learn the crossmodal associations.</p>
<p>Line 49: What would be the influence of the environmental enrichment on the performance of the audio-visual task?</p>
<p>Line 94-102 – Monkey Training: In my opinion this is one of the most important methodological parts of the paper. More details should be provided regarding the training procedure.</p>
<p>Line 146 / Fig 2C, insets: It would be interesting not just to present the FA rates, but also to show which are the most common mistakes (e.g. are monkey faces confused more often with human faces in comparison to the red oval).</p>
<p>Lines 188-195: The authors identify 3 key findings, the first 2 of which directly follow the presented results. The third finding claims that “the monkeys’ success in forming crossmodal associations required […] working memory”, which is logical, however it is unclear how it could be derived from the results.</p>
<p>Lines 208-212: In my opinion this is the most important contribution of the study and I think that it should be discussed more extensively.</p>
<p>Lines 232-270: The two presented models are very interesting; however, in the context of this behavioral experiment they are completely arbitrary. I am not sure whether they should be discussed so extensively without any electrophysiological data that could support either of them.</p>
<p>Minor comments:</p>
<p>Line 2: the sentence starting with “To find out” is ambiguous. To find out what?</p>
<p>Line 98: probably “cue to” instead of “clue for”.</p>
<p>Reviewer #2: Review for “Crossmodal association between auditory and visual objects in rhesus monkeys”</p>
<p>PONE-D-24-30219</p>
<p>This paper assessed whether rhesus macaques can learn crossmodal associations, how they perceive the visual sets versus the auditory sets in those associations, and what models of visual or auditory memory might explain these patterns. They successfully trained two macaques on a set of crossmodal associations (CMAs), which is no trivial feat. Performance on the task is well presented and clearly above chance. Upon further assessment, macaques appear to treat different individual sounds of the same type as perceptually equivalent, whereas patterns of reaction times for different images suggest this might not be the case for the visual image sets. This has interesting implications for how macaques may form these types of representations, and the authors put forth some possible models for how these representations might be stored in memory to explain these patterns.</p>
<p>I believe this experiment is interesting and relevant to ongoing work in the field. The data are compelling, and the experimental design effectively addresses their questions. There is room for improvement in how the paper is framed, as well as in the clarity of some of the explanations and data presentation, but I believe the work itself is sound, and will be useful groundwork for future studies in the field. I recommend this paper for publication, pending some revisions based on my suggestions below.</p>
<p>General Suggestions</p>
<p>1) Framing of memory systems in the intro. Your intro only talks about assessing whether macaques can learn CMAs. This makes sense since that’s a major point of your study, but this paper isn’t really about the CMA learning itself. I spent most of the paper thinking about the learning and acquisition of CMAs, since that’s how it’s framed here, but you don’t really make any points about acquisition at all. So much of your discussion is dedicated to how those CMAs are represented and maintained in the brain, and using macaques as a model system for assessing these different possible neural models. You need to lay the groundwork for that here. Are there existing competing hypotheses about how CMAs are represented in humans or macaques that you could discuss here? At the very least you need to mention that CMAs can be held in working memory, or that memory is relevant to this process at all. Even some basic background about audio versus visual working memory and how they might be relevant here would help readers keep that framing in mind and not be blindsided by the discussion.</p>
<p>2) How you’re referring to CMAs and sets of CMAs. It took me a while to figure out how exactly your different, specific CMAs were grouped (i.e. one single coo was trained to be associated with a few different monkey images, and each of those individual associations was one CMA). It would be useful to clarify this early, and possibly give a name to the different CMA type groups (i.e. CMA sets or something like that), because knowing the associations were one coo to multiple images changes how a reader might interpret the results. I know you added more coos later on, but I spent the majority of the paper thinking each CMA was made up of completely different individual sounds and images. Even the S3 table didn’t clarify this for me, as I read the list as being grouped into categories (i.e. “coos”) with the individual pictures presented, and I interpreted the letters next to each picture as specifying the name of the specific sound used with that picture, rather than as the identifying name of the picture itself.</p>
<p>3) CMA acquisition. Do you have acquisition data for every time a new CMA is added? It sounds like once monkey M initially learned a CMA type (the coo, for example), they didn’t take much training to learn to associate a new image with that coo (I believe you added in new individual CMA images gradually, correct? If that’s not the case then I’m misinterpreting the methods and they should be clarified). If they are in fact generalizing that single “coo” learning to new images, that is pretty compelling evidence for a learned label (“coo” means “choose monkey”, for example) rather than many separate individually learned associations. That seems like really relevant information for how these CMAs are acquired and represented, and would definitely be worth adding. You could plot the number of presentations they took to learn each new CMA image, and if they’ve really learned a category label, I would expect those numbers to drop drastically for CMAs acquired later.</p>
<p>4) Other crossmodal association literature. There is a fair amount of work looking at crossmodal associations in humans and animals, though it tends to be framed more towards identity recognition; it would be worth looking into and including. Here’s a useful review:</p>
<p>Perrodin, C., Kayser, C., Abel, T. J., Logothetis, N. K. &amp; Petkov, C. I. Who is That? Brain Networks and Mechanisms for Identifying Individuals. Trends in Cognitive Sciences 19, 783–796 (2015).</p>
<p>5) Arbitrary learned associations, or existing associations between animals and their calls? Because I was thinking so much about learning, I had a specific running question throughout the paper, which is whether the macaques are actually learning arbitrary associations, or coming in with existing, ecologically relevant associations of certain sounds with certain animals. I think this would be easy to fix with some additional support. If you think they’re arbitrary (and I think I agree with you), you just need to provide evidence. For example, if you have reason to believe the macaques don’t perceive faces on screen as though they’re actual macaques/humans/etc, or that you saw similar patterns even with your cartoon stimuli and the color red, or that they’ve never seen or heard cows before, or something along those lines.</p>
<p>Specific Suggestions</p>
<p>Abstract:</p>
<p>Line 1: I would be careful here. Your topic of interest is symbolic crossmodal associations, but even with this cool task, you can’t really make claims that your macaques have learned something symbolic, especially since it’s a unidirectional association and you never test the reverse (i.e. see image, choose sound, which would be a bigger methods challenge). Your first line is fine, because it’s true that it is uncertain, but in the second line you need to make it clear that you’re testing for a step below symbolism, just any crossmodal associations at all, and not making any claims about whether they’re represented symbolically in macaques.</p>
<p>(see these references for more info on symbolic representations:</p>
<p>Palmer, F. R., &amp; Palmer, F. R. (1981). Semantics. Cambridge University Press.</p>
<p>Deacon, T. W. (1997). The symbolic species: The co-evolution of language and the brain (1st ed). W.W. Norton.)</p>
<p>Line 6: You say performance remained constant, but I think that undercuts you a bit. They did well, it wasn’t just constant, it was consistently high.</p>
<p>Line 9: You say “semantic and conceptual thinking at the single-neuron level,” which doesn’t really say specifically what you mean and also is never mentioned anywhere else in the paper. I would shift this to refer to what you actually conclude they could be a model for, which is, from what I can tell, the neural pathways by which semantic information is stored in memory and later applied.</p>
<p>Intro:</p>
<p>Line 18: Can you add a reference from human literature to support this claim? It’s not controversial, but there is SO much research on how words are acquired in children, it seems odd to leave it out.</p>
<p>Line 28: See first general suggestion about framing.</p>
<p>Materials and Methods:</p>
<p>Line 77: Stims were presented in an equidistant circle, but did the circle rotate? Was configuration within the circle randomized? How was the actual position and configuration decided?</p>
<p>Line 84: Specify that this is just a cue to denote the start of the trial; I was confused for a moment, thinking you were presenting the actual picture before the sound was played.</p>
<p>Line 88: This would be a good place to clarify the specifics of the CMA sets.</p>
<p>Line 100: Here for example, when you say they learned new CMAs, were they all the same type? As in they started out differentiating coos and human words, and the additional CMAs were new images corresponding to coos and words? Or were they totally new CMA types, as in the addition of moos?</p>
<p>Results:</p>
<p>Line 119: In S2 Fig B, there are 3 clear dips in performance that I imagine correspond to the addition of a new CMA type/additional foil stimulus, is that right? You should mark where those additions occur on the graph. Unless I’m misinterpreting and this is plotting performance once all 4 CMA types have been added, in which case that should be clarified.</p>
<p>Line 156: They seem to have learned a category set. Coo means touch monkey. So the thing that would slow them down is potentially just how long it takes to recognize that the individual monkeys are monkeys, leading to the reaction time differences (Fig. 2 A and C). When you gave them new images with the sounds, did they generalize to new monkey pictures? This is where the acquisition data would be really useful.</p>
<p>In Fig 2D, it seems the takeaway is that when average RTs are longer their variance is higher, is that correct? So there’s more variance in RTs when they’re taking longer to decide. I wasn’t sure what this graph added until I got to the conclusions, because it seems this pattern is the evidence you’re citing for the visual imagery model. I don’t fully see why this is evidence for that model, so I could use more clarity on that front overall. As it stands I’m not sure that I see the importance of this figure.</p>
<p>Line 172: I would clarify that perceptual invariance is not actually an inability to perceive a difference between the sounds. The critical thing is that they are treating these sounds as equivalent (the way we treat a word as the same no matter whose voice is saying it, or a musical note as the same even if it’s in a different octave), but that they would still likely be able to differentiate the individual sounds if need be. That is an important distinction.</p>
<p>Possible follow-up thought, I wonder where macaques draw these category lines. Would they view humans saying different words as equivalent, or would different words each have their own set? What about monkey coos vs screams, presumably those would fall into different categories, but could they learn to lump them together into categories of “monkey” and “human” instead?</p>
<p>Discussion:</p>
<p>Line 191: A point where you could defend that you think these associations are arbitrary.</p>
<p>Line 193: This memory conclusion is surprising; it would be less jarring if more theoretical background on this were added to the intro.</p>
<p>Line 220: Do you report this data anywhere?</p>
<p>Line 224-230: This explanation helped me understand why you included figure 2; I would try to bring some of this clarity up above.</p>
<p>Line 232: Interesting hypotheses are being put forth, but again they come out of nowhere; add better framing for them in the intro.</p>
<p>Line 261: How so? What makes the pattern you see a better match for the visual model? Don’t both models require some amount of processing and decision making once the visual stimuli are presented, resulting in differences in RT based on the specifics of the image? Arguably converting an image to a sound to match against your representation of a sound would in fact require more processing at image presentation than matching an image directly to your existing image representation, so wouldn’t the auditory model potentially result in longer and more varied processing once the choice images are presented? Maybe I’m missing something, but it would be useful for you to explain to the readers why you think this model is a better match to your data.</p>
<p>**********</p>
<p><!-- <font color="black"> -->6. PLOS authors have the option to publish the peer review history of their article (<ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/plosone/s/editorial-and-peer-review-process#loc-peer-review-history" xlink:type="simple">what does this mean?</ext-link>). If published, this will include your full peer review and any attached files.</p>
<p>If you choose “no”, your identity will remain anonymous but your review may still be made public.</p>
<p><bold>Do you want your identity to be public for this peer review?</bold> For information about this choice, including consent withdrawal, please see our <ext-link ext-link-type="uri" xlink:href="https://www.plos.org/privacy-policy" xlink:type="simple">Privacy Policy</ext-link>.</p>
<p>Reviewer #1: <bold>Yes: </bold>Ivo D. Popivanov</p>
<p>Reviewer #2: No</p>
<p>**********</p>
<p>[NOTE: If reviewer comments were submitted as an attachment file, they will be attached to this email and accessible via the submission site. Please log into your account, locate the manuscript record, and check for the action link "View Attachments". If this link does not appear, there are no attachment files.]</p>
<p>While revising your submission, please upload your figure files to the Preflight Analysis and Conversion Engine (PACE) digital diagnostic tool, <ext-link ext-link-type="uri" xlink:href="https://pacev2.apexcovantage.com/" xlink:type="simple">https://pacev2.apexcovantage.com/</ext-link>. PACE helps ensure that figures meet PLOS requirements. To use PACE, you must first register as a user. Registration is free. Then, login and navigate to the UPLOAD tab, where you will find detailed instructions on how to use the tool. If you encounter any issues or have any questions when using PACE, please email PLOS at <email xlink:type="simple">figures@plos.org</email>. Please note that Supporting Information files do not need this step.</p>
</body>
</sub-article>
<sub-article article-type="author-comment" id="pone.0317183.r002">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pone.0317183.r002</article-id>
<title-group>
<article-title>Author response to Decision Letter 0</article-title>
</title-group>
<related-object document-id="10.1371/journal.pone.0317183" document-id-type="doi" document-type="peer-reviewed-article" id="rel-obj002" link-type="rebutted-decision-letter" object-id="10.1371/journal.pone.0317183.r001" object-id-type="doi" object-type="decision-letter"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>1</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="author-response-date">9 Nov 2024</named-content>
</p>
<p>Dr. Argiro Vatakis</p>
<p>Academic Editor, PLOS ONE</p>
<p>Dear Prof. Vatakis,</p>
<p>We are pleased to submit a revised version of our manuscript, "Crossmodal Association between Auditory and Visual Objects in Rhesus Monkeys" (PONE-D-24-30219), which addresses all the concerns and suggestions raised by the reviewers, as well as editorial requests. We have modified the manuscript, figures, and figure captions to meet the publication criteria of PLOS ONE.</p>
<p>We believe the updated manuscript, enhanced by the reviewers' input, effectively demonstrates its contribution to the understanding of learning mechanisms in cross-modal associations and their implications in sensory processing and working memory. The revisions feature an introduction reframed to emphasize working memory and cross-modal associations in macaques, together with comprehensive methodological details on training the macaques and analyses of their performance during various learning stages. Additionally, we updated the title to "Monkeys Can Identify Pictures from Words," capturing a crucial insight from the reviewers regarding the learning of cross-modal associations between non-ethological stimuli. To address concerns about the monkeys' learning processes, we included the former S2 Fig as a new Fig 2. We have also incorporated supplementary information and new panels within the original figures to better address the reviewers’ comments and clarify the findings. We also withdrew the previous Fig 4 from the manuscript since it depicted possible models for solving cross-modal associations in the brain, which were not tested in our experiments. Nevertheless, all the results presented in the initial draft are maintained, enriched only by deeper interpretations suggested by the reviewers. We confirm the completeness and correctness of the reference list, with a few references added to the introduction to align with suggestions on the study framing.</p>
<p>We also confirm that all the figures in the manuscript (Figs 1-4, S1, S2 and S3) were created entirely in our lab. Some of them appear as a preprint in <ext-link ext-link-type="uri" xlink:href="http://bioRxiv.org" xlink:type="simple">bioRxiv.org</ext-link> (<ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1101/2024.01.05.574397" xlink:type="simple">https://doi.org/10.1101/2024.01.05.574397</ext-link>), but none of them have been previously copyrighted. The original images used in the experiments were downloaded from free online sites and the original version of the manuscript depicted them. However, we removed all those depictions from all figures to avoid any possible copyright conflicts, and substituted them with pictures that are similar but not identical, created using an AI image generator (<ext-link ext-link-type="uri" xlink:href="https://www.fotor.com/ai-art-generator" xlink:type="simple">https://www.fotor.com/ai-art-generator</ext-link>). Fotor.com explicitly states that: "For the AI image generator, the AI-generated images can be used for both personal and commercial purposes. You can share them on social media platforms, use them for marketing campaigns, or sell them. You are the copyright owner of your creations and will be responsible for any output that you generate using AIGC Related Service." (<ext-link ext-link-type="uri" xlink:href="https://support.fotor.com/hc/en-us/articles/17767970123417-Are-the-AI-generated-images-commercially-available-Do-I-have-ownership-of-them" xlink:type="simple">https://support.fotor.com/hc/en-us/articles/17767970123417-Are-the-AI-generated-images-commercially-available-Do-I-have-ownership-of-them</ext-link>). Moreover, as requested by PLOS ONE, we included the following sentence in figure captions: “The pictures are similar but not identical to the original images used in the study and are therefore for illustrative purposes only.” We also included the following description in the Methods section (lines 109-113): “Animal pictures used in the experiment were downloaded from free online sites and customized. However, the pictures shown in figures and supplementary information are similar but not identical to the original images used in the study; they were created for illustrative purposes only using an online AI image generator (<ext-link ext-link-type="uri" xlink:href="https://www.fotor.com/ai-art-generator" xlink:type="simple">https://www.fotor.com/ai-art-generator</ext-link>).” However, we included the picture of one of the researchers in the study. Therefore, we submitted the signed consent form as provided by PLOS ONE, and report in the Ethics statement (lines 65-66) the following: “The portrayal of one of the authors of this manuscript was used in the experiments, and the author has given written informed consent to publish these case details.”</p>
<p>Regarding the sounds used in our experiments, we confirm that the cow vocalizations were downloaded from <ext-link ext-link-type="uri" xlink:href="http://Freesound.org" xlink:type="simple">Freesound.org</ext-link>, and under the following License: Creative Commons 0 (CC0; No copyright). However, all the human and monkey sounds were recorded in our lab and have not been published elsewhere. We clarify this at lines 99-101 of the methods as follows: “The experiment utilized a variety of sounds, including laboratory recordings of words and monkey vocalizations, as well as free online sounds of cow vocalizations (<ext-link ext-link-type="uri" xlink:href="https://freesound.org/" xlink:type="simple">https://freesound.org/</ext-link>).”</p>
<p>Finally, we were notified that the Funding Information, Financial Disclosure, and Data Availability statements need to be clarified. Therefore, we kindly ask that the following statements be included:</p>
<p>Funding Information</p>
<p>LL received grant support from the Consejo Nacional de Humanidades Ciencias y Tecnologías (CONAHCYT; Number: 256767; <ext-link ext-link-type="uri" xlink:href="https://conahcyt.mx/" xlink:type="simple">https://conahcyt.mx/</ext-link>), and the Programa de Apoyo a Proyectos de Investigación e Innovación Tecnológica (PAPIIT; Number: IN229323; <ext-link ext-link-type="uri" xlink:href="https://dgapa.unam.mx/index.php/impulso-a-la-investigacion/papiit" xlink:type="simple">https://dgapa.unam.mx/index.php/impulso-a-la-investigacion/papiit</ext-link>). JV received support from Secretaría de Educación, Ciencia, Tecnología e Innovación de la Ciudad de México (SECTEI/103/2022; <ext-link ext-link-type="uri" xlink:href="https://www.sectei.cdmx.gob.mx/" xlink:type="simple">https://www.sectei.cdmx.gob.mx/</ext-link>). Elizabeth Cabrera Ruiz conducted this study to fulfill the requirements of Programa de Doctorado en Ciencias Biomédicas of Universidad Nacional Autónoma de México and received a doctoral scholarship from Consejo Nacional de Humanidades Ciencias y Tecnologías (Scholarship number: 245771; <ext-link ext-link-type="uri" xlink:href="https://conahcyt.mx/" xlink:type="simple">https://conahcyt.mx/</ext-link>). The data in this work are part of her doctoral dissertation.</p>
<p>Financial Disclosure</p>
<p>The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</p>
<p>Data Availability Statement</p>
<p>The data are fully available without restriction from Figshare.com: <ext-link ext-link-type="uri" xlink:href="https://figshare.com/s/998043a02c8b15315632" xlink:type="simple">https://figshare.com/s/998043a02c8b15315632</ext-link></p>
<p>Competing Interests</p>
<p>The authors have no competing interests.</p>
<p>Author contributions</p>
<p>Conceptualization: LL. Data Curation: ECR, MMH, TF. Formal Analysis: ECR, MA, MT, MMH, JV &amp; LL. Funding Acquisition: LL, ECR, JV. Investigation: ECR, MMH. Methodology: ECR &amp; LL. Software: ECR, MA, TF, MT, &amp; LL. Validation: JPO, MT, JV &amp; LL. Visualization: ECR, MA &amp; LL. Writing original draft: LL. Review &amp; draft editing: ECR, MA, JPO, MT &amp; JV. </p>
<p>Responses to the reviewers (highlighted in blue)</p>
<p>Reviewer #1: </p>
<p>The manuscript of Cabrera-Ruiz is focused on macaques’ abilities to learn audio-visual crossmodal associations in a delayed match-to-sample task.</p>
<p>Although the text is generally very well written, in my opinion the main focus/emphasis of the paper is not stated clearly enough. As far as I understand, one of the main achievements of the study is the new procedure for training macaques to perform the audio-visual cross-modal task. However, instead of emphasizing that achievement and discussing it properly, the authors extensively comment on (arbitrary) proposed models of cross-modal associations, which cannot be tested without application of electrophysiological methods.</p>
<p>The revised manuscript now emphasizes the achievement of training macaques in cross-modal discrimination. For example, line 8 of the Abstract now reads: "We found that the monkeys learned and performed proficiently in over a dozen associations." We also included full descriptions of the training protocol as a "Monkeys training" subsection in the Methods (lines 146-177), and a "Learning measurements" subsection about the analytical methods for evaluating the learning process (lines 178-203). In addition, we included a new section "Rhesus Monkeys Can Learn Cross-Modal Associations Between Stimuli of Different Types" in the Results (lines 251-294), and included S2 Fig in the text as Fig 2 (lines 277-282) with better descriptions of the learning of CMAs across training sessions, and a new panel in Fig 2B (also suggested by Reviewer 2), showing the number of sessions required to perform each CMA above chance. Finally, we created a new S1 Fig showing the performance for each CMA across sessions.</p>
<p>We also agree with the reviewers about the problem of presenting models about the neuronal mechanisms of CMAs without evidence. Therefore, we decided to remove Fig 4 from the document, limiting ourselves to adding information about cross-modal association models in the introduction. We now refer to the acoustic template model as a mechanism for association between modalities and to the alternative model of learning through social interactions. These models are important for the association between modalities as a basis for associations between stimuli that are not necessarily ethological but that are used in humans for language (which was what we tried to test in our study). The first paragraph of the introduction now reads as follows:</p>
<p>“Humans form cross-modal associations (CMAs) between sounds and images, which play a vital role in integrating semantic representations within language [1]. Supporting this, fMRI studies have shown that the temporal lobe of the human brain is actively involved in CMAs [2,3] between words and visual objects [4]. It is believed that CMAs between phonological "templates"—developed in human infants by listening to caretakers—and observed objects are essential for creating semantic representations and aiding the production of a child's first words [5–8]. Similarly, auditory templates have been proposed as a mechanism for vocal production in birds [9–13] and marmoset monkeys [14]. Recent studies, such as those by Carouso-Peck and Goldstein [15,16], have also shown that visual signals during social interactions can influence vocal production in birds. However, only a few ethological studies have suggested the existence of CMAs between vocal sounds and visual cues for semantic communication [17]. For instance, research has observed that vervet monkeys respond to calls signaling the presence of predators by looking upwards, downwards, or climbing into trees [18].”</p>
<p>Major comments:</p>
<p>Introduction: More information (including existing models) about cross-modal association in macaques should be provided.</p>
<p>We included important references to cross-modal association processes in monkeys. Importantly, we also framed the Introduction within the scope of working memory, since it plays a key role in multisensory association processes (as noted by Reviewer 2). The second paragraph of the Introduction now reads as follows:</p>
<p>“Neurophysiological recordings in monkeys have shown that the prefrontal cortex (PFC) — a brain area homologous to that in humans — utilizes working memory (WM) circuits [19] to perform CMAs between voices and faces [20–32], receiving inputs from various sensory regions [33–36]. CMAs have also been observed in the auditory and visual areas of the temporal lobe [37–45]. Notably, trained macaques have demonstrated the ability to perform cross-modal discriminations between visual and tactile objects [46,47], between stimuli that could be considered non-ethologically relevant (NER), such as pitch and color [48], and between amodal information (i.e., information that does not belong to a particular modality) [49], such as numerosity [50] and flutter frequencies [51–54]. However, it remains to be explored whether non-human primates can establish CMAs between NER stimuli that are important for human language, like words, which monkeys can discriminate phonetically [55,56], and pictures.”</p>
<p>Line 42: The authors mention that the subjects are relatively old, which makes me think that they have experience in many other tasks. It would be good to comment on whether, and in what way, the previous tasks in which the subjects were trained could affect their ability to learn the cross-modal associations.</p>
<p>The monkeys had not previously participated in any experiments, nor had they been trained in any task. We now clarify this in the Methods on line 69, which now reads as follows:</p>
<p>“The animals had no previous training in any other task and were not subjected to any surgery or head restraint for this behavioral study.”</p>
<p>Line 49: What would be the influence of the environmental enrichment on the performance of the audio-visual task?</p>
<p>Environmental enrichment was implemented as part of the animals' housing, e.g., plastic boxes with snacks inside to promote manual dexterity. Additionally, the monkeys spent time in a shared space with room to climb and socialize with other monkeys, which, apart from favoring the display of social behaviors characteristic of their species, probably played a role in the 'monkey' CMA. Similarly, the animals had only partial exposure to the researchers' faces (i.e., the researchers wore masks), and only a picture of one of the authors was used as a stimulus. Therefore, we believe it is unlikely that enrichment significantly helped the monkeys create the associations for the task. Lines 77-82 address enrichment as follows:</p>
<p>“The monkeys also had access to an enriched environment with toys, a recreation area for climbing and socializing with other monkeys four days a week, and opportunities for grooming through mesh sliding doors. In addition, cartoons and wildlife videos with content unrelated to the experiments were presented on TV for no more than four hours a day. However, the face and voice of one of the researchers with whom the monkeys interacted were used during the experiments”. In this regard, line 393 of the Discussion reads: “We found no clear evidence that CMAs that included possibly ethologically relevant stimuli, like human and monkey faces or coos [20–31], were learned more readily than other CMAs to which the monkeys had no previous exposure.”</p>
<p>Lines 94-102 – Monkey Training: In my opinion this is one of the most important methodological parts of the paper. More details should be provided regarding the training procedure.</p>
<p>We expanded the Methods to include a detailed description of the training protocol in a "Monkeys Training" subsection (lines 146-177) and a "Learning Measurements" subsection (lines 178-203) describing the analytical methods used to evaluate the monkeys' learning. We incorporated S2 Fig as Fig 2 so that the monkeys' learning of CMAs, a critical finding of our study, can now be observed directly. We also included S1 Fig to show the performance on all CMAs throughout learning, and added sections to the Results and the Discussion specifically addressing the learning of CMAs: "Rhesus Monkeys Can Learn Cross-modal Associations Between Stimuli of Different Types" (lines 251-294) and "Rhesus Macaques Create Crossmodal Associations Between Sounds and Images of Different Types" (lines 377-417), respectively.</p>
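<p>For concreteness, the minimal Python sketch below illustrates one way a "sessions to above-chance performance" criterion like the one reported in Fig 2B can be computed: a one-sided binomial test against the chance level implied by the number of picture options, applied session by session. The hit counts, the 0.05 criterion, and the function name are illustrative assumptions for this sketch, not a transcription of the manuscript's analysis.</p>
<preformat>
# Minimal sketch, not the manuscript's exact analysis: flag the first
# session in which a CMA is performed above chance, using a one-sided
# binomial test per session. Hit counts and the 0.05 criterion are
# illustrative assumptions.
from scipy.stats import binomtest

def first_above_chance(hits_per_session, trials_per_session, n_options):
    chance = 1.0 / n_options  # 2-4 picture options gives chance of 0.5-0.25
    sessions = zip(hits_per_session, trials_per_session)
    for session, (hits, trials) in enumerate(sessions, start=1):
        result = binomtest(hits, trials, p=chance, alternative="greater")
        if result.pvalue > 0.05:   # still indistinguishable from chance
            continue
        return session             # first above-chance session
    return None                    # never exceeded chance

# Example: three 60-trial sessions with four picture options
print(first_above_chance([18, 24, 31], [60, 60, 60], n_options=4))  # 2
</preformat>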
<p>Line 146 / Fig 2C, insets: It would be interesting not just to present the FA rates, but also to show which mistakes are most common (e.g., are monkey faces confused with human faces more often than with the red oval?).</p>
<p>Addressing this question, we included pie charts in Fig 3 (formerly Fig 2) showing the pictures selected during false alarms. We also improved the description of these results (lines 316-331) and added a new S3 Table with the percentages of choices during hits and false alarms.</p>
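<p>As an illustration of the tabulation behind those pie charts, the sketch below computes, from a flat trial log, the percentage of each picture chosen on false-alarm trials. The column names and trial records are hypothetical; the point is only to show how the most common confusions (e.g., monkey faces chosen after human sounds) can be read off such a table.</p>
<preformat>
# Hypothetical sketch of the tabulation behind the false-alarm pie charts.
# Column names and trial records are illustrative, not the authors' data.
import pandas as pd

trials = pd.DataFrame({
    "sound":  ["human", "human", "human", "monkey", "monkey", "human"],
    "choice": ["human", "monkey", "oval",  "monkey", "oval",  "monkey"],
})

# False alarms: trials where the touched picture does not match the sound
false_alarms = trials[trials["sound"] != trials["choice"]]

# Percentage of each picture chosen during false alarms, per sound category
confusions = pd.crosstab(false_alarms["sound"], false_alarms["choice"],
                         normalize="index") * 100
print(confusions.round(1))  # rows: sounds; columns: pictures chosen in error
</preformat>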
<p>Lines 188-195: The authors identify 3 key findings, the first 2 of which directly follow from the presented results. The third finding claims that “the monkeys’ success in forming crossmodal associations required […] working memory”, which is logical; however, it is unclear how it could be derived from the results.</p>
<p>Regarding working memory, our task required it because of the 3 s delay between the auditory stimulus and the picture options. We now provide clearer descriptions in the Abstract, Introduction, Methods, Results, and Discussion. The Abstract (lines 6-8) now reads: “In each trial, the monkeys listened to a brief sound (e.g., a monkey vocalization or a human word), and retained information about the sound to match it with one of 2–4 pictures presented on a touchscreen after a 3-second delay.” The Introduction (lines 45-50) now reads: “We specifically designed the task to temporally separate auditory and visual stimuli, thus engaging WM circuits to retain one modality in mind while awaiting the corresponding cross-modal stimulus.”</p>
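<p>To make the working-memory demand concrete, here is a minimal sketch of the trial timeline as described above. Only the 3-second delay and the 2–4 picture options come from the manuscript; the stimulus names, the audio stub, and the chooser function are placeholders.</p>
<preformat>
# Minimal sketch of the trial timeline described above. Only the 3 s
# delay and the 2-4 picture options come from the manuscript; sounds,
# pictures, and the chooser are placeholders.
import random
import time

def present_sound(sound):
    print(f"playing sound: {sound}")  # stand-in for the rig's audio playback

def run_trial(sound, pictures, correct_picture, choose=random.choice):
    present_sound(sound)              # brief auditory sample (word or call)
    time.sleep(3.0)                   # 3 s delay: sound must be held in WM
    choice = choose(pictures)         # 2-4 pictures appear on the touchscreen
    return choice == correct_picture  # hit if the matching picture is touched

# Example trial: the word "monkey" followed by three picture options
hit = run_trial("monkey", ["monkey", "human", "oval"], "monkey")
print("hit" if hit else "false alarm")
</preformat>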
<supplementary-material id="pone.0317183.s008" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" position="float" xlink:href="info:doi/10.1371/journal.pone.0317183.s008" xlink:type="simple">
<label>Attachment</label>
<caption>
<p>Submitted filename: <named-content content-type="submitted-filename">Response to Reviewers.docx</named-content></p>
</caption>
</supplementary-material>
</body>
</sub-article>
<sub-article article-type="aggregated-review-documents" id="pone.0317183.r003" specific-use="decision-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pone.0317183.r003</article-id>
<title-group>
<article-title>Decision Letter 1</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Vatakis</surname>
<given-names>Argiro</given-names>
</name>
<role>Academic Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2025</copyright-year>
<copyright-holder>Argiro Vatakis</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pone.0317183" document-id-type="doi" document-type="article" id="rel-obj003" link-type="peer-reviewed-article"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>1</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">23 Dec 2024</named-content>
</p>
<p>Monkeys can identify pictures from words</p>
<p>PONE-D-24-30219R1</p>
<p>Dear Dr. Lemus,</p>
<p>We’re pleased to inform you that your manuscript has been judged scientifically suitable for publication and will be formally accepted for publication once it meets all outstanding technical requirements.</p>
<p>Within one week, you’ll receive an e-mail detailing the required amendments. When these have been addressed, you’ll receive a formal acceptance letter and your manuscript will be scheduled for publication.</p>
<p>An invoice will be generated when your article is formally accepted. Please note, if your institution has a publishing partnership with PLOS and your article meets the relevant criteria, all or part of your publication costs will be covered. Please make sure your user information is up-to-date by logging into Editorial Manager at <ext-link ext-link-type="uri" xlink:href="https://www.editorialmanager.com/pone/" xlink:type="simple">Editorial Manager®</ext-link> and clicking the ‘Update My Information' link at the top of the page. If you have any questions relating to publication charges, please contact our Author Billing department directly at <email xlink:type="simple">authorbilling@plos.org</email>.</p>
<p>If your institution or institutions have a press office, please notify them about your upcoming paper to help maximize its impact. If they’ll be preparing press materials, please inform our press team as soon as possible, and no later than 48 hours after receiving the formal acceptance. Your manuscript will remain under strict press embargo until 2 pm Eastern Time on the date of publication. For more information, please contact <email xlink:type="simple">onepress@plos.org</email>.</p>
<p>Kind regards,</p>
<p>Argiro Vatakis</p>
<p>Academic Editor</p>
<p>PLOS ONE</p>
<p>Additional Editor Comments (optional):</p>
<p>Reviewers' comments:</p>
<p>Reviewer's Responses to Questions</p>
<p><bold>Comments to the Author</bold></p>
<p>1. If the authors have adequately addressed your comments raised in a previous round of review and you feel that this manuscript is now acceptable for publication, you may indicate that here to bypass the “Comments to the Author” section, enter your conflict of interest statement in the “Confidential to Editor” section, and submit your "Accept" recommendation.</p>
<p>Reviewer #1: All comments have been addressed</p>
<p>Reviewer #2: All comments have been addressed</p>
<p>**********</p>
<p>2. Is the manuscript technically sound, and do the data support the conclusions?</p>
<p>The manuscript must describe a technically sound piece of scientific research with data that supports the conclusions. Experiments must have been conducted rigorously, with appropriate controls, replication, and sample sizes. The conclusions must be drawn appropriately based on the data presented.</p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: Yes</p>
<p>**********</p>
<p>3. Has the statistical analysis been performed appropriately and rigorously?</p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: Yes</p>
<p>**********</p>
<p>4. Have the authors made all data underlying the findings in their manuscript fully available?</p>
<p>The <ext-link ext-link-type="uri" xlink:href="http://www.plosone.org/static/policies.action#sharing" xlink:type="simple">PLOS Data policy</ext-link> requires authors to make all data underlying the findings described in their manuscript fully available without restriction, with rare exception (please refer to the Data Availability Statement in the manuscript PDF file). The data should be provided as part of the manuscript or its supporting information, or deposited to a public repository. For example, in addition to summary statistics, the data points behind means, medians and variance measures should be available. If there are restrictions on publicly sharing data—e.g. participant privacy or use of data from a third party—those must be specified.</p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: Yes</p>
<p>**********</p>
<p>5. Is the manuscript presented in an intelligible fashion and written in standard English?</p>
<p>PLOS ONE does not copyedit accepted manuscripts, so the language in submitted articles must be clear, correct, and unambiguous. Any typographical or grammatical errors should be corrected at revision, so please note any specific errors here.</p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: Yes</p>
<p>**********</p>
<p>6. Review Comments to the Author</p>
<p>Please use the space provided to explain your answers to the questions above. You may also include additional comments for the author, including concerns about dual publication, research ethics, or publication ethics. (Please upload your review as an attachment if it exceeds 20,000 characters)</p>
<p>Reviewer #1: In my view the manuscript is substantially improved with respect to the previous version. I don't have any additional comments and I would be glad to see that this well performed study will be published in this journal! Good luck with your further research on this topic!</p>
<p>Reviewer #2: All comments have been addressed, I believe this manuscript should be accepted for publication to PLOS One.</p>
<p>**********</p>
<p>7. PLOS authors have the option to publish the peer review history of their article (<ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/plosone/s/editorial-and-peer-review-process#loc-peer-review-history" xlink:type="simple">what does this mean?</ext-link>). If published, this will include your full peer review and any attached files.</p>
<p>If you choose “no”, your identity will remain anonymous but your review may still be made public.</p>
<p><bold>Do you want your identity to be public for this peer review?</bold> For information about this choice, including consent withdrawal, please see our <ext-link ext-link-type="uri" xlink:href="https://www.plos.org/privacy-policy" xlink:type="simple">Privacy Policy</ext-link>.</p>
<p>Reviewer #1: <bold>Yes: </bold>Ivo D. Popivanov</p>
<p>Reviewer #2: No</p>
<p>**********</p>
</body>
</sub-article>
<sub-article article-type="editor-report" id="pone.0317183.r004" specific-use="acceptance-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pone.0317183.r004</article-id>
<title-group>
<article-title>Acceptance letter</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Vatakis</surname>
<given-names>Argiro</given-names>
</name>
<role>Academic Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2025</copyright-year>
<copyright-holder>Argiro Vatakis</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pone.0317183" document-id-type="doi" document-type="article" id="rel-obj004" link-type="peer-reviewed-article"/>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">10 Jan 2025</named-content>
</p>
<p>PONE-D-24-30219R1 </p>
<p>PLOS ONE</p>
<p>Dear Dr. Lemus,</p>
<p>I'm pleased to inform you that your manuscript has been deemed suitable for publication in PLOS ONE. Congratulations! Your manuscript is now being handed over to our production team.</p>
<p>At this stage, our production department will prepare your paper for publication. This includes ensuring the following:</p>
<p>* All references, tables, and figures are properly cited</p>
<p>* All relevant supporting information is included in the manuscript submission</p>
<p>* There are no issues that prevent the paper from being properly typeset</p>
<p>If revisions are needed, the production department will contact you directly to resolve them. If no revisions are needed, you will receive an email when the publication date has been set. At this time, we do not offer pre-publication proofs to authors during production of the accepted work. Please keep in mind that we are working through a large volume of accepted articles, so please give us a few weeks to review your paper and let you know the next and final steps. </p>
<p>Lastly, if your institution or institutions have a press office, please let them know about your upcoming paper now to help maximize its impact. If they'll be preparing press materials, please inform our press team within the next 48 hours. Your manuscript will remain under strict press embargo until 2 pm Eastern Time on the date of publication. For more information, please contact <email xlink:type="simple">onepress@plos.org</email>.</p>
<p>If we can help with anything else, please email us at <email xlink:type="simple">customercare@plos.org</email>.</p>
<p>Thank you for submitting your work to PLOS ONE and supporting open access. </p>
<p>Kind regards, </p>
<p>PLOS ONE Editorial Office Staff</p>
<p>on behalf of</p>
<p>Dr. Argiro Vatakis </p>
<p>Academic Editor</p>
<p>PLOS ONE</p>
</body>
</sub-article>
</article>