<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1d3 20150301//EN" "http://jats.nlm.nih.gov/publishing/1.1d3/JATS-journalpublishing1.dtd">
<article article-type="research-article" dtd-version="1.1d3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">PLoS ONE</journal-id>
<journal-id journal-id-type="publisher-id">plos</journal-id>
<journal-id journal-id-type="pmc">plosone</journal-id>
<journal-title-group>
<journal-title>PLOS ONE</journal-title>
</journal-title-group>
<issn pub-type="epub">1932-6203</issn>
<publisher>
<publisher-name>Public Library of Science</publisher-name>
<publisher-loc>San Francisco, CA USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.1371/journal.pone.0281137</article-id>
<article-id pub-id-type="publisher-id">PONE-D-22-31186</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Research Article</subject>
</subj-group>
<subj-group subj-group-type="Discipline-v3">
<subject>Social sciences</subject><subj-group><subject>Linguistics</subject><subj-group><subject>Semantics</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Cognitive science</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Learning</subject><subj-group><subject>Human learning</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Learning</subject><subj-group><subject>Human learning</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Social sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Learning</subject><subj-group><subject>Human learning</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Learning and memory</subject><subj-group><subject>Learning</subject><subj-group><subject>Human learning</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Cognitive science</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Language</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Language</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Social sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Language</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Cognitive science</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Learning</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Learning</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Social sciences</subject><subj-group><subject>Psychology</subject><subj-group><subject>Cognitive psychology</subject><subj-group><subject>Learning</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Learning and memory</subject><subj-group><subject>Learning</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Social sciences</subject><subj-group><subject>Linguistics</subject><subj-group><subject>Grammar</subject><subj-group><subject>Phonology</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Social sciences</subject><subj-group><subject>Linguistics</subject><subj-group><subject>Psycholinguistics</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Social sciences</subject><subj-group><subject>Linguistics</subject><subj-group><subject>Language acquisition</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3">
<subject>Social sciences</subject><subj-group><subject>Linguistics</subject><subj-group><subject>Grammar</subject><subj-group><subject>Syntax</subject></subj-group></subj-group></subj-group></subj-group></article-categories>
<title-group>
<article-title>Examining the potential influence of crosslinguistic lexical similarity on word-choice transfer in L2 English</article-title>
<alt-title alt-title-type="running-head">Potential influence of crosslinguistic lexical similarity on word-choice transfer</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0001-8916-9010</contrib-id>
<name name-style="western">
<surname>Shatz</surname>
<given-names>Itamar</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</role>
<role content-type="http://credit.niso.org/contributor-roles/data-curation/">Data curation</role>
<role content-type="http://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</role>
<role content-type="http://credit.niso.org/contributor-roles/investigation/">Investigation</role>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/visualization/">Visualization</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-original-draft/">Writing – original draft</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
<xref ref-type="corresp" rid="cor001">*</xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Alexopoulou</surname>
<given-names>Theodora</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"><sup>1</sup></xref>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Murakami</surname>
<given-names>Akira</given-names>
</name>
<role content-type="http://credit.niso.org/contributor-roles/methodology/">Methodology</role>
<role content-type="http://credit.niso.org/contributor-roles/writing-review-editing/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff002"><sup>2</sup></xref>
</contrib>
</contrib-group>
<aff id="aff001"><label>1</label> <addr-line>Department of Theoretical and Applied Linguistics, University of Cambridge, Cambridge, United Kingdom</addr-line></aff>
<aff id="aff002"><label>2</label> <addr-line>Department of English Language and Linguistics, University of Birmingham, Birmingham, United Kingdom</addr-line></aff>
<contrib-group>
<contrib contrib-type="editor" xlink:type="simple">
<name name-style="western">
<surname>Bongelli</surname>
<given-names>Ramona</given-names>
</name>
<role>Editor</role>
<xref ref-type="aff" rid="edit1"/>
</contrib>
</contrib-group>
<aff id="edit1"><addr-line>University of Macerata: Universita degli Studi di Macerata, ITALY</addr-line></aff>
<author-notes>
<fn fn-type="conflict" id="coi001">
<p>The authors have declared that no competing interests exist.</p>
</fn>
<corresp id="cor001">* E-mail: <email xlink:type="simple">is442@cam.ac.uk</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>1</day>
<month>2</month>
<year>2023</year>
</pub-date>
<pub-date pub-type="collection">
<year>2023</year>
</pub-date>
<volume>18</volume>
<issue>2</issue>
<elocation-id>e0281137</elocation-id>
<history>
<date date-type="received">
<day>11</day>
<month>11</month>
<year>2022</year>
</date>
<date date-type="accepted">
<day>15</day>
<month>1</month>
<year>2023</year>
</date>
</history>
<permissions>
<copyright-year>2023</copyright-year>
<copyright-holder>Shatz et al</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="info:doi/10.1371/journal.pone.0281137"/>
<abstract>
<p>We examined whether and how L1-L2 crosslinguistic formal lexical similarity influences L2 word choice. Our sample included two learner subcorpora, containing 8,500 and 6,390 English texts, written in an educational setting, by speakers of diverse L1s in the A1–B2 CEFR range of L2 proficiency. We quantified similarity based on phonological overlap between L1 words and their L2 (English) translations. This similarity relates to psycholinguistic <italic>cognancy</italic>, which occurs when words and their translations share a high level of formal similarity, often due to historical cognancy from shared etymology or language contact. We then used mixed-effects statistical models to examine how this similarity influences the rate of use of the L2 words; essentially, we checked whether L2 words that are more similar to their L1 translations are used more often. We also controlled for potential confounds, including the baseline L1 frequency of the English words. The type of crosslinguistic similarity that we examined did <italic>not</italic> influence learners’ choice of L2 words in their writing in the present sample, which represents a type of educational setting that many learners encounter. This suggests that the influence of such similarity is constrained, and that communicative needs can override transfer from learners’ L1 to their L2, which raises questions regarding when and how else situational factors can influence transfer.</p>
</abstract>
<funding-group>
<award-group id="award001">
<funding-source>
<institution>Hughes Hall</institution>
</funding-source>
<principal-award-recipient>
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0001-8916-9010</contrib-id>
<name name-style="western">
<surname>Shatz</surname>
<given-names>Itamar</given-names>
</name>
</principal-award-recipient>
</award-group>
<award-group id="award002">
<funding-source>
<institution>Cambridge Assessment English</institution>
</funding-source>
<principal-award-recipient>
<contrib-id authenticated="true" contrib-id-type="orcid">https://orcid.org/0000-0001-8916-9010</contrib-id>
<name name-style="western">
<surname>Shatz</surname>
<given-names>Itamar</given-names>
</name>
</principal-award-recipient>
</award-group>
<award-group id="award003">
<funding-source>
<institution-wrap>
<institution-id institution-id-type="funder-id">http://dx.doi.org/10.13039/501100004815</institution-id>
<institution>Isaac Newton Trust</institution>
</institution-wrap>
</funding-source>
<principal-award-recipient>
<name name-style="western">
<surname>Alexopoulou</surname>
<given-names>Theodora</given-names>
</name>
</principal-award-recipient>
</award-group>
<award-group id="award004">
<funding-source>
<institution>EF Education First</institution>
</funding-source>
<principal-award-recipient>
<name name-style="western">
<surname>Alexopoulou</surname>
<given-names>Theodora</given-names>
</name>
</principal-award-recipient>
</award-group>
<funding-statement>I.S. received financial support from Hughes Hall at the University of Cambridge (<ext-link ext-link-type="uri" xlink:href="https://www.hughes.cam.ac.uk/" xlink:type="simple">https://www.hughes.cam.ac.uk/</ext-link>) and Cambridge Assessment English (<ext-link ext-link-type="uri" xlink:href="https://www.cambridgeassessment.org.uk/" xlink:type="simple">https://www.cambridgeassessment.org.uk/</ext-link>). T.A. received financial support from the Isaac Newton Trust at the University of Cambridge (<ext-link ext-link-type="uri" xlink:href="https://www.newtontrust.cam.ac.uk/" xlink:type="simple">https://www.newtontrust.cam.ac.uk/</ext-link>) and EF Education First (<ext-link ext-link-type="uri" xlink:href="https://www.ef.co.uk/" xlink:type="simple">https://www.ef.co.uk/</ext-link>). The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</funding-statement>
</funding-group>
<counts>
<fig-count count="4"/>
<table-count count="6"/>
<page-count count="23"/>
</counts>
<custom-meta-group>
<custom-meta id="data-availability">
<meta-name>Data Availability</meta-name>
<meta-value>All the data and code are available in the following Open Science Framework (OSF) repository: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.17605/OSF.IO/5EUA8" xlink:type="simple">https://doi.org/10.17605/OSF.IO/5EUA8</ext-link>.</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="sec001" sec-type="intro">
<title>Introduction</title>
<sec id="sec002">
<title>Theoretical background</title>
<p>Learners’ native language (L1) influences their knowledge of and engagement with second language (L2) vocabulary, in terms of operations such as recognition, interpretation, storage, and retrieval. This is often attributed to <italic>lexical transfer</italic> [<xref ref-type="bibr" rid="pone.0281137.ref001">1</xref>], a type of <italic>language transfer</italic> or <italic>crosslinguistic influence</italic> [<xref ref-type="bibr" rid="pone.0281137.ref002">2</xref>–<xref ref-type="bibr" rid="pone.0281137.ref004">4</xref>]. Transfer can be <italic>positive</italic> when it facilitates language acquisition or use, for example because an L2 linguistic structure (e.g., a certain word) is identical to a corresponding structure in a learner’s L1, which makes it easier for the learner to use it. Transfer can also be <italic>negative</italic> when it hinders language acquisition or use, in which case it is sometimes called <italic>interference</italic>; this can occur, for example, because an L2 structure is very different from a corresponding structure in a learner’s L1, which makes it harder for the learner to use it.</p>
<p>A notable aspect of lexical transfer is that crosslinguistic similarity in form—i.e., <italic>formal similarity</italic> in phonology and/or orthography—between L1 words and their L2 translations facilitates the processing, acquisition, and use of the L2 words [<xref ref-type="bibr" rid="pone.0281137.ref001">1</xref>, <xref ref-type="bibr" rid="pone.0281137.ref002">2</xref>, <xref ref-type="bibr" rid="pone.0281137.ref005">5</xref>–<xref ref-type="bibr" rid="pone.0281137.ref012">12</xref>]. This similarity is usually conceptualized based on the overlap in sounds and/or letters between words in different languages. For example, the French word for “orange” is also spelled “orange” (though pronounced slightly differently), so it has higher formal similarity with its English translation than does the French word for “lemon” (“citron”). Accordingly, it will generally be easier for French speakers to acquire the English word “orange” than the word “lemon”.</p>
<p>When two words with similar meanings across languages have a high level of formal similarity, they can be considered to be psycholinguistic <italic>cognates</italic>, though there is no exact threshold for cognancy based on similarity. Psycholinguistic cognancy frequently occurs because the words are also historical cognates, meaning that they share a common etymology, though cognancy may also involve words that were borrowed during language contact [<xref ref-type="bibr" rid="pone.0281137.ref011">11</xref>, <xref ref-type="bibr" rid="pone.0281137.ref013">13</xref>, <xref ref-type="bibr" rid="pone.0281137.ref014">14</xref>].</p>
<p>The facilitative effect of formal crosslinguistic similarity—referred to as the <italic>cognate facilitation effect</italic> when it involves cognates—is well-attested in the psycholinguistic and second-language acquisition (SLA) literature, and has been attributed to various cognitive mechanisms. The general explanation for it is that similarity in form between L1 and L2 words that share similar meanings facilitates the linking and/or mapping of L2 words to their L1 counterparts or to shared concepts, which facilitates the transfer of linguistic (e.g., semantic, syntactic, and morphological) information from the L1 to the L2 [<xref ref-type="bibr" rid="pone.0281137.ref001">1</xref>, <xref ref-type="bibr" rid="pone.0281137.ref007">7</xref>–<xref ref-type="bibr" rid="pone.0281137.ref010">10</xref>, <xref ref-type="bibr" rid="pone.0281137.ref015">15</xref>, <xref ref-type="bibr" rid="pone.0281137.ref016">16</xref>].</p>
<p>Like most types of crosslinguistic influence, this form of lexical transfer is expected to play a role primarily during early stages of SLA, when learners rely more on their L1 in order to form and use their mental lexicon. However, this influence can also play a role at advanced stages of SLA, and therefore affect even highly proficient L2 learners [<xref ref-type="bibr" rid="pone.0281137.ref001">1</xref>, <xref ref-type="bibr" rid="pone.0281137.ref005">5</xref>, <xref ref-type="bibr" rid="pone.0281137.ref006">6</xref>, <xref ref-type="bibr" rid="pone.0281137.ref010">10</xref>, <xref ref-type="bibr" rid="pone.0281137.ref015">15</xref>].</p>
<p>Since previous studies on this crosslinguistic influence focused on L2 processing (e.g., recognition, comprehension, and retrieval), it remains unclear whether and how this effect extends to L2 production, especially since various factors might play a different role in processing than in production. For example, the goal and context of communication might play a greater role in L2 production (e.g., word choice in essays) than in many experimental processing paradigms (e.g., reaction time to isolated words).</p>
<p>There is evidence that increased overall lexical similarity between languages improves learning outcomes, thus leading to higher scores in L2 proficiency tests [<xref ref-type="bibr" rid="pone.0281137.ref017">17</xref>–<xref ref-type="bibr" rid="pone.0281137.ref019">19</xref>]. This could be due to facilitated processing of L2 words, which can, in turn, facilitate general acquisition, since the more words learners understand, the more input they can decipher, and the more easily they acquire words and other structures [<xref ref-type="bibr" rid="pone.0281137.ref020">20</xref>]. However, this finding is based on similarity between languages as a whole, and on composite L2 proficiency scores that involve a mix of factors, including vocabulary and grammar. Accordingly, it does not tell us if similarity across L1-L2 words influences the production and choice of individual L2 words.</p>
<p>Some evidence regarding this comes from studies of <italic>word-choice transfer</italic>, a type of lexical transfer whereby a person’s knowledge of a language influences their choice of words in another language [<xref ref-type="bibr" rid="pone.0281137.ref021">21</xref>–<xref ref-type="bibr" rid="pone.0281137.ref024">24</xref>]. This transfer means that learners’ use of specific words and phrases—referred to as <italic>lexical signature</italic>, <italic>lexical style</italic>, or <italic>wordprints</italic>—can be used in stylometry to aid L1 identification [<xref ref-type="bibr" rid="pone.0281137.ref021">21</xref>, <xref ref-type="bibr" rid="pone.0281137.ref022">22</xref>]. This applies both to relatively constrained settings such as TOEFL essays (which we will call <italic>task-based settings</italic>), where communication is fairly limited in terms of factors like the permissible topic and style, as well as to more <italic>spontaneous settings</italic>, where the topic and style of communication are not as constrained (e.g., when people are allowed to talk about almost whatever they want). However, studies on word-choice transfer generally only investigated whether learners’ L1 influences their choice of L2 words, but did not investigate what factors specifically drive this crosslinguistic influence.</p>
<p>One exception is Rabinovich et al. (2018), who showed that L1-L2 similarity can influence L2 word choice [<xref ref-type="bibr" rid="pone.0281137.ref013">13</xref>]. Specifically, they investigated the relatively spontaneous productions on a social media website (Reddit) of highly proficient (near-native) L2 English speakers of various Indo-European L1s. They focused on English words that were part of a <italic>synset</italic>, which is a set of multiple synonyms that correspond to the same meaning. Specifically, Rabinovich et al. focused on synsets where the synonyms had at least two different etymological paths (under the assumption that etymological cognancy generally leads to increased formal similarity), and the synonyms themselves were fairly interchangeable. They found clear evidence of a cognate facilitation effect, meaning that the speakers were more likely to use English words that are cognate with their L1 translation. For more information on this study, see Appendix S3 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref> (under “Analysis of synonym sets”).</p>
<p>However, there is also evidence suggesting that the effect of crosslinguistic similarity might not extend to productions in task-based settings. Specifically, Crossley and McNamara (2011) found that L2 texts written by speakers with different L1s had similar scores on several global lexical measures, such as lexical diversity and polysemy, despite different levels of similarity between their L1s and the target L2 [<xref ref-type="bibr" rid="pone.0281137.ref025">25</xref>]. This is based on 599 L2 English texts in the <italic>International Corpus of Learner English</italic> (ICLE), written by Czech, Finnish, German, and Spanish speakers, who are “high intermediate to advanced” L2 English speakers (p. 274), and who wrote the texts as a response to one of a few prompts for argumentative essays. A potential explanation for this finding is that, in these relatively constrained task-based settings, learners choose to use only words that are sufficiently relevant for their communication, regardless of which words are easier to use due to crosslinguistic similarity. Essentially, the factors constraining the communication (e.g., narrow communicative goals or the necessary formality level) may serve as situational and contextual factors that override the transfer from learners’ L1 [<xref ref-type="bibr" rid="pone.0281137.ref021">21</xref>]. However, it is unclear if this is indeed the case, or if the findings of Crossley and McNamara can be attributed to a different factor, such as that they focused on global lexical measures, rather than on the use of individual words.</p>
<p>To summarize, there is clear evidence of a facilitative effect of L1-L2 lexical similarity on L2 processing, comprehension, and learning, particularly at the early stages of SLA [<xref ref-type="bibr" rid="pone.0281137.ref002">2</xref>, <xref ref-type="bibr" rid="pone.0281137.ref017">17</xref>], and there is also evidence that learners’ L1 can influence their L2 word choice [<xref ref-type="bibr" rid="pone.0281137.ref013">13</xref>, <xref ref-type="bibr" rid="pone.0281137.ref022">22</xref>]. However, evidence regarding the influence of crosslinguistic <italic>similarity</italic> on L2 word choice is limited and less clear, especially in task-based settings.</p>
</sec>
<sec id="sec003">
<title>Research questions</title>
<p>We investigate the potential influence of crosslinguistic formal lexical similarity on word choice in a task-based English-as-a-foreign language (EFL) educational setting, to answer the following questions:</p>
<list list-type="order">
<list-item><p>Does increased similarity in form between L1 words and their L2 translations lead to increased use of the L2 words in this setting?</p></list-item>
<list-item><p>If there is an effect of crosslinguistic similarity in such task-based settings, is it moderated by learners’ L2 proficiency?</p></list-item>
</list>
<p>Answering these questions will help determine whether the effect identified by Rabinovich et al. [<xref ref-type="bibr" rid="pone.0281137.ref013">13</xref>] extends to task-based settings (we compare our approach with theirs in Appendix S3 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref>—“Comparison of our approach with that of Rabinovich et al.”). Furthermore, it will help determine whether findings regarding word-choice transfer in task-based settings are likely attributable to some degree to crosslinguistic similarity, and whether the lack of L1 effect (i.e., <italic>intergroup homogeneity</italic>) found by Crossley and McNamara [<xref ref-type="bibr" rid="pone.0281137.ref025">25</xref>] is simply a feature of the global lexical measure that they used and/or their sample. In addition, the focus on an educational EFL setting will shed light on the influence of crosslinguistic similarity in this type of common environment, where, as we will see, there are often strong task effects on word choices.</p>
</sec>
<sec id="sec004">
<title>Our approach</title>
<p>We examined how formal similarity between L2 English words and their L1 translations influences the usage rates of the L2 words. For example, we wanted to see if, in a task dealing with food, an Italian learner of English would be more likely to use the word “lemon” than a French speaker, because the Italian word for “lemon” (“limone”) sounds more similar to the English word than the French one (“citron”) does. If similarity plays a role in this context, then we expected that learners—especially beginners—would prefer using similar words, because they are easier for them to process.</p>
<p>To investigate this, we constructed lists of L1-L2 word pairs, containing words in various L1s (e.g., German and French), together with their corresponding translations in English as the target L2 (e.g., citron-lemon). Then, we calculated the formal similarity between the words in each L1-L2 pair, based on the phonological overlap of the sounds that the words contain, where increased overlap denotes increased similarity (i.e., decreased lexical distance). We also found the baseline frequency of the target English words, to control for it in our analyses.</p>
<p>Next, we took a large-scale EFL learner corpus, containing texts written in response to various writing tasks, by learners with diverse L1s and varied L2 proficiency. Using the L1-L2 wordlists from the previous stage, we counted the number of times each target English word from the wordlists appeared in each text.</p>
<p>Finally, we built mixed-effects statistical models, to determine whether the rate of use of the target English words in the texts was predicted by the lexical similarity between each English word and its L1 translation, and whether this effect was moderated by L2 proficiency (to check if the effect of similarity is stronger at lower L2 proficiency levels). Our models controlled for relevant background variables, including the baseline frequency of the English words, as well as <italic>task</italic> and <italic>item</italic> effects. Ultimately, our key question was whether, all things being equal, L2 words that are more similar to their L1 translations will be used at higher rates, compared to words that are less similar.</p>
</sec>
</sec>
<sec id="sec005" sec-type="materials|methods">
<title>Methodology</title>
<p>Data and code are available at the following <italic>Open Science Framework</italic> (OSF) repository: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.17605/OSF.IO/5EUA8" xlink:type="simple">https://doi.org/10.17605/OSF.IO/5EUA8</ext-link>.</p>
<sec id="sec006">
<title>Crosslinguistic similarity/distance</title>
<sec id="sec007">
<title>Distance datasets</title>
<p>We quantify crosslinguistic formal similarity based on the phonological distance between L1 words and their L2 translations, where increased distance denotes lower similarity. We will henceforth refer to this as <italic>lexical distance</italic>, though we use phonological distance as a proxy of overall lexical distance, which subsumes other types of similarity; for more information on this choice of terminology, see Appendix S1 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref> (under ‘The term “lexical distance”’). To do this, we use two datasets, which contain lists of corresponding words in different languages, as outlined briefly below. For more information on these datasets and their processing, see the “Lexical-distance datasets information” document in the study’s OSF repository.</p>
<p>The first lexical-distance dataset is the <italic>Automated Similarity Judgment Program</italic> (ASJP) [<xref ref-type="bibr" rid="pone.0281137.ref026">26</xref>]. It contains Swadesh lists, which are often used by researchers to calculate the lexical distance between languages [e.g., <xref ref-type="bibr" rid="pone.0281137.ref018">18</xref>], and which contain words representing various concepts, such as <italic>hear</italic>, <italic>water</italic>, <italic>full</italic>, <italic>one</italic>, and <italic>dog</italic> [<xref ref-type="bibr" rid="pone.0281137.ref026">26</xref>, <xref ref-type="bibr" rid="pone.0281137.ref027">27</xref>].</p>
<p>The Swadesh lists in the ASJP focus on a subset of 40 concepts; to control for variation in the completeness of the Swadesh lists across languages, we included in our analysis only the 38 concepts that are shared by all the languages in our sample. These languages, which are based on the ones available in the learner sample that is outlined later, are: Arabic, French, German, Italian, Japanese, Mandarin, Portuguese, Russian, and Spanish as L1s, and English as the target L2. In addition, we focus on single-word entries, in line with most prior research and to avoid potential confounds, and so we included only entries that do <italic>not</italic> contain a multi-word phrase in any of the L1s or English. Accordingly, the final Swadesh-based sample contains 225 entries, with 25 entries for each of the 9 L1s, where each entry is a row containing an English word together with all its L1 counterparts in a specific L1.</p>
<p>The second lexical-distance dataset that we use is the <italic>Intercontinental Dictionary Series</italic> (IDS), which contains parallel dictionaries in various languages [<xref ref-type="bibr" rid="pone.0281137.ref028">28</xref>]. Similarly to the Swadesh lists, this dataset also contains a standardized list of words and their corresponding counterparts in various languages. The parallel dictionaries contain substantially more words per language than the Swadesh lists (~1,300 general word meanings compared to ~40). However, they contain data only for French, German, Italian, Portuguese, and Spanish (out of the L1s in the present sample). Accordingly, they complement the Swadesh lists, but do not replace them.</p>
<p>As with the Swadesh lists, we included only single-word entries in our analysis of this dataset. Furthermore, we removed from the parallel dictionaries a small number of words (22) that also appeared in the Swadesh lists, so that the words in each dataset were unique. Accordingly, the final parallel-dictionaries sample contains 5,515 entries, with 1,103 entries for each of the 5 L1s, where each entry is a row containing an English word and all its L1 counterparts in a single L1.</p>
<p>Our approach aligns in this regard with Rabinovich et al. (2018), who created their wordlist (with 1,143 words) based on their lexical dataset (<italic>Etymological WordNet</italic>) rather than their learner sample, though they did use learner data when choosing the most prominent sense of a word in cases where multiple parts-of-speech categories were available. An alternative potential approach for creating these wordlists is to base them on the words that appear in our learner sample. However, this could bias the analyses, since the presence and absence of words from the sample can be an important signal regarding associated crosslinguistic influence.</p>
<p>To illustrate this, consider a simple situation, where we compare, among German learners, the rate of use of two English words, with an equally low baseline frequency. One of the English words is similar to its German translation, whereas the other one is dissimilar. If there is indeed a facilitative effect of similarity, then we might expect that the dissimilar word will not be used by learners (because it has low baseline frequency), but that the similar word will be used despite the low baseline frequency (because of the facilitative effect). However, if we remove the distant word from our analysis because it was not used at all, then we would be obscuring the effects of similarity by comparison. Essentially, the fact that a word is not used at all by learners is important to our analyses, as it allows us to more accurately assess the effects of distance.</p>
</sec>
<sec id="sec008">
<title>Calculating lexical distance</title>
<p>The lexical-distance measure that we use is <italic>Levenshtein distance normalized</italic> (LDN). Extensive information about this measure, including its psycholinguistic validation, is presented in Appendix S1 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref> (under “Validation of Levenshtein distance”), and is also summarized below.</p>
<p>Intuitively, LDN generally represents the degree of phonological or orthographic overlap between two words. It is calculated by taking the minimum number of character substitutions, additions, and deletions that are needed to transform one string to another (i.e., the <italic>Levenshtein distance</italic>), and dividing it by the length of the longer string, to account for variations in word length. For example, in the case of the word <italic>knee</italic>, the English-German pair /ni/-/kni/ has an LDN of 0.33, since there is 1 character transformation (a /k/ is inserted or deleted), and the length of the longer string is 3. By contrast, the LDN for the corresponding English-Japanese pair /ni/-/hiza/ is greater (0.75), since there is less overlap, so more transformations are needed.</p>
<p>In the present research, we first calculated lexical distance between each L1 entry and its corresponding L2 (English) entry, based on their phonological (IPA) transcription. When there were multiple L1 synonyms available (e.g., “soil” in French—<italic>sol</italic> and <italic>terre</italic>), we used the distance from the closest synonym, as our goal was to identify cases where the L2 word is closely similar to an L1 word (and is likely also cognate with it).</p>
<p>We used phonological—rather than orthographic—overlap as a measure of distance, because this enables us to examine distance from L1s that have a substantially different script from English, like Arabic and Mandarin. Nevertheless, in the parallel-dictionaries sample, where all the L1s share English’s Latin script, there was a strong correlation between phonological and orthographic overlap (<italic>r</italic> = .68, <italic>95% CI</italic> = [.67, .70], <italic>p</italic> &lt; .001). This aligns with findings of other research [<xref ref-type="bibr" rid="pone.0281137.ref011">11</xref>, <xref ref-type="bibr" rid="pone.0281137.ref029">29</xref>, <xref ref-type="bibr" rid="pone.0281137.ref030">30</xref>], like an <italic>r</italic> = .782 found in a dataset of English and Spanish words [<xref ref-type="bibr" rid="pone.0281137.ref011">11</xref>]. This strong correlation suggests that the phonological overlap that we found for L1s that share English’s script is indicative of the associated orthographic overlap for these L1s, so even if a large part of the effect of similarity is due to overlap in orthography, we would expect to detect it in our analyses.</p>
<p>We used LDN for several reasons. First, it can be calculated in an automated, objective, and replicable manner for a large number of words from different languages [<xref ref-type="bibr" rid="pone.0281137.ref014">14</xref>]. Second, it is the most conventional measure that is used for this purpose, and, as shown in detail in Appendix S1 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref> (under “Validation of Levenshtein distance”), it has been extensively validated, including through correlations with other measures of language distance, such as expert cognancy judgments from historical linguistics and perceived language distance from psycholinguistics [<xref ref-type="bibr" rid="pone.0281137.ref014">14</xref>, <xref ref-type="bibr" rid="pone.0281137.ref031">31</xref>]. Furthermore, LDN was used by other SLA researchers [e.g., <xref ref-type="bibr" rid="pone.0281137.ref030">30</xref>] to quantify crosslinguistic similarity between individual words—often to distinguish cognates from non-cognates when investigating cognate facilitation at the word level. It was also found to be a robust predictor of relevant L2 outcomes, including word recognition [<xref ref-type="bibr" rid="pone.0281137.ref011">11</xref>] and retrieval [<xref ref-type="bibr" rid="pone.0281137.ref032">32</xref>].</p>
<p>However, LDN also has some important limitations, discussed in Appendix S1 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref> (under “Limitations of LDN”), which we will also briefly outline below.</p>
<p>The first limitation is that LDN treats all character transformations as equal, even though some transformations are less “substantial” phonologically than others. We partially addressed this by replicating our analyses using <italic>feature edit distance</italic> (Appendix S2 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref>).</p>
<p>The second limitation is that our use of LDN only considers one aspect of formal similarity (phonological overlap), but other formal factors (e.g., orthographic depth) and non-formal factors (e.g., semantic/pragmatic similarity) may also affect crosslinguistic influence. Nevertheless, past studies [e.g., <xref ref-type="bibr" rid="pone.0281137.ref032">32</xref>] found a facilitative effect of formal similarity even without considering such factors, as did Rabinovich et al. [<xref ref-type="bibr" rid="pone.0281137.ref013">13</xref>], who did not investigate the influence of these factors. Furthermore, we used mixed models to control for some of these potential effects, and replicated our analyses on a sub-sample containing only German speakers (“German-only models” in Appendix S5 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref>), to minimize the influence of some of these factors (e.g., variation in the effects of similarity across language families).</p>
<p>Finally, LDN does not assess cognancy directly, which we use in the psycholinguistic sense, of words that have similar meaning and pronunciation/spelling across languages. Rather, LDN only quantifies the formal similarity between words that are generally similar in terms of meaning. Nevertheless, as noted above, LDN is strongly correlated with cognancy, and has been used to estimate cognancy in SLA studies that then used it to successfully predict L2 outcomes, including at the word level [<xref ref-type="bibr" rid="pone.0281137.ref011">11</xref>, <xref ref-type="bibr" rid="pone.0281137.ref032">32</xref>], so we expect it to be a reasonable approximation in the context of the present large-scale analyses.</p>
<p>These limitations are important to keep in mind. However, given the ways we addressed them (as outlined above and in “Limitations of LDN” in Appendix S1 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref>), and given the validation for the use of LDN in the manner we are using it (as outlined above and in “Validation of Levenshtein distance” in Appendix S1 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref>), we believe that the use of LDN is reasonable in the present study. Notably, even if it will be unable to perfectly capture <italic>all</italic> of the effects of crosslinguistic similarity, it should be able to successfully capture <italic>some</italic> of them, as it did in many past SLA studies.</p>
<p>Likewise, although we did not focus on the effects of similarity on word choice within synonym sets in particular (unlike Rabinovich et al.), our sample does include such sets, as shown in Appendix S3 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref> (under “Analysis of synonym sets”). Given this, we would expect to find at least some effect of crosslinguistic similarity in the sample, even if it is confined only to such sets, though we do not claim that this is necessarily the case.</p>
</sec>
<sec id="sec009">
<title>Lexical distances</title>
<p><xref ref-type="fig" rid="pone.0281137.g001">Fig 1</xref> and <xref ref-type="table" rid="pone.0281137.t001">Table 1</xref> contain information about the lexical distances between the L1s in the sample and English. The distances of all word pairs are available in the data files in the OSF repository (under “Lexical distance &amp; frequency data”).</p>
<fig id="pone.0281137.g001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0281137.g001</object-id>
<label>Fig 1</label>
<caption>
<title>Lexical distance between L1 words and English, per L1 in each dataset.</title>
<p>The distance is equal to the phonological LDN between L1 words and their most lexically similar English counterpart. Within the boxplots, the middle line indicates the median, the lower/upper hinges indicate the 1<sup>st</sup>/3<sup>rd</sup> quartiles, the whiskers indicate 1.5 interquartile ranges (IQR) past the hinges, and the dots indicate outliers. The violin plots indicate an estimate of the probability density of lexical distance for each L1, which can be viewed as the likelihood that a word in each L1 will have a certain lexical distance, where increased width indicates greater likelihood. Data is based on 25 words per L1 in the Swadesh lists and 1,103 words per L1 in the parallel dictionaries (after the removal of multi-word entries). These L1s were chosen based on the ones available in the learner sample, which is presented later.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0281137.g001" xlink:type="simple"/>
</fig>
<table-wrap id="pone.0281137.t001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0281137.t001</object-id>
<label>Table 1</label> <caption><title>Statistics about the lexical distances between the L1s and English in each dataset.</title> <p>L1s are arranged in order of increasing mean lexical distance in the Swadesh lists.</p></caption>
<alternatives>
<graphic id="pone.0281137.t001g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0281137.t001" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left"/>
<th align="center" colspan="5">Swadesh lists</th>
<th align="center" colspan="5">Parallel dictionaries</th>
</tr>
<tr>
<th align="left">L1</th>
<th align="center">mean</th>
<th align="center">SD</th>
<th align="center">median</th>
<th align="center">IQR</th>
<th align="center">range</th>
<th align="center">mean</th>
<th align="center">SD</th>
<th align="center">median</th>
<th align="center">IQR</th>
<th align="center">range</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">German</td>
<td align="right">.622</td>
<td align="right">.27</td>
<td align="right">0.60</td>
<td align="right">0.50–0.75</td>
<td align="right">0.00–1.00</td>
<td align="right">.785</td>
<td align="right">.18</td>
<td align="right">0.80</td>
<td align="right">0.67–1.00</td>
<td align="right">0.00–1.00</td>
</tr>
<tr>
<td align="left">Italian</td>
<td align="right">.776</td>
<td align="right">.20</td>
<td align="right">0.80</td>
<td align="right">0.67–1.00</td>
<td align="right">0.29–1.00</td>
<td align="right">.847</td>
<td align="right">.16</td>
<td align="right">0.88</td>
<td align="right">0.75–1.00</td>
<td align="right">0.20–1.00</td>
</tr>
<tr>
<td align="left">Spanish</td>
<td align="right">.808</td>
<td align="right">.21</td>
<td align="right">0.80</td>
<td align="right">0.71–1.00</td>
<td align="right">0.29–1.00</td>
<td align="right">.860</td>
<td align="right">.16</td>
<td align="right">0.88</td>
<td align="right">0.80–1.00</td>
<td align="right">0.20–1.00</td>
</tr>
<tr>
<td align="left">French</td>
<td align="right">.813</td>
<td align="right">.20</td>
<td align="right">0.83</td>
<td align="right">0.67–1.00</td>
<td align="right">0.25–1.00</td>
<td align="right">.814</td>
<td align="right">.20</td>
<td align="right">0.83</td>
<td align="right">0.67–1.00</td>
<td align="right">0.00–1.00</td>
</tr>
<tr>
<td align="left">Portuguese</td>
<td align="right">.848</td>
<td align="right">.18</td>
<td align="right">0.86</td>
<td align="right">0.80–1.00</td>
<td align="right">0.50–1.00</td>
<td align="right">.873</td>
<td align="right">.15</td>
<td align="right">0.89</td>
<td align="right">0.80–1.00</td>
<td align="right">0.20–1.00</td>
</tr>
<tr>
<td align="left">Japanese</td>
<td align="right">.864</td>
<td align="right">.15</td>
<td align="right">0.86</td>
<td align="right">0.75–1.00</td>
<td align="right">0.50–1.00</td>
<td align="right">-</td>
<td align="right">-</td>
<td align="right">-</td>
<td align="right">-</td>
<td align="right">-</td>
</tr>
<tr>
<td align="left">Russian</td>
<td align="right">.881</td>
<td align="right">.21</td>
<td align="right">1.00</td>
<td align="right">0.80–1.00</td>
<td align="right">0.00–1.00</td>
<td align="right">-</td>
<td align="right">-</td>
<td align="right">-</td>
<td align="right">-</td>
<td align="right">-</td>
</tr>
<tr>
<td align="left">Arabic</td>
<td align="right">.887</td>
<td align="right">.14</td>
<td align="right">1.00</td>
<td align="right">0.80–1.00</td>
<td align="right">0.50–1.00</td>
<td align="right">-</td>
<td align="right">-</td>
<td align="right">-</td>
<td align="right">-</td>
<td align="right">-</td>
</tr>
<tr>
<td align="left">Mandarin</td>
<td align="right">.924</td>
<td align="right">.13</td>
<td align="right">1.00</td>
<td align="right">0.83–1.00</td>
<td align="right">0.50–1.00</td>
<td align="right">-</td>
<td align="right">-</td>
<td align="right">-</td>
<td align="right">-</td>
<td align="right">-</td>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t001fn001"><p><italic>Note</italic>. The distance measure is based on the phonological LDN from the closest synonym, calculated for the single-word entries in each dataset. There were 225 entries in the Swadesh lists (i.e., rows with an English word and all its corresponding counterparts in a certain L1), with 25 entries for each of the 9 L1s in the dataset. There were 5,515 entries in the parallel dictionaries, with 1,103 for each of the 5 L1s. All counts are after the removal of multi-word entries.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>This figure and table show that the words in the datasets cover the full range of distances from English (0–1). However, most words are highly dissimilar (with an LDN at or near 1), even in L1s that are relatively lexically similar to English (e.g., German and French). This is important, since it suggests that in naturalistic settings, L2 learners may have limited opportunities to benefit from facilitative effects of crosslinguistic lexical similarity, so they must adapt to using L2 words that are dissimilar from their L1 translations.</p>
<p>Although we used this type of representative sample, there was a sufficient range of distances in it that the estimates of the effects of distance were precise in our models, as shown in the results. Nevertheless, due to concerns that the high degree of dissimilarity might obscure the effects of crosslinguistic influence, we replicated our analyses using data from just German speakers, as this was the L1 that was the closest to English, and had the broadest range of LDN values. This analysis (presented in Appendix S5 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref>, under “German-only models”), is similar to the analyses of other researchers who analyzed L2 acquisition among speakers of a single L1, and in particular an L1 that is relatively similar to the L2, like De Wilde et al., who looked at the acquisition of L2 English among L1 Dutch speakers [<xref ref-type="bibr" rid="pone.0281137.ref030">30</xref>].</p>
<p>In addition, the distances are largely aligned with those based on general language classification. Specifically, the Germanic and Romance L1s are the closest to English, and the Indo-European L1s are closer to English than the non-Indo-European L1s, except that Japanese is shown as being closer to English than Russian is (so Japanese is closer than we would expect, and Russian is further than we would expect). However, because the lexical-distance datasets were modified through the removal of multi-word entries, the overall similarity between each L1 and English that is shown in this figure and table should <italic>not</italic> be interpreted as the mean similarity between that L1 and English. Indeed, as shown in Appendix S1 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref> (under “Validation of Levenshtein distance”), when the unmodified wordlists are used, meaning that multi-word entries remain in the sample, Japanese and Russian switch positions as expected, and consequently, all the Indo-European L1s are closer to English than the non-Indo-European L1s. Nevertheless, this is not important for our analyses, since we focus on the similarity and use of individual words, rather than on similarity at the language level and on global measures of word use (e.g., lexical diversity).</p>
</sec>
</sec>
<sec id="sec010">
<title>Baseline word frequency</title>
<p><italic>Baseline word frequency</italic> represents how often an English word is used in general English. We need to control for this, since it can influence our response variable (the usage rate of L2 words). The “Baseline frequency information” document in the OSF repository contains detailed information about how we calculated this frequency. To summarize, we used the <italic>wordfreq</italic> library in Python [<xref ref-type="bibr" rid="pone.0281137.ref033">33</xref>], which curates frequency information from a number of diverse and large-scale sources, including books, subtitles, news, and social media. We used their <italic>Zipf frequency</italic> measure, developed by van Heuven et al. [<xref ref-type="bibr" rid="pone.0281137.ref034">34</xref>], which is the base-10 logarithm of the number of times a word appears per billion words (e.g., a Zipf value of 6 means a word appears once per thousand words).</p>
<p><xref ref-type="fig" rid="pone.0281137.g002">Fig 2</xref> shows the frequency distribution of the English words in our lexical-distance datasets. All frequencies are available in the OSF repository (under “Lexical distance &amp; frequency data”). The mean Zipf frequency in the Swadesh lists was 5.24 (SD = 0.72, median = 5.14, range = 4.15–7.11), and the mean Zipf frequency in the parallel dictionaries was 4.35 (SD = 0.83, median = 4.32, range = 1.87–7.41). Accordingly, both datasets included a wide range of words with different frequencies, though this range was greater in the parallel dictionaries.</p>
<fig id="pone.0281137.g002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0281137.g002</object-id>
<label>Fig 2</label>
<caption>
<title>The baseline (Zipf) frequency of the English words in each lexical-distance dataset.</title>
<p>Within the boxplots, the line inside the box indicates the median, the lower/upper hinges indicate the 1<sup>st</sup>/3<sup>rd</sup> quartiles, the whiskers indicate 1.5 IQRs past the hinges, and the dots indicate outliers. The violin plots indicate an estimate of the probability density of the frequency of English words. Data is based on 25 English words in the Swadesh lists and 1,103 words in the parallel dictionaries.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0281137.g002" xlink:type="simple"/>
</fig>
<p>One concern regarding word frequencies was that there would not be enough high-level (i.e., low-frequency) vocabulary words in the lists, which could be a problem if the effects of similarity are stronger in—or restricted to—low-frequency words. However, as shown in Appendix S3 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref> (under “Comparison of baseline word frequencies”), the distribution of the Zipf frequencies in our parallel-dictionaries sample—based on the mean, SD, and range—is similar to that of other studies that found a cognate facilitation effect, and our sample also contains substantially more (1,103) words, so this should not be an issue for our analyses. Furthermore, as explained in the “Data analysis” section of the paper and the “Added-interactions models” section in Appendix S5 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref>, we built supplementary models, which showed that there is no interaction between distance and frequency in our sample.</p>
<p>In addition, note that past studies found a cognate facilitation effect even when controlling for frequency [e.g., <xref ref-type="bibr" rid="pone.0281137.ref008">8</xref>, <xref ref-type="bibr" rid="pone.0281137.ref011">11</xref>, <xref ref-type="bibr" rid="pone.0281137.ref030">30</xref>], as shown under “Correlations of distance, frequency, and word use” in Appendix S3 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref>. Accordingly, this effect does not appear to be simply the result of a frequency confound, and we would expect to find a similar effect in the present sample, even when controlling for frequency.</p>
</sec>
<sec id="sec011">
<title>Learner sample</title>
<p>In this section, we briefly outline the learner sample that we used. For more details on it, see the “Sample information” document in the study’s OSF repository.</p>
<p>The learner sample came from the <italic>EF-Cambridge Open Language Database</italic> (EFCAMDAT), an open-access EFL learner corpus, containing texts written by learners in <italic>Englishtown</italic>—EF’s online English school [<xref ref-type="bibr" rid="pone.0281137.ref035">35</xref>–<xref ref-type="bibr" rid="pone.0281137.ref037">37</xref>]. When a learner joins Englishtown, their English proficiency is determined through a dedicated placement test [<xref ref-type="bibr" rid="pone.0281137.ref035">35</xref>]. Based on this, they are placed at a starting proficiency level, and the EFCAMDAT spans 16 such levels, which EF has aligned with common proficiency standards [<xref ref-type="bibr" rid="pone.0281137.ref035">35</xref>], such as the Common European Framework of Reference for Languages (CEFR) [<xref ref-type="bibr" rid="pone.0281137.ref038">38</xref>]. Each level consists of several distinct lessons. After completing a lesson, learners are assigned a writing task that they submit online, and receive feedback on from a teacher. These tasks, which are described in more detail in the “Sample information” document (under “Background information on the EFCAMDAT”), cover a wide range of styles and topics, such as describing your favourite day, reviewing a song, writing an online profile, or giving instructions to a house-sitter. The curriculum is standardized, so learners with different L1s follow the same lessons and activities, and are given the same writing tasks. Note that we use the term “task” here in the sense in which it is generally used in the EFCAMDAT; as shown in the later explanation of our analyses, we do not make a claim regarding the influence of different specific aspects of the tasks, such as their genre [<xref ref-type="bibr" rid="pone.0281137.ref039">39</xref>].</p>
<p>For our analyses, we used the <italic>EFCAMDAT Cleaned Subcorpus</italic> [<xref ref-type="bibr" rid="pone.0281137.ref040">40</xref>]. The key feature of this dataset is that it is split into two subcorpora, each of which contains texts written by similar learners in response to different lessons and prompts. This means, for example, that both the first and the second subcorpora contain texts written by Mandarin learners in task #5, but the learners in the first subcorpus wrote their texts after a different lesson and in response to a different prompt than the learners in the second subcorpus. Accordingly, using this dataset presents two important advantages for research. First, it allows us to accurately categorize texts based on the task that they correspond to. Second, this offers an opportunity to analyze two similar but distinct learner samples, which serves as a form of replication.</p>
<p>We selected random texts from this dataset, in a balanced manner across L1s, proficiency levels, and tasks. For a full explanation of this process, see the relevant document in the OSF repository (under “Sample selection process and final sample”). The final samples are outlined in <xref ref-type="table" rid="pone.0281137.t002">Table 2</xref>.</p>
<table-wrap id="pone.0281137.t002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0281137.t002</object-id>
<label>Table 2</label> <caption><title>Final learner samples (with English as the target L2).</title></caption>
<alternatives>
<graphic id="pone.0281137.t002g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0281137.t002" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<tbody>
<tr>
<td align="left">L1s <xref ref-type="table-fn" rid="t002fn001"><sup>a</sup></xref></td>
<td align="left">Arabic, Japanese, Mandarin, Russian (these appear only in the Swadesh sample)<break/>French, German, Italian, Portuguese, Spanish (these appear in both the Swadesh and parallel samples) <xref ref-type="table-fn" rid="t002fn002"><sup>b</sup></xref></td>
</tr>
<tr>
<td align="left">L2 proficiency levels</td>
<td align="left">EFCAMDAT 1–12 (equivalent to CEFR A1–B2)</td>
</tr>
<tr>
<td align="left">Number of tasks per subcorpus</td>
<td align="left">95 (first) / 71 (second) <xref ref-type="table-fn" rid="t002fn003"><sup>c</sup></xref></td>
</tr>
<tr>
<td align="left">Number of texts per L1 per task</td>
<td align="left">10 <xref ref-type="table-fn" rid="t002fn004"><sup>d</sup></xref></td>
</tr>
<tr>
<td align="left">Number of texts per subcorpus</td>
<td align="left">In Swadesh sample: 8,500 (first) / 6,390 (second)<break/>In parallel sample: 4,747 (first) / 3,550 (second) <xref ref-type="table-fn" rid="t002fn005"><sup>e</sup></xref></td>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t002fn001"><p><sup>a</sup> L1s in the EFCAMDAT are estimated based on learners’ nationality, an approach that has been used in previous studies and validated empirically, as shown in the OSF “Sample Information” document under “Background information on the EFCAMDAT”.</p></fn>
<fn id="t002fn002"><p><sup>b</sup> The nationality for Arabic is Saudi Arabian; for Mandarin—Chinese; for Portuguese—Brazilian; for Spanish—Mexican. For other L1s, the L1 is based on the corresponding nationality (e.g., Japanese).</p></fn>
<fn id="t002fn003"><p><sup>c</sup> There are 8 tasks per EFCAMDAT level in the first subcorpus and 6 tasks per level in the second (with one task per lesson). An exception is task #51, in which texts from both subcorpora were placed in the first subcorpus due to the software used to classify them, so this task was removed from this sample.</p></fn>
<fn id="t002fn004"><p><sup>d</sup> There were a few exceptions to this in the first subcorpus, which had 2–9 texts (mean = 6.43, SD = 1.79); these cases (14 out of 855, 1.64%) are listed in the OSF “Sample information” under “Cases with fewer than 10 text”.</p></fn>
<fn id="t002fn005"><p><sup>e</sup> The difference in the number of texts is because the parallel sample contains data for 5 L1s out of the original 9, and so contains 55.85% of the total texts available in the first subcorpus, and 55.56% of those available in the second subcorpus.</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="sec012">
<title>Word usage</title>
<p>To assess learners’ use of L2 vocabulary, we calculated the number of times each English word in the lexical-distance datasets appears in any given text in the learner sample. We did this separately for each cross of one of the lexical-distance datasets with one of the EFCAMDAT subcorpora, as shown in <xref ref-type="table" rid="pone.0281137.t003">Table 3</xref>. Note that we calculated counts based on a spelling-corrected version of each text, as discussed in Appendix S4 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref> (under “Spelling correction”).</p>
<table-wrap id="pone.0281137.t003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0281137.t003</object-id>
<label>Table 3</label> <caption><title>The four final samples, each representing a cross between a lexical-distance dataset and a subcorpus.</title> <p><italic>Observations</italic> equal the number of <italic>words per L1</italic> in a lexical-distance dataset times the number of <italic>texts</italic> available in the subcorpus.</p></caption>
<alternatives>
<graphic id="pone.0281137.t003g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0281137.t003" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left">Distance dataset</th>
<th align="left">Subcorpus</th>
<th align="center">L1s</th>
<th align="center">Words per L1</th>
<th align="center">Texts <xref ref-type="table-fn" rid="t003fn001"><sup>a</sup></xref></th>
<th align="center">Observations</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">Swadesh lists</td>
<td align="left">first</td>
<td align="right">9</td>
<td align="right">25</td>
<td align="right">8,500</td>
<td align="right">212,500</td>
</tr>
<tr>
<td align="left">Swadesh lists</td>
<td align="left">second</td>
<td align="right">9</td>
<td align="right">25</td>
<td align="right">6,390</td>
<td align="right">159,750</td>
</tr>
<tr>
<td align="left">Parallel dictionaries</td>
<td align="left">first</td>
<td align="right">5</td>
<td align="right">1,103</td>
<td align="right">4,747</td>
<td align="right">5,235,941</td>
</tr>
<tr>
<td align="left">Parallel dictionaries</td>
<td align="left">second</td>
<td align="right">5</td>
<td align="right">1,103</td>
<td align="right">3,550</td>
<td align="right">3,915,650</td>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t003fn001"><p><sup>a</sup> The number of texts available for the parallel-dictionaries samples is lower because these samples contain data for only 5 of the 9 L1s that we examine.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>Statistics about the counts of target words appear in <xref ref-type="table" rid="pone.0281137.t004">Table 4</xref>. For more information on the raw response variable, see the section on “Correlations of distance, frequency, and word use” in Appendix S3 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref>. In addition, we also built models looking only at the presence/absence of target words, as shown in Appendix S5 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref> (under “Binary-response models”), which replicated the results of the count-based models.</p>
<table-wrap id="pone.0281137.t004" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0281137.t004</object-id>
<label>Table 4</label> <caption><title>Statistics about the distribution of the count data that was used in the models (i.e., the number of times a word appeared in a text).</title> <p>The specific statistics are given either for <italic>total</italic> cases, or for cases where the count was greater than zero (<italic>count&gt;0</italic>).</p></caption>
<alternatives>
<graphic id="pone.0281137.t004g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0281137.t004" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left">Dataset</th>
<th align="left">Subcorpus</th>
<th align="center">N<sub>(total)</sub></th>
<th align="center">N<sub>(count&gt;0)</sub></th>
<th align="center">Prop.<sub>(count&gt;0)</sub> <xref ref-type="table-fn" rid="t004fn002"><sup>a</sup></xref></th>
<th align="center">Mean<sub>(total)</sub></th>
<th align="center">SD<sub>(total)</sub></th>
<th align="center">Mean<sub>(count&gt;0)</sub></th>
<th align="center">SD<sub>(count&gt;0)</sub></th>
<th align="center">Max</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">Swadesh</td>
<td align="left">first</td>
<td align="right">212,500</td>
<td align="right">13,049</td>
<td align="right">0.061</td>
<td align="right">0.174</td>
<td align="right">0.968</td>
<td align="right">2.832</td>
<td align="right">2.782</td>
<td align="right">24</td>
</tr>
<tr>
<td align="left">Swadesh</td>
<td align="left">second</td>
<td align="right">159,750</td>
<td align="right">9,819</td>
<td align="right">0.061</td>
<td align="right">0.188</td>
<td align="right">1.104</td>
<td align="right">3.063</td>
<td align="right">3.323</td>
<td align="right">26</td>
</tr>
<tr>
<td align="left">Parallel</td>
<td align="left">first</td>
<td align="right">5,235,941</td>
<td align="right">59,566</td>
<td align="right">0.011</td>
<td align="right">0.016</td>
<td align="right">0.183</td>
<td align="right">1.417</td>
<td align="right">0.973</td>
<td align="right">19</td>
</tr>
<tr>
<td align="left">Parallel</td>
<td align="left">second</td>
<td align="right">3,915,650</td>
<td align="right">47,072</td>
<td align="right">0.012</td>
<td align="right">0.017</td>
<td align="right">0.196</td>
<td align="right">1.452</td>
<td align="right">1.058</td>
<td align="right">15</td>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t004fn001"><p><italic>Note</italic>. The difference in distributions between the parallel dictionaries and Swadesh lists could be attributed, at least in part, to the parallel dictionaries containing some lower-frequency words. Specifically, the mean Zipf frequency in the Swadesh lists was 5.24 (SD = 0.72, median = 5.14, range = 4.15–7.11), while the mean Zipf frequency in the parallel dictionaries was 4.37 (SD = 0.84, median = 4.35, range = 1.87–7.41).</p></fn>
<fn id="t004fn002"><p><sup>a</sup> This represents the proportion of entries with a count greater than 0, out of all entries in the sample.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>Broadly, the data can be characterized as having (1) a high proportion of zeros and (2) a right skew, which means that there were many cases where a target word was not used in a text, and a small number of cases where a target word was used in a text multiple times. This means that most words are not used in most texts, and that some words are also not used in any of the texts, which is expected, given that we include specialized “high level” (i.e., low frequency) words in our sample. However, the inclusion of such words does not pose an issue for our models, as indicated by the model diagnostics that we discuss later, as well as the precise coefficient estimates for our predictors. In addition, note that removing such words from our sample would bias the results.</p>
<p>This distribution is common for count data, and is expected given the diverse range of tasks and words in our sample, including the spectrum of low- and high-frequency words. It should <italic>not</italic> be interpreted as indicating overdispersion or zero-inflation, since those are features of a model rather than the response variable [<xref ref-type="bibr" rid="pone.0281137.ref041">41</xref>]. Indeed, the assumption checking (in the “Model diagnostics” section of Appendix S4 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref>) shows that the models are not overdispersed or zero-inflated; rather, some actually have underdispersion, though as shown in the aforementioned section, this does not substantially influence our results. Also, as noted in the next section, we used Poisson models in our analyses, since they are designed for dealing with this type of count data, and due to the large size of the samples, there was a sufficient number of “positive” observations (i.e., with a count &gt; 0) that the models were able to converge properly.</p>
<p>Furthermore, our results—as well as the use of Poisson models—were supported by the supplementary logistic-regression models that we built, which used a binary response variable (as shown in Appendix S5 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref>, under “Binary-response models”).</p>
</sec>
<sec id="sec013">
<title>Data analysis</title>
<p>We built <italic>generalized linear mixed-models</italic> (GLMMs), separately for each combination of subcorpus and lexical-distance dataset (e.g., Swadesh lists and the first subcorpus). Specifically, we built <italic>Poisson</italic> models (with the canonical <italic>log</italic> link), due to the use of count data in the response variable [<xref ref-type="bibr" rid="pone.0281137.ref042">42</xref>, <xref ref-type="bibr" rid="pone.0281137.ref043">43</xref>]. The structure of the models was as follows (the formula we used appears in Appendix S4 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref>, under “Model formula”):</p>
<list list-type="order">
<list-item><p><bold>Response variable:</bold> <italic>Rate of usage</italic> of the target English word. This is based on the count of the target English word in a text (i.e., the number of times it appears in it), which is then <italic>offset</italic> by the total number of words in the text (specifically, it is offset by the <italic>log</italic> of the wordcount—an exposure variable that is based on the <italic>wordcount</italic> variable in the EFCAMDAT Cleaned Subcorpus—since the log is the canonical link function for Poisson models). This is needed to control for different texts having a different total number of words, and produces a rate at which target words occur per word in the text [<xref ref-type="bibr" rid="pone.0281137.ref042">42</xref>, <xref ref-type="bibr" rid="pone.0281137.ref043">43</xref>]. In addition, we built supplementary models with a <italic>binary</italic> response variable, based on whether a target word was used in a text or not. Essentially, while the main models focused on the target words as <italic>tokens</italic>, by examining their counts, these models focused on them as <italic>types</italic>, by examining their presence/absence. These models replicated the results of the main models, as shown in Appendix S5 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref> (under “Binary-response models”).</p></list-item>
<list-item><p><bold>Predictors</bold>:
<list list-type="alpha-lower">
<list-item><p><italic>Lexical distance</italic> (of individual L1-L2 word pairs), based on the phonological LDN between the English word and its closest synonym in the L1 of the learner who wrote the text.</p></list-item>
<list-item><p><italic>L2 proficiency</italic>, based on EFCAMDAT proficiency level (1–12, corresponding to CEFR A1–B2) of the learner at the time they wrote the text, as each task in the dataset is classified under a specific proficiency level. This predictor is used to statistically control for the inclusion of multiple L2 proficiency levels in the sample, and enables us to isolate the effects of lexical distance on the rate of use of the target word, once L2 proficiency is accounted for. Essentially, it allows us to determine whether learners at the same L2 proficiency levels differ in their word choice, while including a range of L2 proficiency levels in our sample.</p></list-item>
<list-item><p><italic>Interaction between lexical distance and L2 proficiency</italic>, to see whether the effects of L2 proficiency moderate those of lexical distance, and especially whether lexical distance has a stronger effect at lower proficiency levels.</p></list-item>
<list-item><p><italic>Word frequency</italic> of each English word (based on its baseline frequency in the English language), to control for this factor when considering the word’s rate of usage in the L2 texts. We also built supplementary models with potential interactions between <italic>distance</italic>/<italic>frequency</italic>, <italic>proficiency</italic>/<italic>frequency</italic>, and <italic>distance</italic>/<italic>proficiency</italic>/<italic>frequency</italic>, which replicated the findings of the main models, as shown under “Added-interactions models” in Appendix S5 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref>.</p></list-item>
</list></p></list-item>
<list-item><p><bold>Random effects</bold> (random intercepts unless noted otherwise):
<list list-type="alpha-lower">
<list-item><p><italic>Learner</italic>, to control for learners who had more than one text in the sample. Most learners only had a single text in the sample (the mean number of texts per learner was 1.36 in the first subcorpus and 1.41 in the second). Multiple texts per learner were included to achieve sufficient coverage of the sample, in line with prior studies on the EFCAMDAT [e.g., <xref ref-type="bibr" rid="pone.0281137.ref039">39</xref>, <xref ref-type="bibr" rid="pone.0281137.ref044">44</xref>, <xref ref-type="bibr" rid="pone.0281137.ref045">45</xref>]. See the “Sample information” document in the OSF repository for more details (under “Number of texts per learner”).</p></list-item>
<list-item><p><italic>L1</italic>, with random slopes for <italic>lexical distance</italic>, to control for any additional effects from the learners’ L1 and their associated (e.g., cultural) background.</p></list-item>
<list-item><p><italic>Task</italic>, to control for all the aspects of each writing task that can influence word choice, such as its prompt, with the exception of the task’s associated L2 proficiency level, which we control for using the relevant predictor. This approach accounts for all aspects of task effects in aggregate, without disentangling their different aspects; for more information, see Appendix S4 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref> (under “Task random effect”).</p></list-item>
<list-item><p><italic>Word</italic>, to control for any word-level effects beyond those of distance (e.g., pragmatic factors), in a similar manner as for <italic>task</italic>.</p></list-item>
<list-item><p><italic>Task</italic>:<italic>Word</italic>, to control for the interaction between <italic>task</italic> and <italic>word</italic>, and particularly cases where a certain task is more likely to prompt the use of a certain word.</p></list-item>
</list></p></list-item>
</list>
<p>We tried adding other random effects, but this led to convergence issues, and even in cases where the models converged, their key results were the same as they were for these models. For more information, see Appendix S5 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref> (under “Models with alternative random effects”).</p>
<p>Before building the models, we scaled the distance predictor by a factor of 10, so that it is on a scale of 0–10 instead of 0–1. This facilitates convergence, by putting this predictor on a similar scale as the other predictors (L2 proficiency: 1–12, frequency: ~1–7.5). We also centered the predictors, to facilitate convergence of the models and reduce potential collinearity.</p>
<p>After building the models, we exponentiated the coefficient estimates to derive an <italic>incidence rate ratio</italic> (IRR), and scaled the <italic>standard errors</italic> (SEs) accordingly [<xref ref-type="bibr" rid="pone.0281137.ref042">42</xref>]. The IRR is the factor by which the rate of the response is expected to change for a 1-unit increase in the predictor. Accordingly, an IRR of 2 means a 1-unit increase in the predictor doubles the rate of use of the target word, while an IRR of 0.5 means a 1-unit increase in the predictor halves it. An IRR of 1 corresponds to a coefficient estimate (<italic>B</italic>) of 0. For more information, see “Incidence rate ratio” in Appendix S4 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref>.</p>
<p>In addition, we checked the statistical assumptions of the models. The relevant diagnostics appear in Appendix S4 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref> (under “Model diagnostics”), and indicate that there are no substantial issues with the models.</p>
<p>Finally, we also compared these models with baseline models, which did not include lexical distance as a predictor, to determine whether the inclusion of lexical distance improves the models’ predictive power (based on AIC and BIC).</p>
</sec>
</sec>
<sec id="sec014" sec-type="results">
<title>Results</title>
<p><xref ref-type="fig" rid="pone.0281137.g003">Fig 3</xref> contains plots showing the basic association between distance and the rate of use of words in the datasets, compared to their baseline frequency in English. For the associated statistics, see “Frequency-ratio descriptive statistics” in Appendix S3 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref>.</p>
<fig id="pone.0281137.g003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0281137.g003</object-id>
<label>Fig 3</label>
<caption>
<title>The <italic>lexical distance</italic> of words and their <italic>frequency ratio</italic> (i.e., their frequency in the sample divided by their baseline frequency in English).</title>
<p>A ratio = 1 (grey line) indicates that a word is used at equal rates in our learner sample and baseline English; a ratio &gt;1 indicates a word is used more frequently in our sample, and a ratio &lt;1 indicates the opposite; a ratio = 0 indicates the word does not appear in our sample. Each point is a combination of a target word and a specific L1, since words in different L1s can have different distances from English. Darker shading indicates an overlap in points.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0281137.g003" xlink:type="simple"/>
</fig>
<p>If there is a facilitative influence of crosslinguistic similarity, then we would expect words with a lower lexical distance (i.e., higher similarity) to have a higher frequency ratio. However, such an effect is not visible in the plots, as the frequency ratio seems independent of lexical distance. Nevertheless, since this analysis is limited (e.g., it does not control for task effects), we move on to the more comprehensive mixed-models.</p>
<p><xref ref-type="table" rid="pone.0281137.t005">Table 5</xref> contains the results of the mixed-models for the Swadesh lists. There is essentially no effect of distance or of its interaction with L2 proficiency, as the associated effect sizes are almost exactly zero (B = -0.01–0.00, corresponding to IRR = 0.99–1.00). Given this, and given that the associated SEs are also very small (≤0.01 for both B and IRR), this lack of effect is robust within this sample.</p>
<table-wrap id="pone.0281137.t005" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0281137.t005</object-id>
<label>Table 5</label> <caption><title>Results of the mixed-models, for the Swadesh-based samples.</title> <p>The response variable was the rate of use of the target L2 English words (i.e., their count offset by the total number of words in each text). Under <italic>fixed effects</italic>, <italic>distance</italic> is the phonological LDN between each L2 word and its most lexically similar L1 counterpart (originally 0–1, scaled to 0–10), <italic>proficiency</italic> is the EFCAMDAT L2 proficiency level at which the text was written (1–12, corresponding to CEFR A1–B2), and <italic>frequency</italic> is the baseline Zipf frequency of the target word in English (~1–7.5). Under random effects, <italic>τ</italic><sub><italic>00</italic></sub> and <italic>τ</italic><sub><italic>11</italic></sub> respectively represent the SD of the associated random intercepts and slopes, and <italic>ρ</italic><sub><italic>01</italic></sub> represents the correlation between random intercepts and associated random slopes (here, <italic>distance</italic> for <italic>L1</italic>).</p></caption>
<alternatives>
<graphic id="pone.0281137.t005g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0281137.t005" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left"/>
<th align="center" colspan="6">First subcorpus</th>
<th align="center" colspan="6">Second subcorpus</th>
</tr>
<tr>
<th align="left"><italic>Predictor</italic></th>
<th align="center"><italic>B</italic></th>
<th align="center"><italic>SE</italic><sub><italic>B</italic></sub></th>
<th align="center"><italic>IRR</italic></th>
<th align="center"><italic>SE</italic><sub><italic>IRR</italic></sub></th>
<th align="center"><italic>z</italic></th>
<th align="center"><italic>p</italic></th>
<th align="center"><italic>B</italic></th>
<th align="center"><italic>SE</italic><sub><italic>B</italic></sub></th>
<th align="center"><italic>IRR</italic></th>
<th align="center"><italic>SE</italic><sub><italic>IRR</italic></sub></th>
<th align="center"><italic>z</italic></th>
<th align="center"><italic>p</italic></th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">(Intercept)</td>
<td align="right">-10.32</td>
<td align="right">0.16</td>
<td align="right">0.00</td>
<td align="right">&lt;0.01</td>
<td align="right">-65.40</td>
<td align="right">&lt; .001</td>
<td align="right">-9.86</td>
<td align="right">0.14</td>
<td align="right">0.00</td>
<td align="right">&lt;0.01</td>
<td align="right">-68.45</td>
<td align="right">&lt; .001</td>
</tr>
<tr>
<td align="left">Distance</td>
<td align="right">-0.01</td>
<td align="right">0.01</td>
<td align="right">0.99</td>
<td align="right">0.01</td>
<td align="right">-1.17</td>
<td align="right">.243</td>
<td align="right">-0.01</td>
<td align="right">0.01</td>
<td align="right">0.99</td>
<td align="right">0.01</td>
<td align="right">-0.36</td>
<td align="right">.718</td>
</tr>
<tr>
<td align="left">Proficiency</td>
<td align="right">-0.04</td>
<td align="right">0.02</td>
<td align="right">0.96</td>
<td align="right">0.02</td>
<td align="right">-2.12</td>
<td align="right">.034</td>
<td align="right">0.00</td>
<td align="right">0.02</td>
<td align="right">1.00</td>
<td align="right">0.02</td>
<td align="right">-0.22</td>
<td align="right">.829</td>
</tr>
<tr>
<td align="left">Frequency</td>
<td align="right">3.30</td>
<td align="right">0.21</td>
<td align="right">26.99</td>
<td align="right">5.66</td>
<td align="right">15.70</td>
<td align="right">&lt; .001</td>
<td align="right">3.16</td>
<td align="right">0.19</td>
<td align="right">23.53</td>
<td align="right">4.50</td>
<td align="right">16.50</td>
<td align="right">&lt; .001</td>
</tr>
<tr>
<td align="left">Dist:Prof</td>
<td align="right">0.00</td>
<td align="right">&lt;0.01</td>
<td align="right">1.00</td>
<td align="right">&lt;0.01</td>
<td align="right">0.61</td>
<td align="right">.543</td>
<td align="right">0.00</td>
<td align="right">&lt;0.01</td>
<td align="right">1.00</td>
<td align="right">&lt;0.01</td>
<td align="right">-1.28</td>
<td align="right">.202</td>
</tr>
<tr>
<td align="left"><italic>Random effects</italic></td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
</tr>
<tr>
<td align="left">Learner_τ<sub>00</sub></td>
<td align="right">0.07</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right">0.23</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
</tr>
<tr>
<td align="left">Task_τ<sub>00</sub></td>
<td align="right">0.40</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right">0.33</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
</tr>
<tr>
<td align="left">Word_τ<sub>00</sub></td>
<td align="right">0.38</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right">0.46</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
</tr>
<tr>
<td align="left">Task:Word_τ<sub>00</sub></td>
<td align="right">1.84</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right">1.36</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
</tr>
<tr>
<td align="left">L1_τ<sub>00</sub></td>
<td align="right">0.02</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right">0.03</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
</tr>
<tr>
<td align="left">L1.Distance_τ<sub>11</sub></td>
<td align="right">0.01</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right">0.03</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
</tr>
<tr>
<td align="left">L1_ρ<sub>01</sub></td>
<td align="right">0.55</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right">-0.14</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<p>In addition, there is almost no variance between the L1s based on the associated random effect (SD ≤ 0.03), which suggests that speakers of different L1s used the target words at similar rates. However, this should be interpreted with caution, since this variance is likely underestimated due to the small number of L1s. Nevertheless, its exact magnitude is not crucial to our study, since we focus on the effects of distance, and as shown in Appendix S5 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref> (under “Models with alternative random effects”), the models’ estimates remain functionally identical when the L1 random effect is not included.</p>
<p>By contrast, the random effects of <italic>task</italic> and <italic>word</italic> are stronger than the <italic>L1</italic> effect by an order of magnitude or more (SD = 0.33–0.46), and the <italic>task</italic>:<italic>word</italic> effect is even stronger (SD = 1.36–1.84), which shows that these factors, and primarily the need to use specific words in specific tasks, have a much stronger influence on learners’ rate of use of L2 words. Similarly, <italic>frequency</italic> as a control variable also has a very strong effect (B = 3.16–3.30, corresponding to IRR = 23.53–26.99), which was expected since the response variable is a type of frequency measure.</p>
<p><xref ref-type="table" rid="pone.0281137.t006">Table 6</xref> contains the results of the mixed-models based on the parallel dictionaries. The findings of these models support those of the Swadesh-based models. Specifically, there is essentially no effect of distance or of its interaction with proficiency (B = 0.00–0.01, corresponding to IRR = 1.00–1.01), and the associated SEs are also very small (≤0.01 for both B and IRR). In addition, as in the Swadesh-based models, there is almost no variance based on the <italic>L1</italic> random effect (SD ≤ 0.01), though the number of L1s included is even smaller, which again necessitates caution in the interpretation of the exact magnitude of this effect.</p>
<table-wrap id="pone.0281137.t006" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0281137.t006</object-id>
<label>Table 6</label> <caption><title>Results of the mixed-models, for the parallel-based samples.</title> <p>The response variable was the rate of use of the target L2 English words (i.e., their count offset by the total number of words in each text). Under <italic>fixed effects</italic>, <italic>distance</italic> is the phonological LDN between each L2 word and its most lexically similar L1 counterpart (originally 0–1, scaled to 0–10), <italic>proficiency</italic> is the EFCAMDAT L2 proficiency level at which the text was written (1–12, corresponding to CEFR A1–B2), and <italic>frequency</italic> is the baseline Zipf frequency of the target word in English (~1–7.5). Under random effects, <italic>τ</italic><sub><italic>00</italic></sub> and <italic>τ</italic><sub><italic>11</italic></sub> respectively represent the SD of the associated random intercepts and slopes, and <italic>ρ</italic><sub><italic>01</italic></sub> represents the correlation between random intercepts and associated random slopes (here, <italic>distance</italic> for <italic>L1</italic>).</p></caption>
<alternatives>
<graphic id="pone.0281137.t006g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0281137.t006" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left"/>
<th align="center" colspan="6">First subcorpus</th>
<th align="center" colspan="6">Second subcorpus</th>
</tr>
<tr>
<th align="left"><italic>Predictor</italic></th>
<th align="center"><italic>B</italic></th>
<th align="center"><italic>SE</italic><sub><italic>B</italic></sub></th>
<th align="center"><italic>IRR</italic></th>
<th align="center"><italic>SE</italic><sub><italic>IRR</italic></sub></th>
<th align="center"><italic>z</italic></th>
<th align="center"><italic>p</italic></th>
<th align="center"><italic>B</italic></th>
<th align="center"><italic>SE</italic><sub><italic>B</italic></sub></th>
<th align="center"><italic>IRR</italic></th>
<th align="center"><italic>SE</italic><sub><italic>IRR</italic></sub></th>
<th align="center"><italic>z</italic></th>
<th align="center"><italic>p</italic></th>
</tr>
</thead>
<tbody>
<tr>
<td align="left">(Intercept)</td>
<td align="right">-12.85</td>
<td align="right">0.06</td>
<td align="right">0.00</td>
<td align="right">&lt;0.01</td>
<td align="right">-207.79</td>
<td align="right">&lt; .001</td>
<td align="right">-12.59</td>
<td align="right">0.05</td>
<td align="right">0.00</td>
<td align="right">&lt;0.01</td>
<td align="right">-243.41</td>
<td align="right">&lt; .001</td>
</tr>
<tr>
<td align="left">Distance</td>
<td align="right">0.01</td>
<td align="right">&lt;0.01</td>
<td align="right">1.01</td>
<td align="right">&lt;0.01</td>
<td align="right">1.91</td>
<td align="right">.056</td>
<td align="right">0.01</td>
<td align="right">0.01</td>
<td align="right">1.01</td>
<td align="right">0.01</td>
<td align="right">1.04</td>
<td align="right">.301</td>
</tr>
<tr>
<td align="left">Proficiency</td>
<td align="right">0.11</td>
<td align="right">0.01</td>
<td align="right">1.12</td>
<td align="right">0.01</td>
<td align="right">9.22</td>
<td align="right">&lt; .001</td>
<td align="right">0.04</td>
<td align="right">0.01</td>
<td align="right">1.04</td>
<td align="right">0.01</td>
<td align="right">4.29</td>
<td align="right">&lt; .001</td>
</tr>
<tr>
<td align="left">Frequency</td>
<td align="right">2.89</td>
<td align="right">0.06</td>
<td align="right">18.08</td>
<td align="right">1.05</td>
<td align="right">49.86</td>
<td align="right">&lt; .001</td>
<td align="right">2.97</td>
<td align="right">0.05</td>
<td align="right">19.50</td>
<td align="right">0.99</td>
<td align="right">58.52</td>
<td align="right">&lt; .001</td>
</tr>
<tr>
<td align="left">Dist:Prof</td>
<td align="right">0.00</td>
<td align="right">&lt;0.01</td>
<td align="right">1.00</td>
<td align="right">&lt;0.01</td>
<td align="right">1.25</td>
<td align="right">.211</td>
<td align="right">0.00</td>
<td align="right">&lt;0.01</td>
<td align="right">1.00</td>
<td align="right">&lt;0.01</td>
<td align="right">1.09</td>
<td align="right">.276</td>
</tr>
<tr>
<td align="left"><italic>Random effects</italic></td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
</tr>
<tr>
<td align="left">Learner_τ<sub>00</sub></td>
<td align="right">0.03</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right">0.04</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
</tr>
<tr>
<td align="left">Task_τ<sub>00</sub></td>
<td align="right">0.03</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right">0.11</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
</tr>
<tr>
<td align="left">Word_τ<sub>00</sub></td>
<td align="right">0.45</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right">0.65</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
</tr>
<tr>
<td align="left">Task:Word_τ<sub>00</sub></td>
<td align="right">2.30</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right">1.50</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
</tr>
<tr>
<td align="left">L1_τ<sub>00</sub></td>
<td align="right">0.00</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right">0.01</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
</tr>
<tr>
<td align="left">L1.Distance_τ<sub>11</sub></td>
<td align="right">0.01</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right">0.01</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
</tr>
<tr>
<td align="left">L1_ρ<sub>01</sub></td>
<td align="right">0.25</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right">0.81</td>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
<td align="right"/>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
<p>A minor difference is that there is lower variance in the <italic>task</italic> random effect here (SD = 0.03–0.11). However, there is also greater variance based on the <italic>word</italic> and <italic>task</italic>:<italic>word</italic> effects (SD = 0.45–0.65 and SD = 1.50–2.30 respectively). This supports the overall findings in this regard from the Swadesh models, namely that the need to use specific L2 words in specific tasks strongly influences learners’ tendency to use those words. Finally, and as expected, frequency is a substantial predictor here too (B = 2.89–2.97, IRR = 18.08–19.50).</p>
<p>The results of the models are summarized in <xref ref-type="fig" rid="pone.0281137.g004">Fig 4</xref>, which contains the fixed effects from each model, and which illustrates the lack of effect of lexical distance and of its interaction with L2 proficiency. Furthermore, these results are supported by the comparisons with the baseline models (with no lexical distance), which appear in Appendix S5 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref> (under “Baseline models”).</p>
<fig id="pone.0281137.g004" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0281137.g004</object-id>
<label>Fig 4</label>
<caption>
<title>The models’ fixed effects, illustrating the lack of effect of lexical distance and its interaction with L2 proficiency.</title>
<p><italic>Distance</italic> is the phonological LDN between each L2 word and its most lexically similar L1 counterpart (scaled to 0–10), <italic>proficiency</italic> is the EFCAMDAT L2 proficiency level at which the text was written (scale of 1–12), and <italic>frequency</italic> is the baseline Zipf frequency of the target word in English (scale of ~1–7.5). Dots denote the IRR. Lines denote the 95% CIs; where they seem missing, it is because they are very narrow. Asterisks denote statistical significance of the coefficient estimate (* denotes <italic>p</italic> &lt; .05 and *** denotes <italic>p</italic> &lt; .001).</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0281137.g004" xlink:type="simple"/>
</fig>
<p>Note that the effects of proficiency itself were weak and inconsistent across the models. However, it appears that there is a weak positive effect of proficiency for the parallel-dictionaries samples, likely because they contain some words that are lower-frequency than in the Swadesh lists. This suggests there is an interaction between proficiency and frequency, and this is supported by the “Added-interactions models” shown in Appendix S5 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref>, which also show that including this interaction in the models causes convergence issues, and does not change our key findings.</p>
</sec>
<sec id="sec015" sec-type="conclusions">
<title>Discussion</title>
<sec id="sec016">
<title>Study summary</title>
<p>We investigated whether formal crosslinguistic lexical similarity (phonological overlap) between L1 words and their L2 counterparts increases the use of the L2 words in a task-based educational setting, and whether this is moderated by L2 proficiency.</p>
<p>We found no effect of crosslinguistic similarity on L2 vocabulary use, and no interaction between lexical similarity and L2 proficiency. This null finding was robust across all the combinations of the two subcorpora and two lexical-distance datasets that we examined, since all the associated predictors were tightly clustered around an IRR of 1 (corresponding to a coefficient estimate of 0), as shown in Tables <xref ref-type="table" rid="pone.0281137.t005">5</xref> and <xref ref-type="table" rid="pone.0281137.t006">6</xref> and in <xref ref-type="fig" rid="pone.0281137.g004">Fig 4</xref>. In addition, there was very low variance between the L1s based on the associated random effect (Tables <xref ref-type="table" rid="pone.0281137.t005">5</xref> and <xref ref-type="table" rid="pone.0281137.t006">6</xref>), which suggests that speakers of different L1s used the target words at similar rates, despite the variation in the average lexical distance between them (shown in <xref ref-type="fig" rid="pone.0281137.g001">Fig 1</xref> and <xref ref-type="table" rid="pone.0281137.t001">Table 1</xref>). Conversely, the <italic>task</italic>, <italic>word</italic>, and especially the <italic>task</italic>:<italic>word</italic> random effects strongly influenced learners’ word choices (Tables <xref ref-type="table" rid="pone.0281137.t005">5</xref> and <xref ref-type="table" rid="pone.0281137.t006">6</xref>), which shows that these factors, and primarily the need to use specific words in specific tasks, have a much stronger influence on people’s L2 vocabulary choices.</p>
<p>In addition, these results replicated across a range of supplementary analyses that we conducted, which appear in Appendices S2 and S5 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref>. These include models using feature edit distance, models using German-only data, models using a binary response variable, and models with added interactions.</p>
</sec>
<sec id="sec017">
<title>Main implications</title>
<p>The main implication of our findings is that formal crosslinguistic lexical similarity (in this case, phonological overlap), which relates to cognacy, does not influence learners’ L2 productions in the type of constrained task-based educational setting we examined, which many L2 learners are likely to encounter. This is regardless of learners’ L2 proficiency, and applies to learners at the A1–B2 CEFR range of L2 proficiency, though the complete lack of interaction between lexical similarity and L2 proficiency that we found suggests that this likely applies also to learners at the C1–C2 range of proficiency.</p>
<p>This finding supports the lexical intergroup homogeneity that Crossley and McNamara found among speakers of different L1s in a task-based setting (the ICLE) [<xref ref-type="bibr" rid="pone.0281137.ref025">25</xref>]. This suggests that the lack of L1 effect that they found is not due to their use of a global lexical measure (lexical diversity) or an idiosyncrasy in their sample, but is rather more likely a general feature of L2 lexical production in constrained task-based settings.</p>
<p>At the same time, this does not necessarily contradict studies that found an L1 effect on L2 word choice independently of crosslinguistic lexical similarity (e.g., in stylometry). Rather, the difference may be that the L1 effect found in those studies was driven by factors other than crosslinguistic similarity, such as a strong cultural preference for certain words (e.g., <italic>hockey</italic>), or that there were weaker task effects in their samples (e.g., because the prompts were less constrained).</p>
<p>Our finding also does not necessarily contradict the studies that found an effect of lexical similarity on the processing of individual L2 words or on broad L2 acquisition. Rather, it shows that this effect is different in this specific form of L2 production, where word choice is primarily driven by task-related factors, such as a specific message the learner needs to communicate. This interpretation is supported by the strong effects of <italic>task</italic>, <italic>word</italic>, and <italic>task</italic>:<italic>word</italic> on word choice, which suggest that the need to use a specific word for a specific task is what drives learners’ decision of whether to use it in the present context, regardless of whether the word is similar to their L1.</p>
<p>Accordingly, although L2 words that are similar to their L1 translation are likely easier for learners to access and use, the communicative needs of tasks can override this crosslinguistic influence, and drive learners to use necessary words rather than easier ones. This means that even if the facilitative effect of L1 similarity is there, which we expect is the case, its influence is too weak to drive learners’ word choice in the present setting.</p>
<p>In addition, it is likely that other aspects of the tasks and their educational context played a role in determining word choice, and can play a role in similar contexts (especially—but not only—educational ones). For example, it is likely that the lessons associated with tasks involved words (i.e., <italic>content</italic>) that learners then used for practice, or that some task prompts elicited the use of a specific register (i.e., <italic>style</italic>) that necessitated the use of certain words. This supports and extends limited past research which found that factors such as formality and task type may influence transfer [<xref ref-type="bibr" rid="pone.0281137.ref021">21</xref>], and highlights the importance of considering these situational and contextual factors when investigating transfer.</p>
<p>Finally, note that past studies on the EFCAMDAT found L1 transfer effects on various other linguistic structures and phenomena, including clause subordination [<xref ref-type="bibr" rid="pone.0281137.ref046">46</xref>], relative clauses [<xref ref-type="bibr" rid="pone.0281137.ref047">47</xref>], clause-initial prepositional phrases [<xref ref-type="bibr" rid="pone.0281137.ref048">48</xref>], grammatical morphemes [<xref ref-type="bibr" rid="pone.0281137.ref049">49</xref>], articles [<xref ref-type="bibr" rid="pone.0281137.ref050">50</xref>], and capitalization [<xref ref-type="bibr" rid="pone.0281137.ref044">44</xref>]. X. Jiang et al. even found evidence of lexical transfer on the usage rates of certain punctuation marks (e.g., dashes) and phrases (e.g., “to my mind”) [<xref ref-type="bibr" rid="pone.0281137.ref048">48</xref>].</p>
<p>The reason why they found an effect in this sample whereas we did not could be that the types of transfer involved in the structures they examined might not be as strongly influenced by communicative needs and task effects. For example, if a speaker wants to convey the meaning “I ate an apple”, saying “apple” (a key content word) is generally more important than saying “an” (a functional element), since “I ate apple” conveys the original meaning more clearly than “I ate an”. Alternatively, another potential—and not mutually exclusive—explanation for the difference in the finding is that negative transfer (which was the focus of most of those past studies) may be “stronger” from a cognitive perspective than positive transfer (which was the focus of the present study), and therefore more difficult for communicative needs and task effects to override. This ties in to earlier discussions on the differences between these types of transfer [<xref ref-type="bibr" rid="pone.0281137.ref051">51</xref>].</p>
</sec>
<sec id="sec018">
<title>Task effects in lexical choices</title>
<p>The strong task effects that were found in this study contribute to the growing evidence on the role of these effects in L2 lexical choices [<xref ref-type="bibr" rid="pone.0281137.ref039">39</xref>, <xref ref-type="bibr" rid="pone.0281137.ref045">45</xref>, <xref ref-type="bibr" rid="pone.0281137.ref052">52</xref>–<xref ref-type="bibr" rid="pone.0281137.ref055">55</xref>]. This highlights the importance of controlling for such effects (e.g., the purpose or context of production) when analyzing L2 lexical choices, particularly in learner corpora, where they can often play a substantial role.</p>
</sec>
<sec id="sec019">
<title>Limitations and future research</title>
<p>One limitation of this study is the use of one learner sample, so the analyses should be replicated on other samples, to determine the generalizability of the findings. Such replications can, for example, analyze speaking (rather than writing), analyze a different L2 (since English is a lingua franca), or analyze productions in other settings. It will be particularly beneficial to analyze L2 productions from learners who are writing in similar general settings, but under different levels of the communicative-constraints spectrum. Likewise, it would be interesting to compare written L2 productions to spoken ones, when these are produced by similar learners under similar conditions. This will show whether and how the effect of this crosslinguistic similarity, if it appears, varies across these two modes of language production.</p>
<p>Other limitations are the use of LDN, which does not directly capture information such as cognacy status, and the use of L2 words that often did not appear in learners’ writing. Given all the information we presented (e.g., regarding the distribution of the response variable), we do not think that these limitations explain the null effect that we found. Nevertheless, it will be beneficial to replicate our analyses using other lexical-distance datasets and measures. It will be particularly beneficial to use a dataset such as <italic>CogNet</italic>, to examine the effects of cognacy directly, and to analyze more L2 words.</p>
<p>When doing this, it is also possible to focus on preference for cognates within sets of synonyms corresponding to the same meaning, similarly to Rabinovich et al. [<xref ref-type="bibr" rid="pone.0281137.ref013">13</xref>]. As discussed in Appendix S3 in <xref ref-type="supplementary-material" rid="pone.0281137.s001">S1 File</xref> (under “Analysis of synonym sets”), this can be done by comparing, within each set, the probability that speakers of different L1s will use any given synonym, and checking if their choices reflect a preference for cognates.</p>
<p>In addition, future research could also refine these analyses by accounting for further factors. For example, it might be beneficial to look at the baseline L1 frequency of words within specific genres that correspond to the associated writing tasks, rather than in the L1 as a whole. Similarly, it may be beneficial to examine the effects of genre and formality on the crosslinguistic influence that learners display in their L2 productions.</p>
<p>Finally, future research could also address the questions outlined in the discussion of the study’s main implications. Notably, this could involve comparing the effects of communicative needs on different types of transfer, such as positive vs. negative transfer, or lexical vs. syntactic transfer.</p>
</sec>
</sec>
<sec id="sec020" sec-type="conclusions">
<title>Conclusions</title>
<p>In the present task-based educational setting, formal lexical similarity—which relates to cognacy and which we based on phonological overlap between corresponding L1-L2 words—did not influence L2 word choice, regardless of learners’ L2 proficiency. This suggests that the effects of formal lexical similarity are more constrained than expected, and that communicative needs and task effects can sometimes override the influence of positive lexical transfer. This raises questions regarding when and how communicative needs and task effects influence language transfer, for example in different types of transfer (e.g., lexical vs. syntactic, positive vs. negative).</p>
</sec>
<sec id="sec021" sec-type="supplementary-material">
<title>Supporting information</title>
<supplementary-material id="pone.0281137.s001" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pone.0281137.s001" xlink:type="simple">
<label>S1 File</label>
<caption>
<title>Supplementary appendices.</title>
<p>This includes Appendix S1 (Lexical distance), Appendix S2 (Feature edit distance), Appendix S3 (Additional descriptive information), Appendix S4 (Additional technical information), and Appendix S5 (Additional models).</p>
<p>(PDF)</p>
</caption>
</supplementary-material>
</sec>
</body>
<back>
<ack>
<p>This research is based on the PhD thesis of the first author [<xref ref-type="bibr" rid="pone.0281137.ref056">56</xref>].</p>
</ack>
<ref-list>
<title>References</title>
<ref id="pone.0281137.ref001"><label>1</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Jarvis</surname> <given-names>S.</given-names></name> <chapter-title>Lexical transfer</chapter-title>. In: <name name-style="western"><surname>Pavlenko</surname> <given-names>A</given-names></name>, editor. <source>The bilingual mental lexicon: Interdisciplinary approaches</source>. <publisher-loc>Clevedon, UK</publisher-loc>: <publisher-name>Multilingual Matters</publisher-name>; <year>2009</year>. p. <fpage>99</fpage>–<lpage>124</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref002"><label>2</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Ringbom</surname> <given-names>H.</given-names></name> <source>Cross-linguistic similarity in foreign language learning</source>. <publisher-loc>Clevedon, UK</publisher-loc>: <publisher-name>Multilingual Matters</publisher-name>; <year>2007</year>.</mixed-citation></ref>
<ref id="pone.0281137.ref003"><label>3</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Odlin</surname> <given-names>T.</given-names></name> <chapter-title>Crosslinguistic influence in second language acquisition</chapter-title>. In: <name name-style="western"><surname>Chapelle</surname> <given-names>CA</given-names></name>, editor. <source>The encyclopedia of applied linguistics</source>. <publisher-loc>Oxford, UK</publisher-loc>: <publisher-name>Blackwell Publishing</publisher-name>; <year>2013</year>. p. <fpage>1562</fpage>–<lpage>1568</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref004"><label>4</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Jarvis</surname> <given-names>S.</given-names></name> <chapter-title>Transfer: An overview with an expanded scope</chapter-title>. In: <name name-style="western"><surname>Golden</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Jarvis</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Tenfjord</surname> <given-names>K</given-names></name>, editors. <source>Crosslinguistic influence and distinctive patterns of language learning</source>. <publisher-loc>Bristol, UK</publisher-loc>: <publisher-name>Multilingual Matters</publisher-name>; <year>2017</year>. p. <fpage>12</fpage>–<lpage>28</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref005"><label>5</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Llach</surname> <given-names>MPA</given-names></name>. <article-title>An overview of variables affecting lexical transfer in writing: A review study</article-title>. <source>Int J Linguist</source> [Internet]. <year>2010</year>;<volume>2</volume>(<issue>1</issue>):<fpage>E2</fpage>. Available from: <ext-link ext-link-type="uri" xlink:href="http://macrothink.org/journal/index.php/ijl/article/view/445" xlink:type="simple">http://macrothink.org/journal/index.php/ijl/article/view/445</ext-link></mixed-citation></ref>
<ref id="pone.0281137.ref006"><label>6</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Williams</surname> <given-names>JN</given-names></name>. <chapter-title>The bilingual lexicon</chapter-title>. In: <name name-style="western"><surname>Taylor</surname> <given-names>J</given-names></name>, editor. <source>The Oxford handbook of the word</source>. <publisher-name>Oxford University Press</publisher-name>; <year>2015</year>.</mixed-citation></ref>
<ref id="pone.0281137.ref007"><label>7</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Helms-Park</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Dronjic</surname> <given-names>V</given-names></name>. <chapter-title>Crosslinguistic lexical influence: Cognate facilitation</chapter-title>. In: <name name-style="western"><surname>Alonso</surname> <given-names>RA</given-names></name>, editor. <source>Crosslinguistic influence in second language acquisition</source>. <publisher-loc>Bristol, UK</publisher-loc>: <publisher-name>Multilingual Matters</publisher-name>; <year>2013</year>. p. <fpage>71</fpage>–<lpage>92</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref008"><label>8</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Costa</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Caramazza</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Sebastian-Galles</surname> <given-names>N</given-names></name>. <article-title>The cognate facilitation effect: Implications for models of lexical access</article-title>. <source>J Exp Psychol Learn Mem Cogn</source>. <year>2000</year>;<volume>26</volume>(<issue>5</issue>):<fpage>1283</fpage>–<lpage>96</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1037//0278-7393.26.5.1283" xlink:type="simple">10.1037//0278-7393.26.5.1283</ext-link></comment> <object-id pub-id-type="pmid">11009258</object-id></mixed-citation></ref>
<ref id="pone.0281137.ref009"><label>9</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Tonzar</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Lotto</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Job</surname> <given-names>R</given-names></name>. <article-title>L2 vocabulary acquisition in children: Effects of learning method and cognate status</article-title>. <source>Lang Learn</source>. <year>2009</year>;<volume>59</volume>(<issue>3</issue>):<fpage>623</fpage>–<lpage>46</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref010"><label>10</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hall</surname> <given-names>CJ</given-names></name>. <article-title>The automatic cognate form assumption: Evidence for the parasitic model of vocabulary development</article-title>. <source>Int Rev Appl Linguist Lang Teach</source>. <year>2002</year>;<volume>40</volume>(<issue>2</issue>):<fpage>69</fpage>–<lpage>87</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref011"><label>11</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Carrasco-Ortiz</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Amengual</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Gries</surname> <given-names>ST</given-names></name>. <article-title>Cross-language effects of phonological and orthographic similarity in cognate word recognition</article-title>. <source>Linguist Approaches to Biling</source>. <year>2021</year>;<volume>11</volume>(<issue>3</issue>):<fpage>389</fpage>–<lpage>417</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref012"><label>12</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Dijkstra</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Miwa</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Brummelhuis</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Sappelli</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Baayen</surname> <given-names>H</given-names></name>. <article-title>How cross-language similarity and task demands affect cognate recognition</article-title>. <source>J Mem Lang</source> [Internet]. <year>2010</year>;<volume>62</volume>(<issue>3</issue>):<fpage>284</fpage>–<lpage>301</lpage>. Available from: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.jml.2009.12.003" xlink:type="simple">https://doi.org/10.1016/j.jml.2009.12.003</ext-link></mixed-citation></ref>
<ref id="pone.0281137.ref013"><label>13</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Rabinovich</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Tsvetkov</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Wintner</surname> <given-names>S</given-names></name>. <chapter-title>Native language cognate effects on second language lexical choice</chapter-title>. In: <source>Transactions of the Association for Computational Linguistics</source> [Internet]. <publisher-name>Association for Computational Linguistics</publisher-name>; <year>2018</year>. p. <fpage>329</fpage>–<lpage>42</lpage>. Available from: <ext-link ext-link-type="uri" xlink:href="https://www.aclweb.org/anthology/Q18-1024/" xlink:type="simple">https://www.aclweb.org/anthology/Q18-1024/</ext-link></mixed-citation></ref>
<ref id="pone.0281137.ref014"><label>14</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Schepens</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Dijkstra</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Grootjen</surname> <given-names>F</given-names></name>. <article-title>Distributions of cognates in Europe as based on Levenshtein distance</article-title>. <source>Bilingualism</source>. <year>2012</year>;<volume>15</volume>(<issue>1</issue>):<fpage>157</fpage>–<lpage>66</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref015"><label>15</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Jiang</surname> <given-names>N.</given-names></name> <article-title>Form-meaning mapping in vocabulary acquisition in a second language</article-title>. <source>Stud Second Lang Acquis</source>. <year>2002</year>;<volume>24</volume>:<fpage>617</fpage>–<lpage>37</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref016"><label>16</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ecke</surname> <given-names>P.</given-names></name> <article-title>Parasitic vocabulary acquisition, cross-linguistic influence, and lexical retrieval in multilinguals</article-title>. <source>Bilingualism</source>. <year>2015</year>;<volume>18</volume>(<issue>2</issue>):<fpage>145</fpage>–<lpage>62</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref017"><label>17</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Schepens</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>van Hout</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Jaeger</surname> <given-names>TF</given-names></name>. <article-title>Big data suggest strong constraints of linguistic similarity on adult language learning</article-title>. <source>Cognition</source>. <year>2020</year>;<volume>194</volume>:Article 104056. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1016/j.cognition.2019.104056" xlink:type="simple">10.1016/j.cognition.2019.104056</ext-link></comment> <object-id pub-id-type="pmid">31733600</object-id></mixed-citation></ref>
<ref id="pone.0281137.ref018"><label>18</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Schepens</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>van der Slik</surname> <given-names>F</given-names></name>, <name name-style="western"><surname>van Hout</surname> <given-names>R</given-names></name>. <chapter-title>The effect of linguistic distance across Indo-European mother tongues on learning Dutch as a second language</chapter-title>. In: <name name-style="western"><surname>Borin</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Saxena</surname> <given-names>A</given-names></name>, editors. <source>Approaches to measuring linguistic differences</source>. <publisher-loc>Berlin, Germany</publisher-loc>: <publisher-name>De Gruyter</publisher-name>; <year>2013</year>. p. <fpage>199</fpage>–<lpage>230</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref019"><label>19</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>van der Slik</surname> <given-names>FWP</given-names></name>. <article-title>Acquisition of Dutch as a second language: The explanative power of cognate and genetic linguistic distance measures for 11 West European first languages</article-title>. <source>Stud Second Lang Acquis</source>. <year>2010</year>;<volume>32</volume>(<issue>3</issue>):<fpage>401</fpage>–<lpage>32</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref020"><label>20</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Kellerman</surname> <given-names>E.</given-names></name> <chapter-title>Now you see it, now you don’t</chapter-title>. In: <name name-style="western"><surname>Gass</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Selinker</surname> <given-names>L</given-names></name>, editors. <source>Language transfer in language learning</source>. <publisher-loc>Rowley, MA</publisher-loc>: <publisher-name>Newbury House</publisher-name>; <year>1983</year>. p. <fpage>112</fpage>–<lpage>34</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref021"><label>21</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Jarvis</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Pavlenko</surname> <given-names>A</given-names></name>. <source>Crosslinguistic influence in language and cognition</source>. <publisher-loc>New York, NY</publisher-loc>: <publisher-name>Routledge</publisher-name>; <year>2008</year>.</mixed-citation></ref>
<ref id="pone.0281137.ref022"><label>22</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Jarvis</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Castañeda-Jiménez</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Nielsen</surname> <given-names>R</given-names></name>. <chapter-title>Detecting L2 writers’ L1s on the basis of their lexical styles</chapter-title>. In: <name name-style="western"><surname>Jarvis</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Crossley</surname> <given-names>SA</given-names></name>, editors. <source>Approaching language transfer through text classification: Explorations in the detection-based approach</source>. <publisher-loc>Bristol, United Kingdom</publisher-loc>: <publisher-name>Multilingual Matters</publisher-name>; <year>2012</year>. p. <fpage>34</fpage>–<lpage>70</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref023"><label>23</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Kyle</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Crossley</surname> <given-names>SA</given-names></name>, <name name-style="western"><surname>Kim</surname> <given-names>Y</given-names></name>. <article-title>Native language identification and writing proficiency</article-title>. <source>Int J Learn Corpus Res</source>. <year>2015</year>;<volume>1</volume>(<issue>2</issue>):<fpage>187</fpage>–<lpage>209</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref024"><label>24</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Stemle</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Onysko</surname> <given-names>A</given-names></name>. <chapter-title>Automated L1 identification in English learner essays and its implications for language transfer</chapter-title>. In: <name name-style="western"><surname>Peukert</surname> <given-names>H</given-names></name>, editor. <source>Transfer effects in multilingual language development</source>. <publisher-loc>Amsterdam/Philadelphia</publisher-loc>: <publisher-name>John Benjamins Publishing Company</publisher-name>; <year>2015</year>. p. <fpage>297</fpage>–<lpage>321</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref025"><label>25</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Crossley</surname> <given-names>SA</given-names></name>, <name name-style="western"><surname>McNamara</surname> <given-names>DS</given-names></name>. <article-title>Shared features of L2 writing: Intergroup homogeneity and text classification</article-title>. <source>J Second Lang Writ</source>. <year>2011</year>;<volume>20</volume>(<issue>4</issue>):<fpage>271</fpage>–<lpage>85</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref026"><label>26</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Wichmann</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Holman</surname> <given-names>EW</given-names></name>, <name name-style="western"><surname>Brown</surname> <given-names>CH</given-names></name>. <source>The ASJP database</source> [Internet]. <year>2018</year>. Available from: <ext-link ext-link-type="uri" xlink:href="https://asjp.clld.org/" xlink:type="simple">https://asjp.clld.org/</ext-link></mixed-citation></ref>
<ref id="pone.0281137.ref027"><label>27</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Swadesh</surname> <given-names>M.</given-names></name> <article-title>Salish internal relationships</article-title>. <source>Int J Am Linguist</source>. <year>1950</year>;<volume>16</volume>(<issue>4</issue>):<fpage>157</fpage>–<lpage>67</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref028"><label>28</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Key</surname> <given-names>MR</given-names></name>, <name name-style="western"><surname>Comrie</surname> <given-names>B</given-names></name>. <source>The intercontinental dictionary series</source> [Internet]. <publisher-loc>Leipzig</publisher-loc>: <publisher-name>Max Planck Institute for Evolutionary Anthropology</publisher-name>; <year>2015</year>. Available from: <ext-link ext-link-type="uri" xlink:href="https://ids.clld.org/" xlink:type="simple">https://ids.clld.org/</ext-link></mixed-citation></ref>
<ref id="pone.0281137.ref029"><label>29</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>De Wilde</surname> <given-names>V</given-names></name>, <name name-style="western"><surname>Brysbaert</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Eyckmans</surname> <given-names>J</given-names></name>. <article-title>Formal versus informal L2 learning: How do individual differences and word-related variables influence French and English L2 vocabulary learning in Dutch-speaking children?</article-title> <source>Stud Second Lang Acquis</source>. <year>2022</year>;<volume>44</volume>(<issue>1</issue>):<fpage>87</fpage>–<lpage>111</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref030"><label>30</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>De Wilde</surname> <given-names>V</given-names></name>, <name name-style="western"><surname>Brysbaert</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Eyckmans</surname> <given-names>J</given-names></name>. <article-title>Learning English through out-of-school exposure: How do word-related variables and proficiency influence receptive vocabulary learning?</article-title> <source>Lang Learn</source>. <year>2020</year>;<volume>70</volume>(<issue>2</issue>):<fpage>349</fpage>–<lpage>81</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref031"><label>31</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Beijering</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Gooskens</surname> <given-names>C</given-names></name>, <name name-style="western"><surname>Heeringa</surname> <given-names>W</given-names></name>. <chapter-title>Predicting intelligibility and perceived linguistic distance by means of the Levenshtein algorithm</chapter-title>. In: <name name-style="western"><surname>Koppen</surname> <given-names>M van</given-names></name>, <name name-style="western"><surname>Botma</surname> <given-names>B</given-names></name>, editors. <source>Linguistics in the Netherlands</source>. <publisher-loc>Amsterdam</publisher-loc>: <publisher-name>John Benjamins</publisher-name>; <year>2008</year>. p. <fpage>13</fpage>–<lpage>24</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref032"><label>32</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Sadat</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Pureza</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Alario</surname> <given-names>FX</given-names></name>. <article-title>Traces of an early learned second language in discontinued bilingualism</article-title>. <source>Lang Learn</source>. <year>2016</year>;<volume>66</volume>(<issue>Suppl. 2</issue>):<fpage>210</fpage>–<lpage>33</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref033"><label>33</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Speer</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Chin</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Lin</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Jewett</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Nathan</surname> <given-names>L</given-names></name>. <source>wordfreq</source> [Internet]. <publisher-name>LuminosoInsight</publisher-name>; <year>2018</year>. Available from: <ext-link ext-link-type="uri" xlink:href="https://github.com/LuminosoInsight/wordfreq/" xlink:type="simple">https://github.com/LuminosoInsight/wordfreq/</ext-link></mixed-citation></ref>
<ref id="pone.0281137.ref034"><label>34</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>van Heuven</surname> <given-names>WJB</given-names></name>, <name name-style="western"><surname>Mandera</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Keuleers</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Brysbaert</surname> <given-names>M</given-names></name>. <article-title>SUBTLEX-UK: A new and improved word frequency database for British English</article-title>. <source>Q J Exp Psychol</source>. <year>2014</year>;<volume>67</volume>(<issue>6</issue>):<fpage>1176</fpage>–<lpage>90</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1080/17470218.2013.850521" xlink:type="simple">10.1080/17470218.2013.850521</ext-link></comment> <object-id pub-id-type="pmid">24417251</object-id></mixed-citation></ref>
<ref id="pone.0281137.ref035"><label>35</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Geertzen</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Alexopoulou</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Baker</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Jiang</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Korhonen</surname> <given-names>A</given-names></name>. <source>The EF-Cambridge open language database (EFCAMDAT) user manual part I: Written production</source>. <year>2013</year>. Available from: <ext-link ext-link-type="uri" xlink:href="https://corpus.mml.cam.ac.uk/" xlink:type="simple">https://corpus.mml.cam.ac.uk/</ext-link></mixed-citation></ref>
<ref id="pone.0281137.ref036"><label>36</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Huang</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Murakami</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Alexopoulou</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Korhonen</surname> <given-names>A</given-names></name>. <article-title>Dependency parsing of learner English</article-title>. <source>Int J Corpus Linguist</source>. <year>2018</year>;<volume>23</volume>(<issue>1</issue>):<fpage>28</fpage>–<lpage>54</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref037"><label>37</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Huang</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Geertzen</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Baker</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Korhonen</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Alexopoulou</surname> <given-names>T</given-names></name>. <source>The EF-Cambridge open language database (EFCAMDAT): Information for users</source> [Internet]. <year>2017</year>. p. <fpage>1</fpage>–<lpage>18</lpage>. Available from: <ext-link ext-link-type="uri" xlink:href="https://corpus.mml.cam.ac.uk/" xlink:type="simple">https://corpus.mml.cam.ac.uk/</ext-link></mixed-citation></ref>
<ref id="pone.0281137.ref038"><label>38</label><mixed-citation publication-type="book" xlink:type="simple"><collab>Council of Europe</collab>. <source>Common European framework of reference for languages: Learning, teaching, assessment</source>. <publisher-loc>Cambridge, United Kingdom</publisher-loc>: <publisher-name>Cambridge University Press</publisher-name>; <year>2001</year>.</mixed-citation></ref>
<ref id="pone.0281137.ref039"><label>39</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Alexopoulou</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Michel</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Murakami</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Meurers</surname> <given-names>D</given-names></name>. <article-title>Task effects on linguistic complexity and accuracy: A large-scale learner corpus analysis employing natural language processing techniques</article-title>. <source>Lang Learn</source>. <year>2017</year>;<volume>67</volume>(<issue>S1</issue>):<fpage>180</fpage>–<lpage>208</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref040"><label>40</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Shatz</surname> <given-names>I</given-names></name>. <article-title>Refining and modifying the EFCAMDAT: Lessons from creating a new corpus from an existing large-scale English learner language database</article-title>. <source>Int J Learn Corpus Res</source>. <year>2020</year>;<volume>6</volume>(<issue>2</issue>):<fpage>221</fpage>–<lpage>37</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref041"><label>41</label><mixed-citation publication-type="other" xlink:type="simple">Hartig F. DHARMa: Residual diagnostics for hierarchical (multi-level / mixed) regression models [Internet]. 2021. Available from: <ext-link ext-link-type="uri" xlink:href="http://web.archive.org/web/20210528100353/https://cran.r-project.org/web/packages/DHARMa/vignettes/DHARMa.html" xlink:type="simple">http://web.archive.org/web/20210528100353/https://cran.r-project.org/web/packages/DHARMa/vignettes/DHARMa.html</ext-link></mixed-citation></ref>
<ref id="pone.0281137.ref042"><label>42</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Hox</surname> <given-names>JJ</given-names></name>, <name name-style="western"><surname>Moerbeek</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Schoot</surname> <given-names>R van de</given-names></name>. <source>Multilevel analysis: Techniques and applications</source>. <publisher-loc>New York, NY, USA</publisher-loc>: <publisher-name>Routledge</publisher-name>; <year>2018</year>.</mixed-citation></ref>
<ref id="pone.0281137.ref043"><label>43</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Winter</surname> <given-names>B.</given-names></name> <source>Statistics for linguists: An introduction using R</source>. <publisher-name>Routledge</publisher-name>; <year>2019</year>.</mixed-citation></ref>
<ref id="pone.0281137.ref044"><label>44</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Shatz</surname> <given-names>I.</given-names></name> <article-title>How native language and L2 proficiency affect EFL learners’ capitalisation abilities: A large-scale corpus study</article-title>. <source>Corpora</source>. <year>2019</year>;<volume>14</volume>(<issue>2</issue>):<fpage>173</fpage>–<lpage>202</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref045"><label>45</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Michel</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Murakami</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Alexopoulou</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Meurers</surname> <given-names>D</given-names></name>. <article-title>Effects of task type on morphosyntactic complexity across proficiency: Evidence from a large learner corpus of A1 to C2 writings</article-title>. <source>Instr Second Lang Acquis</source>. <year>2019</year>;<volume>3</volume>(<issue>2</issue>):<fpage>124</fpage>–<lpage>52</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref046"><label>46</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Chen</surname> <given-names>X</given-names></name>, <name name-style="western"><surname>Alexopoulou</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Tsimpli</surname> <given-names>I</given-names></name>. <article-title>Automatic extraction of subordinate clauses and its application in second language acquisition research</article-title>. <source>Behav Res Methods</source>. <year>2021</year>;<volume>53</volume>(<issue>2</issue>):<fpage>803</fpage>–<lpage>17</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.3758/s13428-020-01456-7" xlink:type="simple">10.3758/s13428-020-01456-7</ext-link></comment> <object-id pub-id-type="pmid">32875403</object-id></mixed-citation></ref>
<ref id="pone.0281137.ref047"><label>47</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Alexopoulou</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Geertzen</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Korhonen</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Meurers</surname> <given-names>D</given-names></name>. <article-title>Exploring big educational learner corpora for SLA research: Perspectives on relative clauses</article-title>. <source>Int J Learn Corpus Res</source>. <year>2015</year>;<volume>1</volume>(<issue>1</issue>):<fpage>96</fpage>–<lpage>129</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref048"><label>48</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Jiang</surname> <given-names>X</given-names></name>, <name name-style="western"><surname>Guo</surname> <given-names>Y</given-names></name>, <name name-style="western"><surname>Geertzen</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Alexopoulou</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>Sun</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Korhonen</surname> <given-names>A</given-names></name>. <chapter-title>Native language identification using large, longitudinal data</chapter-title>. In: <name name-style="western"><surname>Calzolari</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Choukri</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Declerck</surname> <given-names>T</given-names></name>, <name name-style="western"><surname>Loftsson</surname> <given-names>H</given-names></name>, <name name-style="western"><surname>Maegaard</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Mariani</surname> <given-names>J</given-names></name>, <etal>et al</etal>., editors. <source>Proceedings of the Ninth International Conference on Language Resources and Evaluation</source>. <publisher-loc>Reykjavik, Iceland</publisher-loc>: <publisher-name>European Language Resources Association</publisher-name>; <year>2014</year>. p. <fpage>3309</fpage>–<lpage>12</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref049"><label>49</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Murakami</surname> <given-names>A.</given-names></name> <article-title>Modeling systematicity and individuality in nonlinear second language development: The case of English grammatical morphemes</article-title>. <source>Lang Learn</source>. <year>2016</year>;<volume>66</volume>(<issue>4</issue>):<fpage>834</fpage>–<lpage>71</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref050"><label>50</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Shatz</surname> <given-names>I.</given-names></name> <chapter-title>Native language influence during second language acquisition: A large-scale learner corpus analysis</chapter-title>. In: <name name-style="western"><surname>Hirakawa</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Matthews</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Otaki</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Snape</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Umeda</surname> <given-names>M</given-names></name>, editors. <source>Proceedings of the Pacific Second Language Research Forum (PacSLRF 2016)</source>. <publisher-loc>Hiroshima, Japan</publisher-loc>: <publisher-name>Japan Second Language Association</publisher-name>; <year>2017</year>. p. <fpage>175</fpage>–<lpage>80</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref051"><label>51</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Schachter</surname> <given-names>J.</given-names></name> <article-title>An error in error analysis</article-title>. <source>Lang Learn</source>. <year>1974</year>;<volume>24</volume>(<issue>2</issue>):<fpage>205</fpage>–<lpage>14</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref052"><label>52</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Zenker</surname> <given-names>F</given-names></name>, <name name-style="western"><surname>Kyle</surname> <given-names>K</given-names></name>. <article-title>Investigating minimum text lengths for lexical diversity indices</article-title>. <source>Assess Writ</source>. <year>2021</year>;<volume>47</volume>:<fpage>100505</fpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref053"><label>53</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Kyle</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Crossley</surname> <given-names>SA</given-names></name>, <name name-style="western"><surname>McNamara</surname> <given-names>DS</given-names></name>. <article-title>Construct validity in TOEFL iBT speaking tasks: Insights from natural language processing</article-title>. <source>Lang Test</source>. <year>2016</year>;<volume>33</volume>(<issue>3</issue>):<fpage>319</fpage>–<lpage>40</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref054"><label>54</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Reid</surname> <given-names>J.</given-names></name> <chapter-title>Using the writer’s workbench in composition teaching and testing</chapter-title>. In: <name name-style="western"><surname>Stansfield</surname> <given-names>C</given-names></name>, editor. <source>Technology and language testing</source>. <publisher-name>TESOL</publisher-name>; <year>1986</year>. p. <fpage>167</fpage>–<lpage>188</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref055"><label>55</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Michel</surname> <given-names>M</given-names></name>. <chapter-title>Complexity, accuracy and fluency in L2 production</chapter-title>. In: <name name-style="western"><surname>Loewen</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Sato</surname> <given-names>M</given-names></name>, editors. <source>The Routledge handbook of instructed second language acquisition</source>. <publisher-loc>London, United Kingdom</publisher-loc>: <publisher-name>Routledge</publisher-name>; <year>2017</year>. p. <fpage>50</fpage>–<lpage>68</lpage>.</mixed-citation></ref>
<ref id="pone.0281137.ref056"><label>56</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Shatz</surname> <given-names>I.</given-names></name> <source>The potential influence of crosslinguistic similarity on lexical transfer: Examining vocabulary use in L2 English</source>. <publisher-name>University of Cambridge</publisher-name>; <year>2022</year>.</mixed-citation></ref>
</ref-list>
</back>
<sub-article article-type="aggregated-review-documents" id="pone.0281137.r001" specific-use="decision-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pone.0281137.r001</article-id>
<title-group>
<article-title>Decision Letter 0</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Bongelli</surname>
<given-names>Ramona</given-names>
</name>
<role>Academic Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2023</copyright-year>
<copyright-holder>Ramona Bongelli</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pone.0281137" document-id-type="doi" document-type="article" id="rel-obj001" link-type="peer-reviewed-article"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>0</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">8 Jan 2023</named-content>
</p>
<p><!-- <div> -->PONE-D-22-31186<!-- </div> --></p>
<p><!-- <div> -->The Potential Influence of Crosslinguistic Similarity on Lexical Transfer: Examining Vocabulary Choice in L2 English<!-- </div> --></p>
<p><!-- <div> -->PLOS ONE</p>
<p>Dear Dr. Shatz,</p>
<p>Thank you for submitting your manuscript to PLOS ONE. After careful consideration, we feel that it has merit but does not fully meet PLOS ONE’s publication criteria as it currently stands. Therefore, we invite you to submit a revised version of the manuscript that addresses the points raised during the review process.</p>
<p>Please submit your revised manuscript by Feb 22 2023 11:59PM. If you will need more time than this to complete your revisions, please reply to this message or contact the journal office at <email xlink:type="simple">plosone@plos.org</email>. When you're ready to submit your revision, log on to <ext-link ext-link-type="uri" xlink:href="https://www.editorialmanager.com/pone/" xlink:type="simple">https://www.editorialmanager.com/pone/</ext-link> and select the 'Submissions Needing Revision' folder to locate your manuscript file.</p>
<p>Please include the following items when submitting your revised manuscript:<!-- </div> --><list list-type="bullet"><list-item><p>A rebuttal letter that responds to each point raised by the academic editor and reviewer(s). You should upload this letter as a separate file labeled 'Response to Reviewers'.</p></list-item><list-item><p>A marked-up copy of your manuscript that highlights changes made to the original version. You should upload this as a separate file labeled 'Revised Manuscript with Track Changes'.</p></list-item><list-item><p>An unmarked version of your revised paper without tracked changes. You should upload this as a separate file labeled 'Manuscript'.</p></list-item></list><!-- <div> -->If you would like to make changes to your financial disclosure, please include your updated statement in your cover letter. Guidelines for resubmitting your figure files are available below the reviewer comments at the end of this letter.</p>
<p>If applicable, we recommend that you deposit your laboratory protocols in protocols.io to enhance the reproducibility of your results. Protocols.io assigns your protocol its own identifier (DOI) so that it can be cited independently in the future. For instructions see: <ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/plosone/s/submission-guidelines#loc-laboratory-protocols" xlink:type="simple">https://journals.plos.org/plosone/s/submission-guidelines#loc-laboratory-protocols</ext-link>. Additionally, PLOS ONE offers an option for publishing peer-reviewed Lab Protocol articles, which describe protocols hosted on protocols.io. Read more information on sharing protocols at <ext-link ext-link-type="uri" xlink:href="https://plos.org/protocols?utm_medium=editorial-email&amp;utm_source=authorletters&amp;utm_campaign=protocols" xlink:type="simple">https://plos.org/protocols?utm_medium=editorial-email&amp;utm_source=authorletters&amp;utm_campaign=protocols</ext-link>.</p>
<p>We look forward to receiving your revised manuscript.</p>
<p>Kind regards,</p>
<p>Ramona Bongelli, Ph.D.</p>
<p>Academic Editor</p>
<p>PLOS ONE</p>
<p>Journal Requirements:</p>
<p>When submitting your revision, we need you to address these additional requirements.</p>
<p>1. Please ensure that your manuscript meets PLOS ONE's style requirements, including those for file naming. The PLOS ONE style templates can be found at <ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/plosone/s/file?id=wjVg/PLOSOne_formatting_sample_main_body.pdf" xlink:type="simple">https://journals.plos.org/plosone/s/file?id=wjVg/PLOSOne_formatting_sample_main_body.pdf</ext-link> and <ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/plosone/s/file?id=ba62/PLOSOne_formatting_sample_title_authors_affiliations.pdf" xlink:type="simple">https://journals.plos.org/plosone/s/file?id=ba62/PLOSOne_formatting_sample_title_authors_affiliations.pdf</ext-link>.</p>
<p>2. We noted in your submission details that a portion of your manuscript may have been presented or published elsewhere. [Yes; the full content of the paper (with some modifications) has been published as part of the first author's PhD thesis (see Ch. 4), available at <ext-link ext-link-type="uri" xlink:href="https://www.repository.cam.ac.uk/handle/1810/339032" xlink:type="simple">https://www.repository.cam.ac.uk/handle/1810/339032</ext-link></p>
<p>This does not constitute dual publication based on PLOS's guidelines (<ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/plosone/s/ethical-publishing-practice" xlink:type="simple">https://journals.plos.org/plosone/s/ethical-publishing-practice</ext-link>), which state that "Prior publication of research as a thesis, presentation at medical or scientific conferences, or posting on preprint servers will not preclude consideration of your manuscript."] Please clarify whether this publication was peer-reviewed and formally published. If this work was previously peer-reviewed and published, in the cover letter please provide the reason that this work does not constitute dual publication and should be included in the current manuscript.</p>
<p>3. Please review your reference list to ensure that it is complete and correct. If you have cited papers that have been retracted, please include the rationale for doing so in the manuscript text, or remove these references and replace them with relevant current references. Any changes to the reference list should be mentioned in the rebuttal letter that accompanies your revised manuscript. If you need to cite a retracted article, indicate the article’s retracted status in the References list and also include a citation and full reference for the retraction notice.</p>
<p>[Note: HTML markup is below. Please do not edit.]</p>
<p>Reviewers' comments:</p>
<p>Reviewer's Responses to Questions</p>
<p><!-- <font color="black"> --><bold>Comments to the Author</bold></p>
<p>1. Is the manuscript technically sound, and do the data support the conclusions?</p>
<p>The manuscript must describe a technically sound piece of scientific research with data that supports the conclusions. Experiments must have been conducted rigorously, with appropriate controls, replication, and sample sizes. The conclusions must be drawn appropriately based on the data presented. <!-- </font> --></p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: Yes</p>
<p>**********</p>
<p><!-- <font color="black"> -->2. Has the statistical analysis been performed appropriately and rigorously? <!-- </font> --></p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: Yes</p>
<p>**********</p>
<p><!-- <font color="black"> -->3. Have the authors made all data underlying the findings in their manuscript fully available?</p>
<p>The <ext-link ext-link-type="uri" xlink:href="http://www.plosone.org/static/policies.action#sharing" xlink:type="simple">PLOS Data policy</ext-link> requires authors to make all data underlying the findings described in their manuscript fully available without restriction, with rare exception (please refer to the Data Availability Statement in the manuscript PDF file). The data should be provided as part of the manuscript or its supporting information, or deposited to a public repository. For example, in addition to summary statistics, the data points behind means, medians and variance measures should be available. If there are restrictions on publicly sharing data—e.g. participant privacy or use of data from a third party—those must be specified.<!-- </font> --></p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: Yes</p>
<p>**********</p>
<p><!-- <font color="black"> -->4. Is the manuscript presented in an intelligible fashion and written in standard English?</p>
<p>PLOS ONE does not copyedit accepted manuscripts, so the language in submitted articles must be clear, correct, and unambiguous. Any typographical or grammatical errors should be corrected at revision, so please note any specific errors here.<!-- </font> --></p>
<p>Reviewer #1: Yes</p>
<p>Reviewer #2: Yes</p>
<p>**********</p>
<p><!-- <font color="black"> -->5. Review Comments to the Author</p>
<p>Please use the space provided to explain your answers to the questions above. You may also include additional comments for the author, including concerns about dual publication, research ethics, or publication ethics. (Please upload your review as an attachment if it exceeds 20,000 characters)<!-- </font> --></p>
<p>Reviewer #1: Summary:</p>
<p>Using EFL learners corpora, the authors examine the effect of cross-linguistic similarity on the production of L2 words. The authors further examine if this relationship is influenced by the learners’ L2 proficiency. The results show that lexical distance, language background (i.e., the learners’ L1) and L2 proficiency do not influence word choices. Rather, word choice seems to be determined by task.</p>
<p>Overall, I found the paper very clearly written (especially the methods and results sections), and I enjoyed reading it. I also really appreciate the Open Science practices adopted by the authors as well as how they clearly integrate them in the body of the manuscript. I only have a few minor comments.</p>
<p>Overall comments:</p>
<p>Abstract:</p>
<p>-I find it difficult to understand what modelling the similarity on the rate of use of L2 words means. Could the authors reformulate?</p>
<p>Introduction:</p>
<p>-I was wondering if the authors could define more clearly similarity in the introduction, and particularly focus on the type of similarity they focus on in the paper. For now, the introduction is rather focused on cognates.</p>
<p>-Could the authors define cognancy upon first mention? (and in the abstract as well if possible)</p>
<p>-Could the authors elaborate a bit on why lexical transfer can affect language use and acquisition either positively or negatively?</p>
<p>-I also think it would be useful to add a paragraph on how L2 proficiency could influence the relationship between cross-linguistic similarity and L2 word choices based on previous research, given that this is one of the two main research questions.</p>
<p>Analysis:</p>
<p>-What did the authors use to perform the model comparisons (p.23)?</p>
<p>Discussion:</p>
<p>-Could it be that the authors observed no effect of lexical distance because the learners completed the task in the written modality? Would the authors expect differences between written and spoken production? Especially given that similarity is partly calculated based on phonological information. It could also be that writing relies on more conscious processes, whereby learners are more likely to (have time to) think about which word to use to meet the task requirement. In contrast, learners could be more influenced by less conscious processes during (faster) spoken production, and thus be more influenced by factors such as phonological overlap during spoken production.</p>
<p>Minor comments:</p>
<p>-“the goal and context of communication generally play a greater role in L2 production (e.g., word choice in essays) than in many experimental processing paradigms (e.g., reaction time to isolated words)” (p.3): can the authors include references to support these claims?</p>
<p>-“There is evidence that increased lexical similarity between languages—as measured through the mean similarity of L1-L2 word pairs” what type of similarity are the authors referring to here?</p>
<p>-footnote 12: what do the authors mean with “although this does not substantially influence our findings” (p.22)</p>
<p>-I spotted a few typos:</p>
<p>-p.6: “is a simply feature of”</p>
<p>-p.18: “show that the models do are”</p>
<p>-p.30: “analsyes”</p>
<p>Reviewer #2: This is an interesting paper which sought to establish whether L1-L2 formal lexical similarity affects L2 word choice. Taking phonological overlap between L1 words and their English translations as the point of departure, the authors modelled the influence of similarity on the use of L2 words, using mixed-effects statistical methods. The authors explain in depth the concepts and methods applied in the study, and the presentation of the data is clear and comprehensible. The findings enrich the existing body of research on crosslinguistic influence, in particular that focusing on lexical transfer, and point to the need to control for task effects in future studies.</p>
<p>I recommend the paper for publication.</p>
<p>Below is a list of editorial inaccuracies which require the authors’ attention:</p>
<p>p. 6</p>
<p>is a simply feature</p>
<p>p. 16</p>
<p>favorite [American spelling]</p>
<p>p. 18</p>
<p>appear in in Table 4</p>
<p>the count-based models [missing full stop]</p>
<p>show that the models do are</p>
<p>The authors may also want to consider the following observations:</p>
<p>The authors state that they consider only one aspect of formal similarity (phonological overlap), while disregarding other factors which may also affect lexical transfer (orthographic depth, semantic/pragmatic similarity). Given that the study examines written material and not oral production, wouldn't orthographic depth, rather than phonological overlap, be more relevant?</p>
<p>Word frequency</p>
<p>The authors may want to consider the potential interaction between frequency and genre/ text type.</p>
<p>The baseline frequency relied on in the study does not consider the in/frequency of individual lexical items in the genres/text types that the learners were required to produce.</p>
<p>The authors note the task:word effect and conclude that the need to use specific words in specific tasks has a strong influence on learners’ rate of use of L2 words. They also reference earlier research (Jarvis &amp; Pavlenko 2008) which found that formality and task type may influence transfer. It is therefore recommended that they control for the effects of the purpose and context of production, i.e. mode of communication (spoken/written), genre (text type) and register (degree of formality) in future studies.</p>
<p>**********</p>
<p><!-- <font color="black"> -->6. PLOS authors have the option to publish the peer review history of their article (<ext-link ext-link-type="uri" xlink:href="https://journals.plos.org/plosone/s/editorial-and-peer-review-process#loc-peer-review-history" xlink:type="simple">what does this mean?</ext-link>). If published, this will include your full peer review and any attached files.</p>
<p>If you choose “no”, your identity will remain anonymous but your review may still be made public.</p>
<p><bold>Do you want your identity to be public for this peer review?</bold> For information about this choice, including consent withdrawal, please see our <ext-link ext-link-type="uri" xlink:href="https://www.plos.org/privacy-policy" xlink:type="simple">Privacy Policy</ext-link>.<!-- </font> --></p>
<p>Reviewer #1: No</p>
<p>Reviewer #2: No</p>
<p>**********</p>
<p>[NOTE: If reviewer comments were submitted as an attachment file, they will be attached to this email and accessible via the submission site. Please log into your account, locate the manuscript record, and check for the action link "View Attachments". If this link does not appear, there are no attachment files.]</p>
<p>While revising your submission, please upload your figure files to the Preflight Analysis and Conversion Engine (PACE) digital diagnostic tool, <ext-link ext-link-type="uri" xlink:href="https://pacev2.apexcovantage.com/" xlink:type="simple">https://pacev2.apexcovantage.com/</ext-link>. PACE helps ensure that figures meet PLOS requirements. To use PACE, you must first register as a user. Registration is free. Then, login and navigate to the UPLOAD tab, where you will find detailed instructions on how to use the tool. If you encounter any issues or have any questions when using PACE, please email PLOS at <email xlink:type="simple">figures@plos.org</email>. Please note that Supporting Information files do not need this step.</p>
</body>
</sub-article>
<sub-article article-type="author-comment" id="pone.0281137.r002">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pone.0281137.r002</article-id>
<title-group>
<article-title>Author response to Decision Letter 0</article-title>
</title-group>
<related-object document-id="10.1371/journal.pone.0281137" document-id-type="doi" document-type="peer-reviewed-article" id="rel-obj002" link-type="rebutted-decision-letter" object-id="10.1371/journal.pone.0281137.r001" object-id-type="doi" object-type="decision-letter"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>1</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="author-response-date">12 Jan 2023</named-content>
</p>
<p>We would like to thank the editor and reviewers for their time.</p>
<p>Below, we outline how we revised the manuscript to address the reviewers’ comments. Note that:</p>
<p>&gt; This mark indicates a reviewer’s comment.</p>
<p>— This mark indicates our response.</p>
<p>Reviewer #1</p>
<p>&gt; Abstract:</p>
<p>&gt; I find it difficult to understand what modelling the similarity on the rate of use of L2 words means. Could the authors reformulate?</p>
<p>— Reformulated it to read as follows: “We then used mixed-effects statistical models to examine how this similarity influences the rate of use of the L2 words; essentially, we checked whether L2 words that are more similar to their L1 translations are used more often.” We also added the following line right after this: “We also controlled for potential confounds, including the baseline L1 frequency of the English words.”</p>
<p>&gt; Introduction:</p>
<p>&gt; I was wondering if the authors could define more clearly similarity in the introduction, and particularly focus on the type of similarity they focus on in the paper. For now, the introduction is rather focused on cognates.</p>
<p>— Added more of an explanation about this: ‘This similarity is usually conceptualized based on the overlap in sounds and/or letters between words in different languages. For example, the French word for “orange” is also spelled “orange” (though pronounced slightly differently), so it has higher formal similarity with its English translation than does the French word for “lemon” (“citron”). Accordingly, it will generally be easier for French speakers to acquire the English word “orange” than the word “lemon”.’</p>
<p>&gt; Could the authors define cognancy upon first mention? (and in the abstract as well if possible)</p>
<p>— We updated the introduction and abstract as suggested:</p>
<p>- Introduction: “When two words with similar meanings across languages have a high level of formal similarity, they can be considered to be psycholinguistic cognates, though there is no exact threshold for cognancy based on similarity. Psycholinguistic cognancy frequently occurs because the words are also historical cognates, meaning that they share a common etymology, though cognancy may also involve words borrowed through language contact [11,13,14].”</p>
<p>- Abstract: “We quantified similarity based on phonological overlap between L1 words and their L2 (English) translations. This similarity relates to psycholinguistic cognancy, which occurs when words and their translations share a high level of formal similarity, often due to historical cognancy from shared etymology or language contact.”</p>
<p>&gt; Could the authors elaborate a bit on why lexical transfer can affect language use and acquisition either positively or negatively?</p>
<p>— Updated the introduction to include this, so it now reads as follows: “This is often attributed to lexical transfer [1], a type of language transfer or crosslinguistic influence [2–4]. Transfer can be positive when it facilitates language acquisition or use, for example because an L2 linguistic structure (e.g., a certain word) is identical to a corresponding structure in a learner’s L1, which makes it easy for the learner to remember it. Transfer can also be negative when it hinders language acquisition or use, in which case it is also called interference; this can occur, for example, because an L2 structure is very different from a corresponding structure in a learner’s L1, which makes it hard for the learner to remember it.”</p>
<p>&gt; I also think it would be useful to add a paragraph on how L2 proficiency could influence the relationship between cross-linguistic similarity and L2 word choices based on previous research, given that this is one of the two main research questions.</p>
<p>— We now have the following paragraph in the introduction: “Like most types of crosslinguistic influence, this form of lexical transfer is expected to play a role primarily during early stages of SLA, when learners rely more on their L1 to form and use their mental lexicon. However, this influence can also play a role at advanced stages of SLA, and therefore affect even advanced L2 learners [1,5,6,10,15].”</p>
<p>&gt; Analysis:</p>
<p>&gt; What did the authors use to perform the model comparisons (p.23)?</p>
<p>— Added a clarification that this is “based on AIC and BIC” (we elaborate on these measures in §5.2 of the SI, where we present the comparisons and provide relevant references).</p>
<p>&gt; Discussion:</p>
<p>&gt; Could it be that the authors observed no effect of lexical distance because the learners completed the task in the written modality? Would the authors expect differences between written and spoken production? Especially given that similarity is partly calculated based on phonological information. It could also be that writing relies on more conscious processes, whereby learners are more likely to (have time to) think about which word to use to meet the task requirement. In contrast, learners could be more influenced by less conscious processes during (faster) spoken production, and thus be more influenced by factors such as phonological overlap during spoken production.</p>
<p>— We agree that the fact that learners were writing, rather than speaking, may have affected their productions in a way that could have weakened any potential effect of crosslinguistic similarity for the reasons you described. However, we don’t think that this alone can explain the complete lack of effect in the study, given that past research on this effect did find it in written productions (especially the research on word-choice transfer which is the most similar to ours, like Jarvis et al., 2012 and Rabinovich et al., 2018). Nevertheless, since this is an interesting and important question to address, we added the following to our “Limitations and future research” section: “Likewise, it would be interesting to compare written L2 productions to spoken ones, when these are produced by similar learners under similar conditions. This will show whether and how the effect of this crosslinguistic similarity, if it appears, varies across these two modes of language production.” (We focused on the future research, rather than the potential mechanism, to avoid being too speculative. Also, note that this was previously mentioned more briefly at the end of this section, so we consolidated the previous mention with the new one.)</p>
<p>— In addition, we now note in the body of the paper (under “Calculating lexical distance”) that there is a strong correlation between phonological and orthographic overlap in our sample (r = .68, 95% CI = [.67, .70], p &lt; .001; this is for the parallel dictionaries, where all the L1s share English’s Latin script), and that a similar correlation was found in other studies (e.g., r = .782 in Carrasco-Ortiz et al., 2021). Accordingly, the phonological distances that we used were also highly indicative of the orthographic distances between words. (This was previously included only in a footnote in the SI, but given the comments by you and the other reviewer, we added a mention of this to the paper itself.) This further suggests that the lack of effect here is not solely due to the productions being written, especially given how robust the null effect was. </p>
<p>&gt; Minor comments:</p>
<p>&gt; “the goal and context of communication generally play a greater role in L2 production (e.g., word choice in essays) than in many experimental processing paradigms (e.g., reaction time to isolated words)” (p.3): can the authors include references to support these claims?</p>
<p>— This is a general observation that we made based on our experience, but since we don’t have specific references to support it, we moderated the claim to show its speculative nature, so we now say “might play a greater role” rather than “generally play a greater role”. </p>
<p>&gt; “There is evidence that increased lexical similarity between languages—as measured through the mean similarity of L1-L2 word pairs” what type of similarity are the authors referring to here?</p>
<p>— The studies that we cite there used several different measures. Since this isn’t the focus of the statement, and we don’t want to get sidetracked by explaining the different measures there, we simplified this to: “There is evidence that increased overall lexical similarity between languages improves learning outcomes, thus leading to higher scores in L2 proficiency tests [17–19].”</p>
<p>&gt; footnote 12: what do the authors mean with “although this does not substantially influence our findings” (p.22)</p>
<p>— Edited the material so it should be clearer: ‘We tried adding other random effects, but this led to convergence issues, and even in cases where the models converged, their key results were the same as they were for these models. For more information, see Appendix S5 (under “Models with alternative random effects”).’ (Note that this is now mentioned in the body, rather than in a footnote, in line with PLOS’s style requirements).</p>
<p>&gt; I spotted a few typos:</p>
<p>&gt; p.6: “is a simply feature of”</p>
<p>&gt; p.18: “show that the models do are”</p>
<p>&gt; p.30: “analsyes”</p>
<p>— Fixed these.</p>
<p>Reviewer #2</p>
<p>&gt; Editorial inaccuracies which require the authors’ attention:</p>
<p>&gt; p. 6, is a simply feature</p>
<p>&gt; p. 16, favorite [American spelling]</p>
<p>&gt; p. 18, appear in in Table 4; the count-based models [missing full stop]; show that the models do are</p>
<p>— Fixed these.</p>
<p>&gt; The authors may also want to consider the following observations:</p>
<p>&gt; The authors state that they consider only one aspect of formal similarity (phonological overlap), while disregarding other factors which may also affect lexical transfer (orthographic depth, semantic/pragmatic similarity). Given that the study examines written material and not oral production, wouldn't orthographic depth, rather than phonological overlap, be more relevant?</p>
<p>— We added the following explanation to the body of the paper to address this (under “Calculating lexical distance”): “We used phonological—rather than orthographic—overlap as a measure of distance, because this enables us to examine distance from L1s that have a substantially different script from English, like Arabic and Mandarin. Nevertheless, we also examined a sample (presented later under “Learner sample”) where all the L1s share English’s Latin script, and in this sample, there was a strong correlation between phonological and orthographic overlap (r = .68, 95% CI = [.67, .70], p &lt; .001). This aligns with findings of other research [11,29,30], like an r = .782 found in a dataset of English and Spanish words [11]. This strong correlation suggests that the phonological overlap that we found for in L1s that share English’s script is indicative of the associated orthographic overlap in these L1s, so even if a large part of the effect of similarity is due to overlap in orthography, we would expect to detect it in our analyses.” (Note: Some of this material was/is also included as a footnote in the SI, but given the comment we thought that it would help to highlight it in the body of the paper.)</p>
<p>&gt; Word frequency</p>
<p>&gt; The authors may want to consider the potential interaction between frequency and genre/ text type. The baseline frequency relied on in the study does not consider the in/frequency of individual lexical items in the genres/text types that the learners were required to produce.</p>
<p>— We added this to the “Limitations and future research” section (see our response to the next comment). We agree that this might be beneficial for refining the analyses further, but it currently isn’t feasible to do this in our sample, for two reasons. First, there is no existing classification that covers all the tasks (especially in the EFCAMDAT Cleaned Subcorpus), and creating one would necessitate a full extensive study of its own. Second, even once such a classification exists, it would be necessary to get corresponding texts produced by L1 speakers under similar conditions, which would necessitate another huge research project, especially given the number of tasks in this dataset. We could potentially add a random effect between task and frequency to sort of account for this instead, but based on all the alternative models that we built (see §5.1 in the SI), this will very likely cause convergence issues. This also has other downsides, like reducing the interpretability of this predictor, and making it harder to compare it to corresponding effects found in other studies. Nevertheless, given that the frequency measure that we used was a good predictor in our models, and given the robustness of our main findings, this is very unlikely to change our key findings.</p>
<p>&gt; The authors note the task:word effect and conclude that the need to use specific words in specific tasks has a strong influence on learners’ rate of use of L2 words. They also reference earlier research (Jarvis &amp; Pavlenko 2008) which found that formality and task type may influence transfer. It is therefore recommended that they control for the effects of the purpose and context of production, i.e. mode of communication (spoken/written), genre (text type) and register (degree of formality) in future studies.</p>
<p>— Agreed, so we added the following paragraph to the “Limitations and future research” section: “In addition, future research could also refine these analyses by accounting for further factors. For example, it might be beneficial to look at the baseline L1 frequency of words within specific genres that correspond to the associated writing tasks, rather than in the L1 as a whole. Similarly, it may be beneficial to examine the effects of the genre of text and its degree of formality on the crosslinguistic influence that learners display.” We also mention the point regarding written vs. spoken communication a bit earlier in this section, in response to a comment by the other reviewer: “Likewise, it would be interesting to compare written L2 productions to spoken ones, when these are produced by similar learners under similar conditions. This will show whether and how the effect of this crosslinguistic similarity, if it appears, varies across these two modes of language production.”</p>
<supplementary-material id="pone.0281137.s002" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pone.0281137.s002" xlink:type="simple">
<label>Attachment</label>
<caption>
<p>Submitted filename: <named-content content-type="submitted-filename">Response to Reviewers.pdf</named-content></p>
</caption>
</supplementary-material>
</body>
</sub-article>
<sub-article article-type="editor-report" id="pone.0281137.r003" specific-use="decision-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pone.0281137.r003</article-id>
<title-group>
<article-title>Decision Letter 1</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Bongelli</surname>
<given-names>Ramona</given-names>
</name>
<role>Academic Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2023</copyright-year>
<copyright-holder>Ramona Bongelli</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pone.0281137" document-id-type="doi" document-type="article" id="rel-obj003" link-type="peer-reviewed-article"/>
<custom-meta-group>
<custom-meta>
<meta-name>Submission Version</meta-name>
<meta-value>1</meta-value>
</custom-meta>
</custom-meta-group>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">16 Jan 2023</named-content>
</p>
<p>Examining the potential influence of crosslinguistic lexical similarity on word-choice transfer in L2 English</p>
<p>PONE-D-22-31186R1</p>
<p>Dear Dr. Shatz,</p>
<p>We’re pleased to inform you that your manuscript has been judged scientifically suitable for publication and will be formally accepted for publication once it meets all outstanding technical requirements.</p>
<p>Within one week, you’ll receive an e-mail detailing the required amendments. When these have been addressed, you’ll receive a formal acceptance letter and your manuscript will be scheduled for publication.</p>
<p>An invoice for payment will follow shortly after the formal acceptance. To ensure an efficient process, please log into Editorial Manager at <ext-link ext-link-type="uri" xlink:href="http://www.editorialmanager.com/pone/" xlink:type="simple">http://www.editorialmanager.com/pone/</ext-link>, click the 'Update My Information' link at the top of the page, and double check that your user information is up-to-date. If you have any billing related questions, please contact our Author Billing department directly at <email xlink:type="simple">authorbilling@plos.org</email>.</p>
<p>If your institution or institutions have a press office, please notify them about your upcoming paper to help maximize its impact. If they’ll be preparing press materials, please inform our press team as soon as possible -- no later than 48 hours after receiving the formal acceptance. Your manuscript will remain under strict press embargo until 2 pm Eastern Time on the date of publication. For more information, please contact <email xlink:type="simple">onepress@plos.org</email>.</p>
<p>Kind regards,</p>
<p>Ramona Bongelli, Ph.D.</p>
<p>Academic Editor</p>
<p>PLOS ONE</p>
<p>Additional Editor Comments (optional):</p>
<p>Reviewers' comments:</p>
</body>
</sub-article>
<sub-article article-type="editor-report" id="pone.0281137.r004" specific-use="acceptance-letter">
<front-stub>
<article-id pub-id-type="doi">10.1371/journal.pone.0281137.r004</article-id>
<title-group>
<article-title>Acceptance letter</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name name-style="western">
<surname>Bongelli</surname>
<given-names>Ramona</given-names>
</name>
<role>Academic Editor</role>
</contrib>
</contrib-group>
<permissions>
<copyright-year>2023</copyright-year>
<copyright-holder>Ramona Bongelli</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<related-object document-id="10.1371/journal.pone.0281137" document-id-type="doi" document-type="article" id="rel-obj004" link-type="peer-reviewed-article"/>
</front-stub>
<body>
<p>
<named-content content-type="letter-date">23 Jan 2023</named-content>
</p>
<p>PONE-D-22-31186R1 </p>
<p>Examining the potential influence of crosslinguistic lexical similarity on word-choice transfer in L2 English </p>
<p>Dear Dr. Shatz:</p>
<p>I'm pleased to inform you that your manuscript has been deemed suitable for publication in PLOS ONE. Congratulations! Your manuscript is now with our production department. </p>
<p>If your institution or institutions have a press office, please let them know about your upcoming paper now to help maximize its impact. If they'll be preparing press materials, please inform our press team within the next 48 hours. Your manuscript will remain under strict press embargo until 2 pm Eastern Time on the date of publication. For more information please contact <email xlink:type="simple">onepress@plos.org</email>.</p>
<p>If we can help with anything else, please email us at <email xlink:type="simple">plosone@plos.org</email>. </p>
<p>Thank you for submitting your work to PLOS ONE and supporting open access. </p>
<p>Kind regards, </p>
<p>PLOS ONE Editorial Office Staff</p>
<p>on behalf of</p>
<p>Professor Ramona Bongelli </p>
<p>Academic Editor</p>
<p>PLOS ONE</p>
</body>
</sub-article>
</article>