<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article
  PUBLIC "-//NLM//DTD Journal Publishing DTD v3.0 20080202//EN" "http://dtd.nlm.nih.gov/publishing/3.0/journalpublishing3.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" article-type="research-article" dtd-version="3.0" xml:lang="en">
  <front>
    <journal-meta><journal-id journal-id-type="nlm-ta">PLoS ONE</journal-id><journal-id journal-id-type="publisher-id">plos</journal-id><journal-id journal-id-type="pmc">plosone</journal-id><issn pub-type="epub">1932-6203</issn><publisher>
        <publisher-name>Public Library of Science</publisher-name>
        <publisher-loc>San Francisco, USA</publisher-loc>
      </publisher></journal-meta>
    <article-meta><article-id pub-id-type="publisher-id">PONE-D-12-10435</article-id><article-id pub-id-type="doi">10.1371/journal.pone.0043230</article-id><article-categories>
        <subj-group subj-group-type="heading">
          <subject>Research Article</subject>
        </subj-group>
        <subj-group subj-group-type="Discipline-v2">
          <subject>Computer science</subject>
          <subj-group>
            <subject>Software engineering</subject>
            <subj-group>
              <subject>Software tools</subject>
            </subj-group>
          </subj-group>
        </subj-group>
        <subj-group subj-group-type="Discipline-v2">
          <subject>Engineering</subject>
          <subj-group>
            <subject>Software engineering</subject>
            <subj-group>
              <subject>Software tools</subject>
            </subj-group>
          </subj-group>
        </subj-group>
        <subj-group subj-group-type="Discipline-v2">
          <subject>Social and behavioral sciences</subject>
          <subj-group>
            <subject>Communications</subject>
            <subj-group>
              <subject>Natural language</subject>
            </subj-group>
          </subj-group>
          <subj-group>
            <subject>Linguistics</subject>
            <subj-group>
              <subject>Computational linguistics</subject>
              <subject>Linguistic geography</subject>
              <subject>Natural language</subject>
              <subject>Phonology</subject>
              <subject>Psycholinguistics</subject>
              <subject>Sociolinguistics</subject>
              <subject>Structural linguistics</subject>
            </subj-group>
          </subj-group>
        </subj-group>
        <subj-group subj-group-type="Discipline">
          <subject>Computer Science</subject>
        </subj-group>
      </article-categories><title-group><article-title>CLEARPOND: Cross-Linguistic Easy-Access Resource for Phonological and Orthographic Neighborhood Densities</article-title><alt-title alt-title-type="running-head">Cross-Linguistic Neighborhood Densities</alt-title></title-group><contrib-group>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Marian</surname>
            <given-names>Viorica</given-names>
          </name>
          <xref ref-type="aff" rid="aff1"/>
          <xref ref-type="corresp" rid="cor1">
            <sup>*</sup>
          </xref>
        </contrib>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Bartolotti</surname>
            <given-names>James</given-names>
          </name>
          <xref ref-type="aff" rid="aff1"/>
        </contrib>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Chabal</surname>
            <given-names>Sarah</given-names>
          </name>
          <xref ref-type="aff" rid="aff1"/>
        </contrib>
        <contrib contrib-type="author" xlink:type="simple">
          <name name-style="western">
            <surname>Shook</surname>
            <given-names>Anthony</given-names>
          </name>
          <xref ref-type="aff" rid="aff1"/>
        </contrib>
      </contrib-group><aff id="aff1">
        <addr-line>Northwestern University, Evanston, Illinois, United States of America</addr-line>
      </aff><contrib-group>
        <contrib contrib-type="editor" xlink:type="simple">
          <name name-style="western">
            <surname>White</surname>
            <given-names>Stephanie Ann</given-names>
          </name>
          <role>Editor</role>
          <xref ref-type="aff" rid="edit1"/>
        </contrib>
      </contrib-group><aff id="edit1">
        <addr-line>UCLA, United States of America</addr-line>
      </aff><author-notes>
        <corresp id="cor1">* E-mail: <email xlink:type="simple">v-marian@northwestern.edu</email></corresp>
        <fn fn-type="conflict">
          <p>The authors have declared that no competing interests exist.</p>
        </fn>
        <fn fn-type="con">
          <p>Conceived and designed the experiments: VM JB SC AS. Performed the experiments: VM JB SC AS. Analyzed the data: VM JB SC AS. Contributed reagents/materials/analysis tools: VM. Wrote the paper: VM JB SC AS.</p>
        </fn>
      </author-notes><pub-date pub-type="collection">
        <year>2012</year>
      </pub-date><pub-date pub-type="epub">
        <day>20</day>
        <month>8</month>
        <year>2012</year>
      </pub-date><volume>7</volume><issue>8</issue><elocation-id>e43230</elocation-id><history>
        <date date-type="received">
          <day>11</day>
          <month>4</month>
          <year>2012</year>
        </date>
        <date date-type="accepted">
          <day>18</day>
          <month>7</month>
          <year>2012</year>
        </date>
      </history><permissions>
        
        <copyright-holder>Marian et al</copyright-holder>
        <license xlink:type="simple">
          <license-p>This is an open-access article distributed under the terms of the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
        </license>
      </permissions><abstract>
        <p>Past research has demonstrated cross-linguistic, cross-modal, and task-dependent differences in neighborhood density effects, indicating a need to control for neighborhood variables when developing and interpreting research on language processing. The goals of the present paper are two-fold: (1) to introduce CLEARPOND (Cross-Linguistic Easy-Access Resource for Phonological and Orthographic Neighborhood Densities), a centralized database of phonological and orthographic neighborhood information, both within and between languages, for five commonly-studied languages: Dutch, English, French, German, and Spanish; and (2) to show how CLEARPOND can be used to compare general properties of phonological and orthographic neighborhoods across languages. CLEARPOND allows researchers to input a word or list of words and obtain phonological and orthographic neighbors, neighborhood densities, mean neighborhood frequencies, word lengths by number of phonemes and graphemes, and spoken-word frequencies. Neighbors can be defined by substitution, deletion, and/or addition, and the database can be queried separately along each metric or summed across all three. Neighborhood values can be obtained both within and across languages, and outputs can optionally be restricted to neighbors of higher frequency. To enable researchers to more quickly and easily develop stimuli, CLEARPOND can also be searched by features, generating lists of words that meet precise criteria, such as a specific range of neighborhood sizes, lexical frequencies, and/or word lengths. CLEARPOND is freely-available to researchers and the public as a searchable, online database and for download at <ext-link ext-link-type="uri" xlink:href="http://clearpond.northwestern.edu" xlink:type="simple">http://clearpond.northwestern.edu</ext-link>.</p>
      </abstract><funding-group>
        <funding-statement>Grant NICHD R01 HD059858-01A to Viorica Marian. The funders had no role in this study design, data collection and analysis, decision to publish, or preparation of the manuscript.</funding-statement>
      </funding-group><counts>
        <page-count count="11"/>
      </counts></article-meta>
  </front>
  <body>
    <sec id="s1">
      <title>Introduction</title>
      <sec id="s1a">
        <title>Phonological and Orthographic Neighborhood Densities</title>
        <p>In research on language, neighborhoods are a conglomeration of words that are highly similar to one another along a critical characteristic. Most commonly, neighbors are defined on the basis of shared linguistic features such as orthography, phonology, or semantics. Because a word’s neighborhood size (i.e., the number of neighbors it has; also called neighborhood density) can have an impact on a variety of linguistic tasks and processes, it has become an important psycholinguistic metric. However, in spite of the focus on neighbors in psycholinguistic research, neighbors are inconsistently identified, particularly across languages. These inconsistencies, which often arise as a result of researchers employing different databases, make it difficult to compare the effects of neighborhood density across studies. The current paper has two goals: (1) to introduce a centralized database of neighborhood information for five commonly-studied languages – Dutch, English, French, German, and Spanish – and provide a single corpus through which neighborhoods can be indexed cross-linguistically; and (2) to compare general properties of neighborhoods across these five languages using this database in order to determine where and how languages differ with respect to their neighborhoods.</p>
        <p>In the current paper, we examined two types of linguistic neighborhoods – orthographic and phonological. Orthographic neighborhoods are often defined according to Coltheart, Davelaar, Jonasson, and Besner’s <xref ref-type="bibr" rid="pone.0043230-Coltheart1">[1]</xref> <italic>N</italic> metric, which refers to the number of words that can be constructed by substituting one letter of the target word. For example, the word <italic>log</italic> has <italic>hog</italic>, <italic>lug</italic>, and <italic>lot</italic> as orthographic neighbors. Phonological neighborhoods are calculated similarly, but instead of depending on grapheme substitution, phonological neighbors are constructed by substituting one phoneme of the target word <xref ref-type="bibr" rid="pone.0043230-Luce1">[2]</xref>. <italic>Fish</italic> (/fɪʃ/), for example, has <italic>dish</italic> (/dɪʃ/) and <italic>fig</italic> (/fɪg/) as phonological neighbors. These “substitution neighbors” have historically been the focus of the literature and have dominated investigations of neighborhood size. However, research has also investigated the effects of addition (formed by the addition of a grapheme or phoneme, for example <italic>and</italic> has <italic>hand</italic> as an orthographic addition neighbor) and deletion (formed by the deletion of a grapheme or phoneme, for example <italic>bend</italic> has <italic>end</italic> as an orthographic deletion neighbor) neighbors <xref ref-type="bibr" rid="pone.0043230-Davis1">[3]</xref>.</p>
        <p>The effects of phonological and orthographic neighborhood density on language processing have been well documented across a variety of tasks <xref ref-type="bibr" rid="pone.0043230-Siakaluk1">[4]</xref>–<xref ref-type="bibr" rid="pone.0043230-BaeseBerk1">[11]</xref> and across multiple languages <xref ref-type="bibr" rid="pone.0043230-Marian1">[12]</xref>–<xref ref-type="bibr" rid="pone.0043230-Frauenfelder1">[15]</xref>. However, in spite of the prevalence of neighborhood effects, the nature of these effects is subject to debate. For example, neighborhood density may affect recognition and production processes differently <xref ref-type="bibr" rid="pone.0043230-Dell1">[16]</xref>, <xref ref-type="bibr" rid="pone.0043230-Gahl1">[17]</xref>, and effects may vary depending on the language of presentation <xref ref-type="bibr" rid="pone.0043230-Vitevitch1">[13]</xref>, <xref ref-type="bibr" rid="pone.0043230-Luce2">[18]</xref>, <xref ref-type="bibr" rid="pone.0043230-Vitevitch2">[19]</xref> (but see <xref ref-type="bibr" rid="pone.0043230-Baus1">[14]</xref>). The ongoing debate surrounding neighborhood density effects, particularly across languages, underscores the need for resources that allow researchers to consistently identify orthographic and phonological neighbors across studies. For some languages, even the most basic descriptive data are not available, forcing researchers to continually recreate basic neighborhood and frequency statistics. Furthermore, even when descriptive statistics are available <xref ref-type="bibr" rid="pone.0043230-Vitevitch1">[13]</xref>, <xref ref-type="bibr" rid="pone.0043230-Frauenfelder1">[15]</xref>, <xref ref-type="bibr" rid="pone.0043230-Balota1">[20]</xref>, <xref ref-type="bibr" rid="pone.0043230-Duyck1">[21]</xref>, direct cross-linguistic comparisons are often not reported or possible.</p>
        <p>While there have been some attempts to create consistent corpora from which neighborhood information can be derived, these corpora vary across languages. For example, N-Watch, a database of English neighborhood information <xref ref-type="bibr" rid="pone.0043230-Davis3">[22]</xref>, defines phonological neighbors according to the substitution of a single phoneme in any word position. BuscaPalabras, a database of Spanish neighborhood information <xref ref-type="bibr" rid="pone.0043230-Davis2">[8]</xref>, and E-Hitz, a database of Basque neighborhood information <xref ref-type="bibr" rid="pone.0043230-Perea1">[23]</xref>, define phonological neighbors according to those same rules, but also include words that differ by the addition or deletion of a phoneme from any word position.</p>
        <fig id="pone-0043230-g001" orientation="portrait" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pone.0043230.g001</object-id>
          <label>Figure 1</label>
          <caption>
            <title>(a) Word frequency (per million) across Dutch, English, French, German, and Spanish.</title>
            <p>Equating corpus sizes (left) resulted in average word frequencies that were comparable across languages; size-equated corpora were thus used in all further analyses. If, instead, corpus size was defined only by a frequency threshold (right), differences in average word frequency emerged. (b) Word frequency distributions for each language, using equivalent corpus sizes.</p>
          </caption>
          <graphic mimetype="image" orientation="portrait" position="float" xlink:href="info:doi/10.1371/journal.pone.0043230.g001" xlink:type="simple"/>
        </fig>
        <p>The goal of this paper is therefore to introduce CLEARPOND, the Cross-Linguistic Easy-Access Resource for Phonological and Orthographic Neighborhood Densities, a catalog of neighborhood density across languages. Perhaps the most comprehensive psycholinguistic database to date is WordGen <xref ref-type="bibr" rid="pone.0043230-Duyck1">[21]</xref>, which queries the CELEX and Lexique databases to provide searchable datasets for Dutch, English, German, and French. While WordGen controls for factors such as written word frequency, orthographic neighborhood size, bigram frequency, and word length, it is missing a number of relevant features including information on phonological neighbors, neighborhood frequency, and the ability to index neighbors across languages. The database that we present here has been controlled for word frequency to ensure that consistent and comparable tokens are sampled from each language, and provides data regarding word length, neighborhood density, and neighborhood frequency. We also provide measures of foreign neighborhoods (i.e., the number of Spanish neighbors of an English word, or English neighbors of a Spanish word, etc.) for use in bilingual comparisons. Neighborhoods are defined both orthographically and phonologically, with stimuli derived from film and television subtitle corpora that capture spoken word frequencies. Finally, we have defined neighborhoods by substitution, addition, and deletion. It is our intent that CLEARPOND will provide a standard from which neighborhood data can be easily extracted and that it will provide a comprehensive tool for psycholinguistic researchers.</p>
        <fig id="pone-0043230-g002" orientation="portrait" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pone.0043230.g002</object-id>
          <label>Figure 2</label>
          <caption>
            <title>Distribution of orthographic word lengths for Dutch, English, French, German, and Spanish.</title>
          </caption>
          <graphic mimetype="image" orientation="portrait" position="float" xlink:href="info:doi/10.1371/journal.pone.0043230.g002" xlink:type="simple"/>
        </fig>
      </sec>
    </sec>
    <sec id="s2" sec-type="methods">
      <title>Methods</title>
      <sec id="s2a">
        <title>Selection of Corpora</title>
        <p>To examine phonological and orthographic neighborhood densities across languages, we selected corpora for the following languages: Dutch (SUBTLEX-NL) <xref ref-type="bibr" rid="pone.0043230-Keuleers1">[24]</xref>, English (SUBTLEX-US) <xref ref-type="bibr" rid="pone.0043230-Brysbaert1">[25]</xref>, French (Lexique) <xref ref-type="bibr" rid="pone.0043230-New1">[26]</xref>, German (SUBTLEX-DE) <xref ref-type="bibr" rid="pone.0043230-Brysbaert2">[27]</xref>, and Spanish (SUBTLEX-ESP) <xref ref-type="bibr" rid="pone.0043230-Cuetos1">[28]</xref>. Misspellings, including culturally-defined spellings (e.g., British “colour”), and foreign language intrusions (e.g., the English word “mind” in the Spanish corpus) were removed by cross-referencing each subtitle corpus with a dictionary in that language. Because all five corpora use the same source-material (i.e., film and television subtitles) to derive frequency data, they are highly comparable and well suited for cross-language comparisons. To increase similarity among the corpora, homographs were removed from the French corpus to match the parameters of the Dutch, English, German, and Spanish corpora (none of which distinguish between the different meanings of homographs). French homographs were reduced to a single entry, and the frequency per million of the collapsed entry was created by adding the frequency per million of each of the homographs. For example, the French word <italic>est</italic> is the third person singular form of the verb meaning “to be,” and has a frequency of 19,417 per million; <italic>est</italic> is also the French word for the cardinal direction East, which has a frequency of 81 per million. We collapsed these two entries into a single entry, <italic>est</italic>, that had a frequency of 19,498 per million.</p>
        <fig id="pone-0043230-g003" orientation="portrait" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pone.0043230.g003</object-id>
          <label>Figure 3</label>
          <caption>
            <title>Mean orthographic neighborhood sizes for words in Dutch, English, French, German, and Spanish.</title>
            <p>Total mean neighborhood size (left group) includes single-letter substitutions (e.g., ‘log’ for ‘hog’), deletions (e.g., ‘end’ for ‘bend’) and additions (e.g., ‘hand’ for ‘and’).</p>
          </caption>
          <graphic mimetype="image" orientation="portrait" position="float" xlink:href="info:doi/10.1371/journal.pone.0043230.g003" xlink:type="simple"/>
        </fig>
        <fig id="pone-0043230-g004" orientation="portrait" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pone.0043230.g004</object-id>
          <label>Figure 4</label>
          <caption>
            <title>Distribution of orthographic neighborhood densities across Dutch, English, French, German, and Spanish (log-log scale).</title>
          </caption>
          <graphic mimetype="image" orientation="portrait" position="float" xlink:href="info:doi/10.1371/journal.pone.0043230.g004" xlink:type="simple"/>
        </fig>
        <p>Using large corpora (the subtitle lexicons range from 74,286 to 441,132 tokens) can lead to overestimations of neighborhood size compared to people’s actual working vocabularies. By only including words above a certain frequency threshold, the effect of very low frequency words (which are unlikely to be in people’s everyday, working vocabularies) on neighborhood calculations is reduced. In the present study, a frequency threshold of 0.34 per million was used, based on the standard used by Davis <xref ref-type="bibr" rid="pone.0043230-Davis3">[22]</xref>. This frequency cutoff yielded a corpus size of 27,751 for English, which compares favorably to English vocabulary size estimates for educated adults (20,000 word families) <xref ref-type="bibr" rid="pone.0043230-Goulden1">[29]</xref>. However, the frequency cutoff yielded different corpus sizes across languages (Dutch: <italic>N</italic> = 31,691; English: <italic>N</italic> = 27,751; French: <italic>N</italic> = 34,113; German: <italic>N</italic> = 45,027; Spanish: <italic>N</italic> = 41,968), which would limit our ability to make cross-linguistic comparisons. Larger corpora are likely to inflate neighborhood size estimates, as a larger overall sample pool results in a larger pool of potential neighbor-candidates. To alleviate this concern, corpus size was equated across languages by including the 27,751 most frequent words in each language (based on the smallest corpus, English) in all further comparisons. 
<xref ref-type="fig" rid="pone-0043230-g001">Figure 1a</xref> (left) shows that when corpus size was equated, the languages had comparable average frequencies (Dutch: 32.58, <italic>SEM</italic> = 3.10; English: 32.72, <italic>SEM</italic> = 3.18; French: 30.87, <italic>SEM</italic> = 2.64; German: 33.74, <italic>SEM</italic> = 2.74; Spanish: 33.87, <italic>SEM</italic> = 3.02), while <xref ref-type="fig" rid="pone-0043230-g001">Figure 1a</xref> (right) indicates that the languages differed in average frequency when corpus size was instead defined by a frequency threshold. In addition, frequency distributions (<xref ref-type="fig" rid="pone-0043230-g001">Figure 1b</xref>) were comparable across languages when corpus size was equated. Together, these results provide support for the ability to make direct comparisons between the size-equated corpora.</p>
        <fig id="pone-0043230-g005" orientation="portrait" position="float">
          <object-id pub-id-type="doi">10.1371/journal.pone.0043230.g005</object-id>
          <label>Figure 5</label>
          <caption>
            <title>Average orthographic neighborhood size of words in Dutch, English, French, German, and Spanish at each word length.</title>
          </caption>
          <graphic mimetype="image" orientation="portrait" position="float" xlink:href="info:doi/10.1371/journal.pone.0043230.g005" xlink:type="simple"/>
        </fig>
      </sec>
      <sec id="s2b">
        <title>Calculating Neighborhoods</title>
        <sec id="s2b1">
          <title>Orthographic neighborhoods</title>
          <p>Orthographic neighbors consisted of words that differed only by the addition, deletion, or substitution of a single grapheme, as this method of calculating neighbors (including addition, deletion, and substitution neighbors) provides a stronger metric of the lexical-level influence of neighborhood density than typical measures of substitution neighbors alone <xref ref-type="bibr" rid="pone.0043230-Davis1">[3]</xref>. For example, the word <italic>plant</italic> has neighbors like <italic>planet</italic> (addition), <italic>plan</italic> (deletion), and <italic>plank</italic> (substitution). Likewise, the English word <italic>chief</italic> and the French word <italic>chien</italic> (meaning dog) are cross-linguistic orthographic neighbors because they differ only in the substitution of a single grapheme, “n” for “f.” Accented vowels and the Spanish “ñ” were treated as separate graphemes; therefore, words such as the French <italic>ou</italic> (English: ‘or’) and <italic>où</italic> (English: ‘where’) were considered to be orthographic neighbors.</p>
          <fig id="pone-0043230-g006" orientation="portrait" position="float">
            <object-id pub-id-type="doi">10.1371/journal.pone.0043230.g006</object-id>
            <label>Figure 6</label>
            <caption>
              <title>Average orthographic neighborhood size as a function of word frequency.</title>
              <p>Frequency bins are evenly spaced divisions of words in 5% increments. Bin one represents the average orthographic neighborhood size of the top 5% most frequent words in the language; bin twenty represents the average orthographic neighborhood size of the 5% least frequent words.</p>
            </caption>
            <graphic mimetype="image" orientation="portrait" position="float" xlink:href="info:doi/10.1371/journal.pone.0043230.g006" xlink:type="simple"/>
          </fig>
        </sec>
        <sec id="s2b2">
          <title>Phonological neighborhoods</title>
          <p>Phonological transcriptions of each orthographic entry in the corpora were created using eSpeak (<ext-link ext-link-type="uri" xlink:href="http://espeak.sourceforge.net/" xlink:type="simple">http://espeak.sourceforge.net/</ext-link>), an open-source text-to-speech software that provides IPA transcriptions for multiple languages. With this method, the phonological transcriptions of the corpora used machine-readable phonetic symbols based on the International Phonetic Alphabet so that language-to-language neighborhood comparisons were viable. To ensure the validity of eSpeak transcriptions, we selected a subset of words from each language that existed in both CLEARPOND and in a phonetic database for that language and calculated phonological neighborhoods (including substitution, addition, and deletion neighbors) for each word twice, once using the output provided by eSpeak and once using the output from the external database. The neighborhoods obtained by the two different metrics were very highly correlated: Dutch eSpeak comparison with the CELEX database <xref ref-type="bibr" rid="pone.0043230-Baayen1">[30]</xref>: <italic>N</italic> = 26,358, <italic>R</italic> = 0.94, <italic>p</italic>&lt;0.001; English eSpeak comparison with the CMU database <xref ref-type="bibr" rid="pone.0043230-Weide1">[31]</xref>: <italic>N</italic> = 26,474, <italic>R</italic> = 0.97, <italic>p</italic>&lt;0.001; French eSpeak comparison with the Lexique database <xref ref-type="bibr" rid="pone.0043230-New1">[26]</xref>: <italic>N</italic> = 27,751, <italic>R</italic> = 0.96, <italic>p</italic>&lt;0.001; German eSpeak comparison with the CELEX database <xref ref-type="bibr" rid="pone.0043230-Baayen1">[30]</xref>: <italic>N</italic> = 21,609, <italic>R</italic> = 0.93, <italic>p</italic>&lt;0.001; Spanish eSpeak comparison with the BuscaPalabras database <xref ref-type="bibr" rid="pone.0043230-Davis2">[8]</xref>: <italic>N</italic> = 10,978, <italic>R</italic> = 0.97, 
<italic>p</italic>&lt;0.001. For examples of words in each language that correspond to each phoneme, see <xref ref-type="supplementary-material" rid="pone.0043230.s001">Table S1</xref> and <xref ref-type="supplementary-material" rid="pone.0043230.s002">Table S2</xref>.</p>
          <fig id="pone-0043230-g007" orientation="portrait" position="float">
            <object-id pub-id-type="doi">10.1371/journal.pone.0043230.g007</object-id>
            <label>Figure 7</label>
            <caption>
              <title>Ratio of within-language and foreign orthographic neighbors as part of total neighborhood size for each word with at least one neighbor.</title>
              <p>The top row compares the proportion of English within-language neighbors (blue) to foreign neighbors in each other language. The bottom row compares the proportion of within-language neighbors in each language to foreign (i.e., English) neighbors (blue).</p>
            </caption>
            <graphic mimetype="image" orientation="portrait" position="float" xlink:href="info:doi/10.1371/journal.pone.0043230.g007" xlink:type="simple"/>
          </fig>
          <table-wrap id="pone-0043230-t001" orientation="portrait" position="float"><object-id pub-id-type="doi">10.1371/journal.pone.0043230.t001</object-id><label>Table 1</label><caption>
              <title>Mean orthographic within-language neighborhood size and foreign neighborhood size.</title>
            </caption><alternatives>
              <graphic id="pone-0043230-t001-1" mimetype="image" orientation="portrait" position="float" xlink:href="info:doi/10.1371/journal.pone.0043230.t001" xlink:type="simple"/>
              <table>
                <colgroup span="1">
                  <col align="left" span="1"/>
                  <col align="center" span="1"/>
                  <col align="center" span="1"/>
                  <col align="center" span="1"/>
                  <col align="center" span="1"/>
                  <col align="center" span="1"/>
                  <col align="center" span="1"/>
                </colgroup>
                <thead>
                  <tr>
                    <td align="left" colspan="1" rowspan="1">Language</td>
                    <td align="left" colspan="1" rowspan="1">Within-Language Neighborhood Size</td>
                    <td align="left" colspan="5" rowspan="1">Foreign Neighborhood Size</td>
                  </tr>
                  <tr>
                    <td align="left" colspan="1" rowspan="1"/>
                    <td align="left" colspan="1" rowspan="1"/>
                    <td align="left" colspan="1" rowspan="1">English</td>
                    <td align="left" colspan="1" rowspan="1">Dutch</td>
                    <td align="left" colspan="1" rowspan="1">French</td>
                    <td align="left" colspan="1" rowspan="1">German</td>
                    <td align="left" colspan="1" rowspan="1">Spanish</td>
                  </tr>
                </thead>
                <tbody>
                  <tr>
                    <td align="left" colspan="1" rowspan="1">English</td>
                    <td align="left" colspan="1" rowspan="1">2.83 (0.03)</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">1.00 (0.02)</td>
                    <td align="left" colspan="1" rowspan="1">1.00 (0.01)</td>
                    <td align="left" colspan="1" rowspan="1">0.99 (0.01)</td>
                    <td align="left" colspan="1" rowspan="1">0.63 (0.01)</td>
                  </tr>
                  <tr>
                    <td align="left" colspan="1" rowspan="1">Dutch</td>
                    <td align="left" colspan="1" rowspan="1">2.00 (0.02)</td>
                    <td align="left" colspan="1" rowspan="1">1.00 (0.02)</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                  </tr>
                  <tr>
                    <td align="left" colspan="1" rowspan="1">French</td>
                    <td align="left" colspan="1" rowspan="1">2.35 (0.02)</td>
                    <td align="left" colspan="1" rowspan="1">1.00 (0.01)</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                  </tr>
                  <tr>
                    <td align="left" colspan="1" rowspan="1">German</td>
                    <td align="left" colspan="1" rowspan="1">1.97 (0.02)</td>
                    <td align="left" colspan="1" rowspan="1">0.99 (0.01)</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                  </tr>
                  <tr>
                    <td align="left" colspan="1" rowspan="1">Spanish</td>
                    <td align="left" colspan="1" rowspan="1">2.23 (0.02)</td>
                    <td align="left" colspan="1" rowspan="1">0.63 (0.01)</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                  </tr>
                </tbody>
              </table>
            </alternatives><table-wrap-foot>
              <fn id="nt101">
                <p><italic>Note</italic>. Values represent means, those in parentheses represent standard error of the mean.</p>
              </fn>
            </table-wrap-foot></table-wrap>
          <p>Phonological neighbors were composed of words that differed in the addition, deletion or substitution of a single phoneme <xref ref-type="bibr" rid="pone.0043230-Luce2">[18]</xref>, <xref ref-type="bibr" rid="pone.0043230-Vitevitch3">[32]</xref>. For instance, the English word <italic>dough</italic> (/doʊ/) shares a neighborhood with words like <italic>dome</italic> (/doʊm/; addition), <italic>owe</italic> (/oʊ/; deletion), and <italic>show</italic> (/ʃoʊ/; substitution) in English. In addition, the English word <italic>eel</italic> (/il/) and the Spanish word <italic>hilo</italic> (/ilo/) are cross-linguistic neighbors by virtue of the deletion of the final phoneme /o/ in the Spanish word.</p>
          <p>Because the same subtitle corpora were used to calculate both orthographic and phonological neighborhoods, qualitative comparisons can be made across neighborhood types.</p>
          <fig id="pone-0043230-g008" orientation="portrait" position="float">
            <object-id pub-id-type="doi">10.1371/journal.pone.0043230.g008</object-id>
            <label>Figure 8</label>
            <caption>
              <title>Distributions of phonological word lengths for Dutch, English, French, German, and Spanish.</title>
            </caption>
            <graphic mimetype="image" orientation="portrait" position="float" xlink:href="info:doi/10.1371/journal.pone.0043230.g008" xlink:type="simple"/>
          </fig>
        </sec>
        <sec id="s2b3">
          <title>Foreign neighborhoods</title>
          <p>The same methods that were used to calculate orthographic and phonological neighborhoods within languages were used to calculate foreign neighbors. We calculated the Dutch, French, German, and Spanish neighbors of every English word, as well as the English neighbors of every Dutch, French, German, and Spanish word. For these analyses, the pool of candidate neighbors included all 27,751 words within the foreign language’s database. Because these foreign neighborhoods were constructed using the same databases used to calculate within-language neighborhoods, foreign and within-language neighborhoods of each language can be easily compared.</p>
          <fig id="pone-0043230-g009" orientation="portrait" position="float">
            <object-id pub-id-type="doi">10.1371/journal.pone.0043230.g009</object-id>
            <label>Figure 9</label>
            <caption>
              <title>Mean phonological neighborhood sizes for words in Dutch, English, French, German, and Spanish.</title>
              <p>Total mean neighborhood size (left group) includes single-phoneme substitutions (e.g., ‘show’ for ‘dough’), deletions (e.g., ‘owe’ for ‘dough’) and additions (e.g., ‘dome’ for ‘dough’).</p>
            </caption>
            <graphic mimetype="image" orientation="portrait" position="float" xlink:href="info:doi/10.1371/journal.pone.0043230.g009" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
    </sec>
    <sec id="s3">
      <title>Results</title>
      <sec id="s3a">
        <title>Orthographic Neighborhoods</title>
        <sec id="s3a1">
          <title>Orthographic word length</title>
          <p>Average word length (in graphemes) was calculated for all 27,751 words in each language and was 8.41 (<italic>SD</italic> = 2.79) for Dutch, 7.26 (<italic>SD</italic> = 2.28) for English, 7.85 (<italic>SD</italic> = 2.26) for French, 8.25 (<italic>SD</italic> = 2.86) for German, and 7.94 (<italic>SD</italic> = 2.24) for Spanish; <italic>F</italic>(4,138750) = 879.66, <italic>p</italic>&lt;0.001. Follow-up tests revealed that group differences were significant between all language pairs. The distribution of word lengths for each language is shown in <xref ref-type="fig" rid="pone-0043230-g002">Figure 2</xref>.</p>
          <fig id="pone-0043230-g010" orientation="portrait" position="float">
            <object-id pub-id-type="doi">10.1371/journal.pone.0043230.g010</object-id>
            <label>Figure 10</label>
            <caption>
              <title>Distribution of phonological neighborhood densities across Dutch, English, French, German, and Spanish (log-log scale).</title>
            </caption>
            <graphic mimetype="image" orientation="portrait" position="float" xlink:href="info:doi/10.1371/journal.pone.0043230.g010" xlink:type="simple"/>
          </fig>
        </sec>
        <sec id="s3a2">
          <title>Orthographic neighborhood size</title>
          <p>The number of within-language substitution, addition, and deletion neighbors was calculated for each word in each language. The mean neighborhood sizes are shown in <xref ref-type="fig" rid="pone-0043230-g003">Figure 3</xref>. For analysis purposes, the longest 5% of all words were collapsed into a single entry. An ANOVA with language and word length as factors revealed a significant effect of language on total orthographic neighborhood size, <italic>F</italic>(4,138690) = 12.69, <italic>p</italic>&lt;0.0001, a significant effect of word length <italic>F</italic>(12,138690) = 9829.49, <italic>p</italic>&lt;0.0001, and a significant language x word length interaction <italic>F</italic>(48,138690) = 222.25, <italic>p</italic>&lt;0.0001. Post-hoc comparisons on the estimated marginal means for language revealed that English words contained significantly more neighbors than words in Dutch, French, German, or Spanish (all <italic>p</italic>’s &lt;0.05).</p>
          <p>While the effect of substitution neighbors on linguistic processing has long been studied, recent evidence suggests that addition and deletion neighbors affect word processing as well <xref ref-type="bibr" rid="pone.0043230-Davis1">[3]</xref>. To best characterize the effect of orthographic neighbors on word processing, all further analyses will consider the sum total of substitution, deletion, and addition neighbors for each word.</p>
          <fig id="pone-0043230-g011" orientation="portrait" position="float">
            <object-id pub-id-type="doi">10.1371/journal.pone.0043230.g011</object-id>
            <label>Figure 11</label>
            <caption>
              <title>Average phonological neighborhood size of words in Dutch, English, French, German, and Spanish at each word length.</title>
            </caption>
            <graphic mimetype="image" orientation="portrait" position="float" xlink:href="info:doi/10.1371/journal.pone.0043230.g011" xlink:type="simple"/>
          </fig>
          <fig id="pone-0043230-g012" orientation="portrait" position="float">
            <object-id pub-id-type="doi">10.1371/journal.pone.0043230.g012</object-id>
            <label>Figure 12</label>
            <caption>
              <title>Average phonological neighborhood size as a function of word frequency.</title>
              <p>Frequency bins are evenly spaced divisions of words in 5% increments. Bin one represents the average phonological neighborhood size of the top 5% most frequent words in the language, bin twenty represents the average phonological neighborhood size of the 5% least frequent words.</p>
            </caption>
            <graphic mimetype="image" orientation="portrait" position="float" xlink:href="info:doi/10.1371/journal.pone.0043230.g012" xlink:type="simple"/>
          </fig>
          <table-wrap id="pone-0043230-t002" orientation="portrait" position="float"><object-id pub-id-type="doi">10.1371/journal.pone.0043230.t002</object-id><label>Table 2</label><caption>
              <title>Mean phonological within-language and foreign neighborhood size.</title>
            </caption><alternatives>
              <graphic id="pone-0043230-t002-2" mimetype="image" orientation="portrait" position="float" xlink:href="info:doi/10.1371/journal.pone.0043230.t002" xlink:type="simple"/>
              <table>
                <colgroup span="1">
                  <col align="left" span="1"/>
                  <col align="center" span="1"/>
                  <col align="center" span="1"/>
                  <col align="center" span="1"/>
                  <col align="center" span="1"/>
                  <col align="center" span="1"/>
                  <col align="center" span="1"/>
                </colgroup>
                <thead>
                  <tr>
                    <td align="left" colspan="1" rowspan="1">Language</td>
                    <td align="left" colspan="1" rowspan="1">Within-Language Neighborhood Size</td>
                    <td align="left" colspan="5" rowspan="1">Foreign Neighborhood Size</td>
                  </tr>
                  <tr>
                    <td align="left" colspan="1" rowspan="1"/>
                    <td align="left" colspan="1" rowspan="1"/>
                    <td align="left" colspan="1" rowspan="1">English</td>
                    <td align="left" colspan="1" rowspan="1">Dutch</td>
                    <td align="left" colspan="1" rowspan="1">French</td>
                    <td align="left" colspan="1" rowspan="1">German</td>
                    <td align="left" colspan="1" rowspan="1">Spanish</td>
                  </tr>
                </thead>
                <tbody>
                  <tr>
                    <td align="left" colspan="1" rowspan="1">English</td>
                    <td align="left" colspan="1" rowspan="1">5.49 (0.06)</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">0.89 (0.02)</td>
                    <td align="left" colspan="1" rowspan="1">1.23 (0.04)</td>
                    <td align="left" colspan="1" rowspan="1">0.89 (0.02)</td>
                    <td align="left" colspan="1" rowspan="1">0.15 (0.01)</td>
                  </tr>
                  <tr>
                    <td align="left" colspan="1" rowspan="1">Dutch</td>
                    <td align="left" colspan="1" rowspan="1">3.05 (0.04)</td>
                    <td align="left" colspan="1" rowspan="1">0.89 (0.02)</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                  </tr>
                  <tr>
                    <td align="left" colspan="1" rowspan="1">French</td>
                    <td align="left" colspan="1" rowspan="1">10.32 (0.10)</td>
                    <td align="left" colspan="1" rowspan="1">1.23 (0.04)</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                  </tr>
                  <tr>
                    <td align="left" colspan="1" rowspan="1">German</td>
                    <td align="left" colspan="1" rowspan="1">3.02 (0.03)</td>
                    <td align="left" colspan="1" rowspan="1">0.89 (0.02)</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                  </tr>
                  <tr>
                    <td align="left" colspan="1" rowspan="1">Spanish</td>
                    <td align="left" colspan="1" rowspan="1">2.63 (0.02)</td>
                    <td align="left" colspan="1" rowspan="1">0.15 (0.01)</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                    <td align="left" colspan="1" rowspan="1">–</td>
                  </tr>
                </tbody>
              </table>
            </alternatives><table-wrap-foot>
              <fn id="nt102">
                <p><italic>Note</italic>. Values represent means, those in parentheses represent standard error of the mean.</p>
              </fn>
            </table-wrap-foot></table-wrap>
          <fig id="pone-0043230-g013" orientation="portrait" position="float">
            <object-id pub-id-type="doi">10.1371/journal.pone.0043230.g013</object-id>
            <label>Figure 13</label>
            <caption>
              <title>Ratio of within-language and foreign phonological neighbors as part of total neighborhood size for each word.</title>
              <p>The top row compares the proportion of English within-language neighbors (blue) to foreign neighbors in each other language. The bottom row compares the proportion of within-language neighbors in each language to foreign (i.e., English) neighbors (blue).</p>
            </caption>
            <graphic mimetype="image" orientation="portrait" position="float" xlink:href="info:doi/10.1371/journal.pone.0043230.g013" xlink:type="simple"/>
          </fig>
          <fig id="pone-0043230-g014" orientation="portrait" position="float">
            <object-id pub-id-type="doi">10.1371/journal.pone.0043230.g014</object-id>
            <label>Figure 14</label>
            <caption>
              <title>Comparisons of orthographic and phonological word lengths for Dutch, English, French, German, and Spanish.</title>
            </caption>
            <graphic mimetype="image" orientation="portrait" position="float" xlink:href="info:doi/10.1371/journal.pone.0043230.g014" xlink:type="simple"/>
          </fig>
        </sec>
        <sec id="s3a3">
          <title>Distribution of orthographic neighborhood densities</title>
          <p><xref ref-type="fig" rid="pone-0043230-g004">Figure 4</xref> shows the distribution of neighborhood densities across languages. The percentage of words in each language with at least one orthographic neighbor was 55.3% for Dutch, 64.1% for English, 77.2% for French, 61.0% for German, and 74.7% for Spanish.</p>
        </sec>
        <sec id="s3a4">
          <title>Orthographic neighborhood size by word length</title>
          <p><xref ref-type="fig" rid="pone-0043230-g005">Figure 5</xref> shows the average neighborhood size of words in each language for each word length.</p>
        </sec>
        <sec id="s3a5">
          <title>Orthographic neighborhood size by word frequency</title>
          <p>In each language, all 27,751 words were divided into twenty equally spaced frequency bins, with each bin representing a 5% increment. For example, bin one represented the average orthographic neighborhood size of the top 5% most frequent words in the language while bin 20 represented the average orthographic neighborhood size of the least frequent 5% of words. The average orthographic neighborhood size for words in each of these frequency bins is provided in <xref ref-type="fig" rid="pone-0043230-g006">Figure 6</xref>.</p>
        </sec>
        <sec id="s3a6">
          <title>Foreign orthographic neighbors</title>
          <p>Foreign orthographic neighborhoods were calculated for each English word in Dutch, French, German, and Spanish, and for each Dutch, French, German, and Spanish word in English. Results revealed that 21.2% of English words had at least one Dutch neighbor, 31.7% had at least one French neighbor, 23.6% had at least one German neighbor, and 21.7% had at least one Spanish neighbor. In addition, 28.0% of Dutch words, 33.9% of French words, 30.0% of German words, and 22.8% of Spanish words had at least one English neighbor. The effect of foreign neighbors on orthographic neighborhood size is provided in <xref ref-type="table" rid="pone-0043230-t001">Table 1</xref>. For each word with at least one within-language or foreign neighbor, the relative proportion of within-language or foreign neighbors to all of a word’s neighbors was calculated. Mean proportions are provided in <xref ref-type="fig" rid="pone-0043230-g007">Figure 7</xref>.</p>
          <fig id="pone-0043230-g015" orientation="portrait" position="float">
            <object-id pub-id-type="doi">10.1371/journal.pone.0043230.g015</object-id>
            <label>Figure 15</label>
            <caption>
              <title>Screen-shot of the EnglishPOND portion of the CLEARPOND website, accessible at</title>
              <p><ext-link ext-link-type="uri" xlink:href="http://clearpond.northwestern.edu" xlink:type="simple">http://clearpond.northwestern.edu</ext-link><bold>. CLEARPOND provides a user-friendly, web-based interface for obtaining Dutch, English, French, German, and Spanish phonological and orthographic neighborhood densities (or, PONDs).</bold> The search function allows users to search for POND information in any of the five languages using single word queries or by providing full lists of words. CLEARPOND provides a number of important psycholinguistic measures, such as neighborhood density and neighborhood frequency, both for within-language neighbors and foreign-language neighbors. With user-controlled output selection, researchers can choose the output parameters that are most relevant. In addition to allowing users to acquire data for specific words, CLEARPOND can also search by features so that researchers can generate new lists of words that meet precise criteria, such as a specific range of neighborhood sizes or lexical frequency (as provided by the Subtlex databases). Furthermore, multiple filters can be applied simultaneously, providing greater control for stimuli creation. Users also have the option of exporting their results directly to a text file, making it easy to create downloadable documents containing pertinent psycholinguistic measures for all of their stimuli. In addition to the web-based interface, more comprehensive lists containing all of the information provided by the database are available for download, so that the entire CLEARPOND database can be accessed offline.</p>
            </caption>
            <graphic mimetype="image" orientation="portrait" position="float" xlink:href="info:doi/10.1371/journal.pone.0043230.g015" xlink:type="simple"/>
          </fig>
        </sec>
      </sec>
      <sec id="s3b">
        <title>Phonological Neighborhoods</title>
        <sec id="s3b1">
          <title>Phonological word length</title>
          <p>Average word length (in phonemes) was calculated for all 27,751 words in each language and was 7.48 (<italic>SD</italic> = 2.51) for Dutch, 6.09 (<italic>SD</italic> = 2.18) for English, 5.77 (<italic>SD</italic> = 1.93) for French, 7.14 (<italic>SD</italic> = 2.45) for German, and 7.84 (<italic>SD</italic> = 2.28) for Spanish; <italic>F</italic>(4,138750) = 4284.86, <italic>p</italic>&lt;0.001. Follow-up tests revealed that group differences were significant between all language pairs. The distribution of word lengths for each language is shown in <xref ref-type="fig" rid="pone-0043230-g008">Figure 8</xref>.</p>
        </sec>
        <sec id="s3b2">
          <title>Phonological neighborhood size</title>
          <p>The number of within-language substitution, addition, and deletion neighbors was calculated for each word in each language. The mean neighborhood sizes are shown in <xref ref-type="fig" rid="pone-0043230-g009">Figure 9</xref>. For analysis purposes, the longest 5% of all words were collapsed into a single entry. An ANOVA with language and word length as factors revealed a significant effect of language on total phonological neighborhood size, <italic>F</italic>(4,138695) = 2730.64, <italic>p</italic>&lt;0.0001, a significant effect of word length <italic>F</italic>(11,138695) = 10204.84, <italic>p</italic>&lt;0.0001, and a significant language x word length interaction <italic>F</italic>(44,138695) = 913.84, <italic>p</italic>&lt;0.0001. Post-hoc comparisons on the estimated marginal means for language revealed that all languages differed on phonological neighborhood size (all <italic>p</italic>’s&lt;0.05). As in the orthographic neighborhood analyses, all further phonological neighborhood analyses consider the total number of substitution, addition, and deletion neighbors for each word.</p>
        </sec>
        <sec id="s3b3">
          <title>Distribution of phonological neighborhood densities</title>
          <p><xref ref-type="fig" rid="pone-0043230-g010">Figure 10</xref> shows the distribution of phonological neighborhood densities across languages. The percentage of words in each language with at least one neighbor was 55.2% for Dutch, 69.1% for English, 75.5% for French, 61.9% for German, and 74.6% for Spanish.</p>
        </sec>
        <sec id="s3b4">
          <title>Phonological neighborhood size by word length</title>
          <p><xref ref-type="fig" rid="pone-0043230-g011">Figure 11</xref> shows the average neighborhood size in each language for each word length.</p>
        </sec>
        <sec id="s3b5">
          <title>Phonological neighborhood size by word frequency</title>
          <p>In each language, all 27,751 words were divided into twenty equally spaced frequency bins (as was done with orthographic neighborhoods). The average phonological neighborhood size for words in each frequency bin is provided in <xref ref-type="fig" rid="pone-0043230-g012">Figure 12</xref>.</p>
        </sec>
        <sec id="s3b6">
          <title>Foreign phonological neighbors</title>
          <p>Foreign phonological neighborhoods were calculated for each English word in Dutch, French, German, and Spanish, and for each Dutch, French, German, and Spanish word in English. Results revealed that 15.9% of English words had at least one Dutch neighbor, 10.6% had at least one French neighbor, 15.8% had at least one German neighbor, and 4.8% had at least one Spanish neighbor. In addition, 10.8% of Dutch words, 12.0% of French words, 12.4% of German words, and 1.6% of Spanish words had at least one English neighbor. The effect of foreign neighbors on phonological neighborhood size is provided in <xref ref-type="table" rid="pone-0043230-t002">Table 2</xref>. For each word with at least one within-language or foreign neighbor, the relative proportion of within-language or foreign neighbors to all of a word’s neighbors was calculated. Mean proportions are provided in <xref ref-type="fig" rid="pone-0043230-g013">Figure 13</xref>.</p>
        </sec>
      </sec>
    </sec>
    <sec id="s4">
      <title>Discussion</title>
      <p>The results of our analyses show consistent patterns across languages in the effects of word length and lexical frequency on neighborhood size. Differences across languages are also present – for example, while French has the most phonological neighbors, English contains more orthographic neighbors than the other four languages examined. The degree of similarity between phonological and orthographic neighbors also varies across languages (e.g., in Spanish, phonological and orthographic neighborhoods are more alike than in any other language). Within languages, differences emerge dependent on neighborhood origin; foreign neighbors are relatively infrequent compared to within-language neighbors.</p>
      <sec id="s4a">
        <title>Comparing Orthographic and Phonological Neighborhoods</title>
        <p>Because the present analysis derived orthographic and phonological neighborhoods from the same subtitle corpora, we were able to make direct comparisons between the two neighborhood types. The differences that emerge in the relationships between these neighborhoods across languages can potentially be used to illuminate differences in language transparency. Transparency, or orthographic depth, is a measure of how closely a language maintains a one-to-one grapheme-phoneme correspondence; the more transparent a language, the more tightly its graphemes and phonemes are matched. For example, in the most transparent of languages, each phoneme would map to only one grapheme and vice versa (e.g., the Spanish phoneme /m/ is always represented by the grapheme <italic>m</italic>, and the <italic>m</italic> grapheme always corresponds to the phoneme /m/). Conversely, opaque languages are those in which grapheme-phoneme mappings are less consistent; multiple graphemes can represent the same phoneme (e.g., English <italic>k</italic> and <italic>c</italic> can both represent the phoneme /k/), and more than one phoneme may be represented by a single grapheme (e.g., English <italic>g</italic> can represent the phonemes /g/ and /<inline-formula><inline-graphic xlink:href="info:doi/10.1371/journal.pone.0043230.e001" xlink:type="simple"/></inline-formula>/). Because the grapheme-phoneme mappings of transparent languages are consistent, in these languages, many orthographic neighbors are also phonological neighbors. When phonemes and graphemes are consistently matched, the phonetic transcriptions of words mirror the orthographic structure. Therefore, when a single <italic>grapheme</italic> substitution (or addition or deletion) results in the creation of a new word, it is likely that the new word similarly differs from the original in only one <italic>phoneme</italic>.</p>
        <p>Our analyses suggest that, in addition to indexing language transparency as a strict match between grapheme-phoneme correspondences, there may be a relationship between a language’s transparency and the degree of similarity between the language’s orthographic and phonological neighborhoods. For example, Spanish and German (both considered to be transparent languages <xref ref-type="bibr" rid="pone.0043230-Seymour1">[33]</xref>) demonstrate a high degree of similarity in the distributions of their orthographic and phonological neighborhoods. However, the similarity between orthographic and phonological neighborhoods is not quite as tightly coupled in German as it is in Spanish, likely because German contains specific consonant clusters (e.g., <italic>sch</italic>) that correspond to single phonemes (e.g., /ʃ/). Accordingly, there is higher similarity between graphemic and phonemic word lengths in Spanish than in German, Dutch, or English (<xref ref-type="fig" rid="pone-0043230-g014">Figure 14</xref>). French, a language with a high number of silent letters and digraphs, has the largest difference between graphemic and phonemic word length.</p>
      </sec>
      <sec id="s4b">
        <title>Comparing Types of Neighbors</title>
        <p>In addition to revealing differences between phonological and orthographic neighborhoods, our data illustrate differences in how substitution, addition, and deletion neighbors are used across languages.</p>
        <sec id="s4b1">
          <title>Orthographic neighborhoods</title>
          <p>Relative to the other four languages, English contains a large number of orthographic substitution neighbors. This suggests that English makes use of more available letter sequences at every word length, and efficiently uses its graphemic space. In contrast, French derives a greater percentage of its neighbors from addition and deletion relative to the other languages. Although French has relatively few substitution neighbors, it nevertheless has the second largest total number of neighbors; this is driven by French’s increased use of addition and deletion neighbors.</p>
        </sec>
        <sec id="s4b2">
          <title>Phonological neighborhoods</title>
          <p>A notable trend that emerged in the comparison of phonological neighborhood sizes across languages is the much higher occurrence of phonological neighbors of all types (substitution, deletion, and addition) in French when compared to all other languages. One potential explanation for the observed trend is the large number of homophones in the French language.</p>
          <p>Homophones increase the phonological neighborhood density of a language because there are multiple lexical entries with the same phonological make-up. Therefore, if a word has a phonological neighbor that is one meaning of a homophonic word set, it also automatically has a phonological neighbor comprised of all other homophones. In languages such as French, where homophonic word sets are numerous, the phonetic diversity of all tokens is decreased, and the pool of potential phonological neighbors is increased. For example, the French word <italic>mer</italic> (sea) is a substitution neighbor of <italic>ver</italic> (earthworm), <italic>vers</italic> (towards), <italic>vert</italic> (green), and <italic>verre</italic> (drinking glass), which are all pronounced /vɛʁ/; only <italic>ver</italic> would be an orthographic neighbor. The homophone account of French’s increased phonological neighborhood density is consistent with an analysis of phonetic diversity across languages: French only contained 17,303 unique phonetic words (out of 27,751; 62.4%), compared to 27,258 in Dutch (98.0%), 27,007 in English (97.3%), 27,284 in German (98.3%), and 27,101 in Spanish (97.7%).</p>
        </sec>
      </sec>
      <sec id="s4c">
        <title>Foreign Neighborhoods</title>
        <p>In our analysis of foreign neighbors, we restricted comparisons to English and each other language (Dutch, French, German, and Spanish) to facilitate ease of comparisons, and because English is one of the most commonly learned second languages <xref ref-type="bibr" rid="pone.0043230-Mejer1">[34]</xref>. Foreign orthographic neighbors were found to make relatively substantial contributions to overall neighborhood size, constituting between 13–20% of a word’s total neighbors on average. Within-language neighbors still dominated overall neighborhood size, likely because languages have different orthotactic rules and requirements for the formulation of valid words. The result is that words in each of the languages we examined were more similar in orthographic form to other words within the same language than they were to foreign words.</p>
        <p>Compared to foreign orthographic neighbors, foreign phonological neighbors were very rare. The effect of foreign phonological neighbors on overall neighborhood size was quite low, and the percentage of a word’s neighbors that derived from a foreign language was even lower, between 1–8%. These results are consistent with those of Vitevitch <xref ref-type="bibr" rid="pone.0043230-Vitevitch3">[32]</xref>, who conducted an analysis of foreign phonological neighbors across Spanish and English and found that the two languages share relatively few neighbors.</p>
        <p>One potential reason for the small number of foreign neighbors is that though the five languages we investigated share an alphabetic system (aside from accented letters), they contain phonological systems that are much more distinct. Because the orthographic structure of a language is anchored by that language’s writing system, orthography does not vary much over time. Conversely, a language’s phonetic structure has much more freedom to vary over time and across geographical space; the accumulation of these phonological changes likely contributes to the languages’ phonological distinctiveness, thereby reducing the number of foreign phonological neighbors.</p>
        <p>While comparisons of foreign neighbors can be used for purposes of stimuli construction and to validate cross-linguistic comparisons, it is important to note that our data should not be interpreted as a measure of the bilingual mental lexicon. In order to make true claims about the nature of bilingual lexical representations based on corpus analyses, it would first be necessary to procure a bilingual corpus in which frequency values are representative of usage when a single individual speaks two languages. To our knowledge, such a corpus does not exist. If bilingual corpora can be obtained, it would be worthwhile to conduct neighborhood analyses using those lexical entries.</p>
      </sec>
      <sec id="s4d">
        <title>Conclusions and Future Directions</title>
        <p>The corpus analysis presented in the current study provides a novel tool for researchers who study language processing. It enables comparisons between orthographic and phonological neighbors and within and across five languages.</p>
        <p>While neighborhood information for some languages has been made available in the past <xref ref-type="bibr" rid="pone.0043230-Vitevitch1">[13]</xref>, <xref ref-type="bibr" rid="pone.0043230-Frauenfelder1">[15]</xref>, <xref ref-type="bibr" rid="pone.0043230-Balota1">[20]</xref>, <xref ref-type="bibr" rid="pone.0043230-Duyck1">[21]</xref>, the database that we present here provides comparable corpora and analyses across languages. We also expand upon the past examinations of foreign neighbors in Spanish and English <xref ref-type="bibr" rid="pone.0043230-Vitevitch3">[32]</xref> by supplying foreign neighborhood data for four language pairs – English-Dutch, English-French, English-German, English-Spanish – and by including both orthographic and phonological neighbors. Our future efforts will focus on developing a comparable corpus derived from written word data using written-word databases, such as Google Ngram (<ext-link ext-link-type="uri" xlink:href="http://books.google.com/ngram" xlink:type="simple">http://books.google.com/ngram</ext-link>), to complement our present work on spoken language.</p>
        <p>In sum, the current paper presents a unified database for indexing neighborhood information derived from spoken corpora. These data provide cross-linguistic metrics that are crucial for designing experiments of spoken and written language processing. We have made our database available in searchable form (see <xref ref-type="fig" rid="pone-0043230-g015">Figure 15</xref> for a screenshot of the web interface) at <ext-link ext-link-type="uri" xlink:href="http://clearpond.northwestern.edu" xlink:type="simple">http://clearpond.northwestern.edu</ext-link>; it is also freely available for download.</p>
      </sec>
    </sec>
    <sec id="s5">
      <title>Supporting Information</title>
      <supplementary-material id="pone.0043230.s001" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" orientation="portrait" position="float" xlink:href="info:doi/10.1371/journal.pone.0043230.s001" xlink:type="simple">
        <label>Table S1</label>
        <caption>
          <p>
            <bold>IPA consonants and example words in each language.</bold>
          </p>
          <p>(DOCX)</p>
        </caption>
      </supplementary-material>
      <supplementary-material id="pone.0043230.s002" mimetype="application/vnd.openxmlformats-officedocument.wordprocessingml.document" orientation="portrait" position="float" xlink:href="info:doi/10.1371/journal.pone.0043230.s002" xlink:type="simple">
        <label>Table S2</label>
        <caption>
          <p>
            <bold>IPA vowels and example words in each language.</bold>
          </p>
          <p>(DOCX)</p>
        </caption>
      </supplementary-material>
    </sec>
  </body>
  <back>
    <ack>
      <p>The authors thank members of the <italic>Northwestern Bilingualism and Psycholinguistics Laboratory</italic> for helpful comments on earlier drafts of this manuscript. We also thank the creators of the SUBTLEX databases, who provided the corpora used in the present study.</p>
      <p>Correspondence should be addressed to Anthony Shook, 2240 Campus Drive, Evanston, IL 60208 USA. Email: a-shook@northwestern.edu.</p>
    </ack>
    <ref-list>
      <title>References</title>
      <ref id="pone.0043230-Coltheart1">
        <label>1</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Coltheart</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Davelaar</surname><given-names>E</given-names></name>, <name name-style="western"><surname>Jonasson</surname><given-names>JT</given-names></name>, <name name-style="western"><surname>Besner</surname><given-names>D</given-names></name> (<year>1977</year>) <article-title>Access to the internal lexicon</article-title>. <source>Attention and Performance</source> <volume>VI</volume>: <fpage>535</fpage>–<lpage>555</lpage>.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Luce1">
        <label>2</label>
        <mixed-citation publication-type="other" xlink:type="simple">Luce PA, Pisoni D, Goldinger S (1990) Similarity neighborhoods of spoken words. Cognitive Models of Speech Processing: Psycholinguistic and Computational Perspectives. 122–147.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Davis1">
        <label>3</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Davis</surname><given-names>CJ</given-names></name>, <name name-style="western"><surname>Perea</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Acha</surname><given-names>J</given-names></name> (<year>2009</year>) <article-title>Re(de)fining the orthographic neighborhood: The role of addition and deletion neighbors in lexical decision and reading</article-title>. <source>Journal of Experimental Psychology: Human Perception and Performance</source> <volume>35</volume>: <fpage>1550</fpage>–<lpage>1570</lpage> doi:10.1037/a0014253.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Siakaluk1">
        <label>4</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Siakaluk</surname><given-names>PD</given-names></name>, <name name-style="western"><surname>Sears</surname><given-names>CR</given-names></name>, <name name-style="western"><surname>Lupker</surname><given-names>SJ</given-names></name> (<year>2002</year>) <article-title>Orthographic neighborhood effects in lexical decision: The effects of nonword orthographic neighborhood size</article-title>. <source>Journal of Experimental Psychology: Human Perception and Performance</source> <volume>28</volume>: <fpage>661</fpage>–<lpage>681</lpage> doi:10.1037//0096-1523.28.3.661.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Andrews1">
        <label>5</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Andrews</surname><given-names>S</given-names></name> (<year>1989</year>) <article-title>Frequency and neighborhood effects on lexical access: Activation or search?</article-title> <source>Journal of Experimental Psychology: Learning, Memory, and Cognition</source> <volume>15</volume>: <fpage>802</fpage>–<lpage>814</lpage> doi:10.1037//0278-7393.15.5.802.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Tsai1">
        <label>6</label>
        <mixed-citation publication-type="other" xlink:type="simple">Tsai J, Lee C, Lin Y, Tzeng O, Hung D (2006) Neighborhood size effects of Chinese words in lexical decision and reading. Word Journal of the International Linguistic Association: 659–675.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Andrews2">
        <label>7</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Andrews</surname><given-names>S</given-names></name> (<year>1997</year>) <article-title>The effect of orthographic similarity on lexical retrieval: Resolving neighborhood conflicts</article-title>. <source>Psychonomic Bulletin &amp; Review</source> <volume>4</volume>: <fpage>439</fpage>–<lpage>461</lpage>.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Davis2">
        <label>8</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Davis</surname><given-names>CJ</given-names></name>, <name name-style="western"><surname>Perea</surname><given-names>M</given-names></name> (<year>2005</year>) <article-title>BuscaPalabras: A program for deriving orthographic and phonological neighborhood statistics and other psycholinguistic indices in Spanish</article-title>. <source>Behavior Research Methods</source> <volume>37</volume>: <fpage>665</fpage>–<lpage>671</lpage> doi:10.3758/BF03192738.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Yates1">
        <label>9</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Yates</surname><given-names>M</given-names></name> (<year>2005</year>) <article-title>Phonological neighbors speed visual word processing: Evidence from multiple tasks</article-title>. <source>Journal of Experimental Psychology: Learning, Memory, and Cognition</source> <volume>31</volume>: <fpage>1385</fpage>–<lpage>1397</lpage> doi:10.1037/0278-7393.31.6.1385.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Grainger1">
        <label>10</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Grainger</surname><given-names>J</given-names></name>, <name name-style="western"><surname>Muneaux</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Farioli</surname><given-names>F</given-names></name>, <name name-style="western"><surname>Ziegler</surname><given-names>JC</given-names></name> (<year>2005</year>) <article-title>Effects of phonological and orthographic neighbourhood density interact in visual word recognition</article-title>. <source>The Quarterly Journal of Experimental Psychology</source> <volume>58</volume>: <fpage>981</fpage>–<lpage>998</lpage> doi:10.1080/02724980443000386.</mixed-citation>
      </ref>
      <ref id="pone.0043230-BaeseBerk1">
        <label>11</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Baese-Berk</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Goldrick</surname><given-names>M</given-names></name> (<year>2009</year>) <article-title>Mechanisms of interaction in speech production</article-title>. <source>Language and Cognitive Processes</source> <volume>24</volume>: <fpage>527</fpage>–<lpage>554</lpage> doi:10.1080/01690960802299378.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Marian1">
        <label>12</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Marian</surname><given-names>V</given-names></name>, <name name-style="western"><surname>Blumenfeld</surname><given-names>HK</given-names></name>, <name name-style="western"><surname>Boukrina</surname><given-names>OV</given-names></name> (<year>2008</year>) <article-title>Sensitivity to phonological similarity within and across languages</article-title>. <source>Journal of Psycholinguistic Research</source> <volume>37</volume>: <fpage>141</fpage>–<lpage>170</lpage> doi:10.1007/s10936-007-9064-9.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Vitevitch1">
        <label>13</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Vitevitch</surname><given-names>MS</given-names></name>, <name name-style="western"><surname>Rodríguez</surname><given-names>E</given-names></name> (<year>2005</year>) <article-title>Neighborhood density effects in spoken word recognition in Spanish</article-title>. <source>Journal of Multilingual Communication Disorders</source> <volume>3</volume>: <fpage>64</fpage>–<lpage>73</lpage> doi:10.1080/14769670400027332.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Baus1">
        <label>14</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Baus</surname><given-names>C</given-names></name>, <name name-style="western"><surname>Costa</surname><given-names>A</given-names></name>, <name name-style="western"><surname>Carreiras</surname><given-names>M</given-names></name> (<year>2008</year>) <article-title>Neighbourhood density and frequency effects in speech production: A case for interactivity</article-title>. <source>Language and Cognitive Processes</source> <volume>23</volume>: <fpage>866</fpage>–<lpage>888</lpage> doi:10.1080/01690960801962372.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Frauenfelder1">
        <label>15</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Frauenfelder</surname><given-names>UH</given-names></name>, <name name-style="western"><surname>Baayen</surname><given-names>RH</given-names></name>, <name name-style="western"><surname>Hellwig</surname><given-names>FM</given-names></name>, <name name-style="western"><surname>Schreuder</surname><given-names>R</given-names></name> (<year>1993</year>) <article-title>Neighborhood density and frequency across languages and modalities</article-title>. <source>Journal of Memory and Language</source> <volume>32</volume>: <fpage>781</fpage>–<lpage>804</lpage> doi:10.1006/jmla.1993.1039.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Dell1">
        <label>16</label>
        <mixed-citation publication-type="other" xlink:type="simple">Dell G (2003) Neighbors in the lexicon: Friends or foes? In: Schiller N, Meyer AS, editors. Phonetics and Phonology in Language. New York: Mouton De Gruyter. 9–47.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Gahl1">
        <label>17</label>
        <mixed-citation publication-type="other" xlink:type="simple">Gahl S, Yao Y, Johnson K (2012) Why reduce? Phonological neighborhood density and phonetic reduction in spontaneous speech. Journal of Memory and Language. doi:10.1016/j.jml.2011.11.006.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Luce2">
        <label>18</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Luce</surname><given-names>PA</given-names></name>, <name name-style="western"><surname>Pisoni</surname><given-names>DB</given-names></name> (<year>1998</year>) <article-title>Recognizing spoken words: The neighborhood activation model</article-title>. <source>Ear and Hearing</source> <volume>19</volume>: <fpage>1</fpage>–<lpage>36</lpage> doi:10.1097/00003446-199802000-00001.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Vitevitch2">
        <label>19</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Vitevitch</surname><given-names>MS</given-names></name>, <name name-style="western"><surname>Stamer</surname><given-names>MK</given-names></name> (<year>2006</year>) <article-title>The curious case of competition in Spanish speech production</article-title>. <source>Language and Cognitive Processes</source> <volume>21</volume>: <fpage>760</fpage>–<lpage>770</lpage> doi:10.1080/01690960500287196.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Balota1">
        <label>20</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Balota</surname><given-names>DA</given-names></name>, <name name-style="western"><surname>Yap</surname><given-names>MJ</given-names></name>, <name name-style="western"><surname>Cortese</surname><given-names>MJ</given-names></name>, <name name-style="western"><surname>Hutchison</surname><given-names>KA</given-names></name>, <name name-style="western"><surname>Kessler</surname><given-names>B</given-names></name>, <etal>et al</etal>. (<year>2007</year>) <article-title>The English lexicon project</article-title>. <source>Behavior Research Methods</source> <volume>39</volume>: <fpage>445</fpage>–<lpage>459</lpage> doi:10.3758/BF03193014.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Duyck1">
        <label>21</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Duyck</surname><given-names>W</given-names></name>, <name name-style="western"><surname>Desmet</surname><given-names>T</given-names></name>, <name name-style="western"><surname>Verbeke</surname><given-names>LPC</given-names></name>, <name name-style="western"><surname>Brysbaert</surname><given-names>M</given-names></name> (<year>2004</year>) <article-title>WordGen: A tool for word selection and nonword generation in Dutch, English, German, and French</article-title>. <source>Behavior Research Methods, Instruments, &amp; Computers</source> <volume>36</volume>: <fpage>488</fpage>–<lpage>499</lpage> doi:10.3758/BF03195595.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Davis3">
        <label>22</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Davis</surname><given-names>CJ</given-names></name> (<year>2005</year>) <article-title>N-watch: A program for deriving neighborhood size and other psycholinguistic statistics</article-title>. <source>Behavior Research Methods</source> <volume>37</volume>: <fpage>65</fpage>–<lpage>70</lpage> doi:10.3758/BF03206399.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Perea1">
        <label>23</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Perea</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Urkia</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Davis</surname><given-names>CJ</given-names></name>, <name name-style="western"><surname>Agirre</surname><given-names>A</given-names></name>, <name name-style="western"><surname>Laseka</surname><given-names>E</given-names></name>, <etal>et al</etal>. (<year>2006</year>) <article-title>E-Hitz: Word frequency list and a program for deriving psycholinguistic statistics in an agglutinative language (Basque)</article-title>. <source>Behavior Research Methods</source> <volume>38</volume>: <fpage>610</fpage>–<lpage>615</lpage>.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Keuleers1">
        <label>24</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Keuleers</surname><given-names>E</given-names></name>, <name name-style="western"><surname>Brysbaert</surname><given-names>M</given-names></name>, <name name-style="western"><surname>New</surname><given-names>B</given-names></name> (<year>2010</year>) <article-title>SUBTLEX-NL: A new measure for Dutch word frequency based on film subtitles</article-title>. <source>Behavior Research Methods</source> <volume>42</volume>: <fpage>643</fpage>–<lpage>650</lpage> doi:10.3758/BRM.42.3.643.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Brysbaert1">
        <label>25</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Brysbaert</surname><given-names>M</given-names></name>, <name name-style="western"><surname>New</surname><given-names>B</given-names></name> (<year>2009</year>) <article-title>Moving beyond Kucera and Francis: A critical evaluation of current word frequency norms and the introduction of a new and improved word frequency measure for American English</article-title>. <source>Behavior Research Methods</source> <volume>41</volume>: <fpage>977</fpage>–<lpage>990</lpage> doi:10.3758/BRM.41.4.977.</mixed-citation>
      </ref>
      <ref id="pone.0043230-New1">
        <label>26</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>New</surname><given-names>B</given-names></name>, <name name-style="western"><surname>Pallier</surname><given-names>C</given-names></name>, <name name-style="western"><surname>Brysbaert</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Ferrand</surname><given-names>L</given-names></name> (<year>2004</year>) <article-title>Lexique 2: A new French lexical database</article-title>. <source>Behavior Research Methods, Instruments, &amp; Computers</source> <volume>36</volume>: <fpage>516</fpage>–<lpage>524</lpage>.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Brysbaert2">
        <label>27</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Brysbaert</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Buchmeier</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Conrad</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Jacobs</surname><given-names>AM</given-names></name>, <name name-style="western"><surname>Bölte</surname><given-names>J</given-names></name>, <etal>et al</etal>. (<year>2011</year>) <article-title>The word frequency effect: A review of recent developments and implications for the choice of frequency estimates in German</article-title>. <source>Experimental Psychology</source> <volume>58</volume>: <fpage>412</fpage>–<lpage>424</lpage> doi:10.1027/1618-3169/a000123.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Cuetos1">
        <label>28</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Cuetos</surname><given-names>F</given-names></name>, <name name-style="western"><surname>Glez-Nosti</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Barbón</surname><given-names>A</given-names></name>, <name name-style="western"><surname>Brysbaert</surname><given-names>M</given-names></name> (<year>2011</year>) <article-title>SUBTLEX-ESP: Spanish word frequencies based on film subtitles</article-title>. <source>Psicologica</source> <volume>32</volume>: <fpage>133</fpage>–<lpage>143</lpage>.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Goulden1">
        <label>29</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Goulden</surname><given-names>R</given-names></name>, <name name-style="western"><surname>Nation</surname><given-names>P</given-names></name>, <name name-style="western"><surname>Read</surname><given-names>J</given-names></name> (<year>1990</year>) <article-title>How large can a receptive vocabulary be?</article-title> <source>Applied Linguistics</source> <volume>11</volume>: <fpage>341</fpage>–<lpage>363</lpage> doi:10.1093/applin/11.4.341.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Baayen1">
        <label>30</label>
        <mixed-citation publication-type="other" xlink:type="simple">Baayen RH, Piepenbrock R, Gulikers L (1995) The CELEX Lexical Database (Release 2) [CD-ROM].</mixed-citation>
      </ref>
      <ref id="pone.0043230-Weide1">
        <label>31</label>
        <mixed-citation publication-type="other" xlink:type="simple">Weide RL (1998) CMU pronunciation dictionary, release 0.6.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Vitevitch3">
        <label>32</label>
        <mixed-citation publication-type="other" xlink:type="simple">Vitevitch MS (2012) What do foreign neighbors say about the mental lexicon? Bilingualism: Language and Cognition: 1–6. doi:10.1017/S1366728911000149.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Seymour1">
        <label>33</label>
        <mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Seymour</surname><given-names>PHK</given-names></name>, <name name-style="western"><surname>Aro</surname><given-names>M</given-names></name>, <name name-style="western"><surname>Erskine</surname><given-names>JM</given-names></name> (<year>2003</year>) <article-title>Foundation literacy acquisition in European orthographies</article-title>. <source>British Journal of Psychology</source> <volume>94</volume>: <fpage>143</fpage>–<lpage>174</lpage> doi:10.1348/000712603321661859.</mixed-citation>
      </ref>
      <ref id="pone.0043230-Mejer1">
        <label>34</label>
        <mixed-citation publication-type="other" xlink:type="simple">Mejer L, Boateng S, Turchetti P (2010) More students study foreign languages in Europe but perceptions of skill levels differ significantly. Statistics in Focus 49.</mixed-citation>
      </ref>
    </ref-list>
  </back>
</article>