<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1d3 20150301//EN" "http://jats.nlm.nih.gov/publishing/1.1d3/JATS-journalpublishing1.dtd">
<article article-type="research-article" dtd-version="1.1d3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">PLoS ONE</journal-id>
<journal-id journal-id-type="publisher-id">plos</journal-id>
<journal-id journal-id-type="pmc">plosone</journal-id>
<journal-title-group>
<journal-title>PLOS ONE</journal-title>
</journal-title-group>
<issn pub-type="epub">1932-6203</issn>
<publisher>
<publisher-name>Public Library of Science</publisher-name>
<publisher-loc>San Francisco, CA USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.1371/journal.pone.0165147</article-id>
<article-id pub-id-type="publisher-id">PONE-D-16-16031</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Research Article</subject>
</subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Research and analysis methods</subject><subj-group><subject>Research assessment</subject><subj-group><subject>Peer review</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Cognitive science</subject><subj-group><subject>Cognition</subject><subj-group><subject>Decision making</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Science policy</subject><subj-group><subject>Research funding</subject></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Computer and information sciences</subject><subj-group><subject>Network analysis</subject><subj-group><subject>Social networks</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Social sciences</subject><subj-group><subject>Sociology</subject><subj-group><subject>Social networks</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Research and analysis methods</subject><subj-group><subject>Research assessment</subject><subj-group><subject>Research quality assessment</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>People and places</subject><subj-group><subject>Demography</subject></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Medicine and health sciences</subject><subj-group><subject>Health care</subject><subj-group><subject>Health services research</subject></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Research and analysis methods</subject><subj-group><subject>Research assessment</subject><subj-group><subject>Research validity</subject></subj-group></subj-group></subj-group></article-categories>
<title-group>
<article-title>The Influence of Peer Reviewer Expertise on the Evaluation of Research Funding Applications</article-title>
<alt-title alt-title-type="running-head">The Influence of Peer Reviewer Expertise on the Evaluation of Research Funding Applications</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0001-6043-2153</contrib-id>
<name name-style="western">
<surname>Gallo</surname>
<given-names>Stephen A.</given-names>
</name>
<xref ref-type="corresp" rid="cor001">*</xref>
<xref ref-type="aff" rid="aff001"/>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Sullivan</surname>
<given-names>Joanne H.</given-names>
</name>
<xref ref-type="aff" rid="aff001"/>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Glisson</surname>
<given-names>Scott R.</given-names>
</name>
<xref ref-type="aff" rid="aff001"/>
</contrib>
</contrib-group>
<aff id="aff001"><addr-line>Scientific Peer Advisory and Review Services Division, American Institute of Biological Sciences, Reston, Virginia, United States of America</addr-line></aff>
<contrib-group>
<contrib contrib-type="editor" xlink:type="simple">
<name name-style="western">
<surname>Yechiam</surname>
<given-names>Eldad</given-names>
</name>
<role>Editor</role>
<xref ref-type="aff" rid="edit1"/>
</contrib>
</contrib-group>
<aff id="edit1"><addr-line>Technion Israel Institute of Technology, ISRAEL</addr-line></aff>
<author-notes>
<fn fn-type="conflict" id="coi001">
<p>The authors have declared that no competing interests exist.</p>
</fn>
<fn fn-type="con">
<p><list list-type="simple"><list-item><p><bold>Conceptualization:</bold> SAG.</p></list-item> <list-item><p><bold>Data curation:</bold> SAG JHS.</p></list-item> <list-item><p><bold>Formal analysis:</bold> SAG.</p></list-item> <list-item><p><bold>Investigation:</bold> SAG JHS.</p></list-item> <list-item><p><bold>Methodology:</bold> SAG.</p></list-item> <list-item><p><bold>Project administration:</bold> SAG.</p></list-item> <list-item><p><bold>Resources:</bold> SRG.</p></list-item> <list-item><p><bold>Supervision:</bold> SAG.</p></list-item> <list-item><p><bold>Visualization:</bold> SAG.</p></list-item> <list-item><p><bold>Writing – original draft:</bold> SAG.</p></list-item> <list-item><p><bold>Writing – review &amp; editing:</bold> SAG SRG JHS.</p></list-item></list></p>
</fn>
<corresp id="cor001">* E-mail: <email xlink:type="simple">sgallo@aibs.org</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>21</day>
<month>10</month>
<year>2016</year>
</pub-date>
<pub-date pub-type="collection">
<year>2016</year>
</pub-date>
<volume>11</volume>
<issue>10</issue>
<elocation-id>e0165147</elocation-id>
<history>
<date date-type="received">
<day>20</day>
<month>4</month>
<year>2016</year>
</date>
<date date-type="accepted">
<day>9</day>
<month>10</month>
<year>2016</year>
</date>
</history>
<permissions>
<copyright-year>2016</copyright-year>
<copyright-holder>Gallo et al</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="info:doi/10.1371/journal.pone.0165147"/>
<abstract>
<p>Although the scientific peer review process is crucial to distributing research investments, little has been reported about the decision-making processes used by reviewers. One key attribute likely to be important for decision-making is reviewer expertise. Recent data from an experimental blinded review utilizing a direct measure of expertise has found that closer intellectual distances between applicant and reviewer lead to harsher evaluations, possibly suggesting that information is differentially sampled across subject-matter expertise levels and across information type (e.g. strengths or weaknesses). However, social and professional networks have been suggested to play a role in reviewer scoring. In an effort to test whether this result can be replicated in a real-world unblinded study utilizing self-assessed reviewer expertise, we conducted a retrospective multi-level regression analysis of 1,450 individual unblinded evaluations of 725 biomedical research funding applications by 1,044 reviewers. Despite the large variability in the scoring data, the results are largely confirmatory of work from blinded reviews, by which a linear relationship between reviewer expertise and their evaluations was observed—reviewers with higher levels of self-assessed expertise tended to be harsher in their evaluations. However, we also found that reviewer and applicant seniority could influence this relationship, suggesting social networks could have subtle influences on reviewer scoring. Overall, these results highlight the need to explore how reviewers utilize their expertise to gather and weight information from the application in making their evaluations.</p>
</abstract>
<funding-group>
<funding-statement>The author(s) received no specific funding for this work.</funding-statement>
</funding-group>
<counts>
<fig-count count="4"/>
<table-count count="5"/>
<page-count count="18"/>
</counts>
<custom-meta-group>
<custom-meta id="data-availability">
<meta-name>Data Availability</meta-name>
<meta-value>All relevant data are within the paper and its Supporting Information files.</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="sec001" sec-type="intro">
<title>Introduction</title>
<p>Despite the nearly ubiquitous use of scientific peer review to help guide the highly competitive allocation of billions of dollars in research funding and long-standing concerns about the reliability and validity of review results, peer review remains under-studied, particularly in regard to the types of decision-making processes reviewers use in the evaluation of applications [<xref ref-type="bibr" rid="pone.0165147.ref001">1</xref>–<xref ref-type="bibr" rid="pone.0165147.ref008">8</xref>]. Historically, the relative agreement between reviewers assessing the same application has been reported to be quite low [<xref ref-type="bibr" rid="pone.0165147.ref008">8</xref>–<xref ref-type="bibr" rid="pone.0165147.ref010">10</xref>]. Cole et al. (1981) have suggested that reviewer disagreement can, in part, be attributed to the process of translating opinion to score as well as epistemological differences in the quality of science (among individuals and among fields), while many studies have suggested a variety of individual biases may potentially be at play [<xref ref-type="bibr" rid="pone.0165147.ref008">8</xref>,<xref ref-type="bibr" rid="pone.0165147.ref011">11</xref>]. Whatever the source, it is clear that there is a great deal of subjectivity in the evaluation of research applications and understanding the sources of (and relative contributions to) reviewer disagreement will be crucial to improve the peer review process.</p>
<p>Individual differences in decision-making, as with bias, can be influenced by investigator and reviewer characteristics, as well as the content of the scientific work and limitations in assessing the true quality of research [<xref ref-type="bibr" rid="pone.0165147.ref011">11</xref>]. One important, but often neglected, reviewer characteristic is subject matter expertise. While there is a substantial literature on the use of expertise in decision-making, relatively little has been published on how a reviewer’s scientific expertise contributes to their decision-making processes in peer review [<xref ref-type="bibr" rid="pone.0165147.ref012">12</xref>–<xref ref-type="bibr" rid="pone.0165147.ref014">14</xref>]. Currently, most funding agencies utilize evaluators with close subject matter expertise relative to the applications that are being reviewed, as it is believed that recruitment of appropriate expertise is vital to the legitimacy of peer review [<xref ref-type="bibr" rid="pone.0165147.ref015">15</xref>]. Indeed, it has been noted that deference to expertise is an important aspect of peer review [<xref ref-type="bibr" rid="pone.0165147.ref016">16</xref>].</p>
<p>Two recent studies have explored the relationship between the proximity of applicant and reviewer and scoring behavior, and have found contrasting results. The first study, a blinded, randomized experimental peer review of biomedical research funding applications (Boudreau et al., 2016), examined individual judgments relative to the intellectual distance between the research applications and reviewers (through comparisons of associated medical subject heading [MeSH] keywords) and found that reviewers with a shorter intellectual distance from the application tended to provide harsher evaluations, even on work that is highly innovative [<xref ref-type="bibr" rid="pone.0165147.ref017">17</xref>]. Based on the observed linear nature and the direction of the relationship, the authors discount popular theories where reviewers are either motivated to promote close research based on similar schools of thought or cronyism (opposite directionality) or motivated by strategic incentives to penalize “close” research competing for resources (non-linearity) [<xref ref-type="bibr" rid="pone.0165147.ref018">18</xref>–<xref ref-type="bibr" rid="pone.0165147.ref019">19</xref>]. Also, classical theories of decision making under uncertainty are discounted as they predict opposite directionality [<xref ref-type="bibr" rid="pone.0165147.ref020">20</xref>]. Therefore, the authors explain these occurrences through a proposed bounded rationality decision-making model [<xref ref-type="bibr" rid="pone.0165147.ref021">21</xref>]. In this model, reviewers’ rational decisions are limited by constraints of computational resources and in the availability of information; in this case, information is limited to what can be gathered by the reviewer from the research application and his/her focused knowledge in their area(s) of expertise [<xref ref-type="bibr" rid="pone.0165147.ref017">17</xref>]. 
The authors suggest that reviewers with higher expertise sample more information from the application, detecting more weaknesses than non-experts, which may lead to harsher evaluations. While other studies have implicated a tendency of reviewers to focus and/or agree more on weaknesses than strengths, the amplification of this tendency based on expertise has important implications on the promotion of innovative research and on reviewer recruitment [<xref ref-type="bibr" rid="pone.0165147.ref009">9</xref>,<xref ref-type="bibr" rid="pone.0165147.ref010">10</xref>].</p>
<p>These results are contrasted by the results from a retrospective study of data from the unblinded NIH grant review process by Li (2015), which suggest that a higher degree of relatedness between the applicant and review committee members (by way of reviewers citing the applicant’s work in their publications) yields a greater probability of being funded [<xref ref-type="bibr" rid="pone.0165147.ref022">22</xref>]. This result is directly opposed to the above findings; however, there are some important differences. The Boudreau study was an experimental study blinded to applicant identity and investigated individual reviewer scoring while the Li analysis was an analysis of historical data from unblinded reviews and examined overall scoring by study section panel (which included panel dynamics and discussion effects). An important additional difference is in the measure of proximity between reviewer and applicant; the first study used a direct measure of topic area similarity (intellectual distance) between reviewer and applicant and the second study utilized a measure of citation behavior of reviewers relative to the applicant (relatedness). Li contends that citation relatedness allows reviewers to sample important information about the quality of the application through knowledge of the applicant’s body of work. In this sense, citation relatedness is not necessarily the same as intellectual distance, and may be influenced by social network strength and status.</p>
<p>While both status and social networking have been examined for their potential role in research funding and peer review [<xref ref-type="bibr" rid="pone.0165147.ref015">15</xref>,<xref ref-type="bibr" rid="pone.0165147.ref023">23</xref>–<xref ref-type="bibr" rid="pone.0165147.ref028">28</xref>], in the work of Boudreau the review was blinded to applicant identity, precluding any social and professional networking effects. Therefore, it is unclear how subject matter expertise affects reviewer evaluations in cases where social networking effects may apply. Bounded rationality may still be a consistent explanation for the role of expertise in this case, but if social networking effects dominate, the relationship between expertise and scoring may be diminished. Thus, examining the relationship between a direct measure of reviewer expertise and the scoring of research applications under unblinded, real-world conditions is an important area not yet explored in the literature, and may have implications on the subjective differences between reviewers.</p>
<p>In an effort to address this gap, we have conducted a retrospective analysis of historical data, utilizing unblinded individual reviewer evaluations of biomedical research applications, as well as their self-reported expertise ratings, for an anonymized funding program. We were then able to measure the effects of subject matter expertise on scoring in a scenario that may be governed by both social networking/status effects as well as bounded rationality. Although the assignments are not randomized and all the reviewers had some level of relevant expertise, there is sufficient variation in expertise scoring to examine this relationship in a real-world setting. In addition, this analysis includes data from both funded and unfunded applications, examining a broad range of application quality, albeit through a subjective measure. In this work, we did not directly measure the social network links between reviewer and applicant. However, we did examine reviewer and applicant demographic factors, as they may affect the relationship between expertise and scoring. Based on the importance of reviewer expertise to the peer review process, we hypothesize that self-assessed reviewer expertise ratings will closely approximate keyword-based measures of intellectual distance and that, despite exposure to social effects, research close in subject matter will elicit more negative evaluations from reviewers, although there may be potential influences from demographic factors.</p>
</sec>
<sec id="sec002" sec-type="intro">
<title>Background</title>
<sec id="sec003">
<title>American Institute of Biological Sciences</title>
<p>The American Institute of Biological Sciences (AIBS) is a national scientific organization that promotes the use of science to inform decision-making that advances biology for the benefit of science and society. For over 50 years, AIBS has provided independent peer review services for funding organizations and research institutes and has worked to identify and promote best practices in peer review through the analysis of peer reviews we have conducted. The data contained in this analysis was generated through an independent peer review AIBS conducted for an unnamed research-funding program.</p>
</sec>
<sec id="sec004">
<title>Research Funding Program</title>
<p>Research applications were submitted throughout the year to a general program announcement for an anonymized biomedical research-funding program and were reviewed individually as they were received. No formal budget limitations were included in this announcement, although the appropriateness of the proposed budget was a review criterion. Project timelines were limited to 5 years. Topic areas varied considerably across the field of biomedicine, including but not limited to infectious diseases, traumatic injury, physiological and psychological health, rehabilitative medicine, medical simulation, health informatics, medical robotics, and nanomedicine. AIBS coordinated independent, objective peer review of these applications for the research funder. The submitted applications typically had project narrative page lengths of 15–20 pages (the inclusion of biosketches, appendices, etc., often brought total page lengths to 50–75 pages) and had a median budget of $1.4 million. Some were multi-institutional applications. No formal payline (e.g. scoring threshold) was established by the funding agency and decisions were made not only on the basis of scientific merit but also other programmatic factors (e.g. portfolio balance). AIBS did not take part in funding decisions, nor did we have access to progress or productivity reports from funded applications.</p>
</sec>
<sec id="sec005">
<title>AIBS Peer Review Process</title>
<p>Similar to a journal-style review process, two reviewers were recruited by AIBS to provide an independent assessment of each application. AIBS staff assessed the research areas covered in the application and invited potential reviewers with appropriate and relevant scientific expertise. Based on AIBS experience, reviewers accepted invitations based largely on how well their expertise matched the application, whether there was a conflict of interest and their availability to participate. Potential reviewers received the application title, abstract and name of the principal investigator to aid in their decision. For this program, reviewers were typically only recruited for one application at a time; if they agreed to review they received a very small honorarium for their participation. If a reviewer accepted the invitation to review, they submitted to AIBS an up-to-date version of their CV and signed a confidentiality agreement. In addition, they declared any conflict of interest and signed a conflict of interest form. AIBS vetted the reviewers for any additional potential conflicts with respect to the application submitted. Once vetted, reviewers were sent the application and a form and guide (review template) for evaluating the application (two reviewers in total for each application). Reviewers typically returned their evaluations within a couple of weeks. The opportunity to discuss the application between reviewers was not given in these reviews. This analysis includes 1,450 reviews of 725 applications (619 applicants) by 1,044 reviewers conducted from 2009 to 2012.</p>
<p>Over this time period, the review process was consistent, using essentially the same review criteria: appropriateness of the research goals and hypotheses, feasibility and appropriateness of the methods and experimental design, the qualifications of the personnel, human subject and animal welfare concerns, the suitability of the facilities, the appropriateness of the budget, and the potential impact of the proposed research. Reviewers utilized these criteria to give the application an overall scientific merit (SM) score on a scale of 1.0 to 5.0 (where 1 is the highest merit and 5 is the lowest merit) as well as rated their own reviewer expertise (RE) relative to the application they reviewed on a scale of 1.0 to 5.0 (where 1 indicates the highest level of expertise and 5 indicates the lowest level of expertise). Reviewers also provided written evaluations following a form and guide template based on the review criteria. Evaluative comments were provided under each review criterion. Guidelines for scoring scientific merit and reviewer expertise are listed in <xref ref-type="table" rid="pone.0165147.t001">Table 1</xref>.</p>
<table-wrap id="pone.0165147.t001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0165147.t001</object-id>
<label>Table 1</label> <caption><title>Definitions for Scientific Merit and Reviewer Expertise Scoring.</title></caption>
<alternatives>
<graphic id="pone.0165147.t001g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0165147.t001" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<tbody>
<tr>
<td align="left" colspan="2"><bold>Definitions for Scientific Merit Scoring</bold></td>
</tr>
<tr>
<td align="left"><bold>Scientific Merit Score</bold></td>
<td align="left"><bold>Description</bold></td>
</tr>
<tr>
<td align="left">1.0–1.9</td>
<td align="left">EXCEPTIONAL: The scientific merit of the proposal probably places it in the top 10% of proposals in its area of research; it warrants the highest priority for support. This category should be used only for truly outstanding proposals. A score of 1 indicates a very high level of scientific merit.</td>
</tr>
<tr>
<td align="left">2.0–2.9</td>
<td align="left">GOOD: The scientific merit of the proposal is such that it warrants high priority for support. A score of 2 indicates a significant level of scientific merit.</td>
</tr>
<tr>
<td align="left">3.0–3.9</td>
<td align="left">FAIR: The scientific merit of the proposal is not impressive, and it is probable that it does not warrant support as submitted. If the topic of the proposal is of particular interest, partial support may be warranted. Full support is unlikely to be appropriate. A score of 3 indicates only a moderate level of scientific merit.</td>
</tr>
<tr>
<td align="left">4.0–4.9</td>
<td align="left">DEFICIENT: The scientific merit of the proposal is low. The proposal is flawed, and support is unlikely to be justifiable. A score of 4 indicates a low level of scientific merit.</td>
</tr>
<tr>
<td align="left">5.0</td>
<td align="left">REJECT: The proposal has very serious deficiencies; it should not be supported under any circumstances. A score of 5 indicates a rejection of the work by the reviewers.</td>
</tr>
<tr>
<td align="left" colspan="2"><bold>Definitions for Reviewer Expertise Scoring</bold></td>
</tr>
<tr>
<td align="left"><bold>Reviewer Expertise Score</bold></td>
<td align="left"><bold>Description</bold></td>
</tr>
<tr>
<td align="left">1.0–1.9</td>
<td align="left">The proposal is in your specific area of active research. Your knowledge of current publications is thorough.</td>
</tr>
<tr>
<td align="left">2.0–2.9</td>
<td align="left">The proposal is in your general area of active research. Your knowledge of the literature is reasonably current. You could apply the techniques of the proposal with little difficulty. You have some ongoing communication with workers in the area of the proposal.</td>
</tr>
<tr>
<td align="left">3.0–3.9</td>
<td align="left">The proposal is outside your general area of active research, but it is related. You have knowledge derived from interest in the major discipline embracing the specific proposal, but have little or no contact with other workers active in similar research.</td>
</tr>
<tr>
<td align="left">4.0–4.9</td>
<td align="left">The proposal is not related to your active interest and is no more than peripheral to your major discipline.</td>
</tr>
<tr>
<td align="left">5.0</td>
<td align="left">The proposal is not related to your major discipline, and your knowledge is only derived through supplemental reading and interest in general science.</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
</sec>
</sec>
<sec id="sec006">
<title>Approach</title>
<sec id="sec007">
<title>Data Gathering and Reduction</title>
<p>To explore the relationship between SM and RE and how this was affected by both reviewer and investigator demographics, such as gender, seniority level, etc., these characteristics needed to be gleaned by hand by one person combing through applicant and reviewer CVs submitted at the time of review. Attributes were then cross-checked by a second person to reduce any potential errors. The reviewer and PI position levels were assessed largely through their job title (e.g. assistant professor title was labeled as a junior position, while both associate and full professor titles were labeled as non-junior positions). Titles that did not fall into these categories were subject to further review by both persons. If a clear consensus could not be achieved the data were excluded. In a similar process, the reviewer and PI’s sector (academic or non-academic) was largely assessed through the institution title. Again, if institutional titles did not clearly fall into categories, they were subject to further review by both AIBS staff, and if a clear consensus could not be achieved, the data were excluded. Overall, 18 applications and their associated critiques were removed from the data set due to the lack of complete data.</p>
</sec>
<sec id="sec008">
<title>Non-Random Selection of Reviewers and Application Quality</title>
<p>AIBS recruited reviewers for this program based on relevant expertise, so that the most qualified reviewers evaluated each application. As mentioned above, this analysis represents a retrospective examination of data taken as part of a contract with a funding agency to conduct independent peer review of research applications. Therefore, random assignment of reviewer expertise to applications, and thus control groups with non-expert reviewers, were not possible. As 90% of reviewers have reviewer expertise levels of 2.0 or better in this analysis, it is clear reviewers self-select for participation based on having relevant expertise (which they base on the abstract text, the PI’s name and the fact that they have been selected by AIBS). However, we feel that reviewers self-selecting based on application quality is highly unlikely, as the only information they have prior to agreeing to review is the abstract, which likely does not contain enough information for reviewers to assess quality. Also, we find it unlikely that AIBS staff could select for application quality in the assignment process, as assignments are guided largely through expertise matching and conflict of interest vetting. In addition, reviewer recruitment based on expertise represents the standard in peer review practice, and randomization of assignments may in fact introduce decision-making processes not typically present in the expert evaluation of research applications. Nevertheless, it should be mentioned that a potential limitation of this work is the lack of an ex post measure of proposal quality, which we are missing as we do not have access to the final productivity reports of funded applications. Further, it is very difficult to measure the ex post quality for unfunded applications.</p>
</sec>
<sec id="sec009">
<title>Variables</title>
<p>Scientific merit was the main dependent variable, with a global mean of 2.77 and a standard deviation of 0.98. A plot of the average SM (plus or minus the standard error) against the relative rank of each application is displayed in <xref ref-type="fig" rid="pone.0165147.g001">Fig 1</xref>, underscoring the great variability in evaluations. The main relationship we investigated was between scientific merit and the self-assessed RE, which had a global mean of 1.66 and a standard deviation of 0.54. Other independent variables were categorical in nature, included both reviewer and investigator characteristics and were coded as 0 or 1. These included the position level (1 = presence of junior level; RevJ or PIJ), gender (1 = presence of female; RevF or PIF), academic sector (1 = presence of non-academic sector; RevNonAc or PINonAc), and degree (1 = presence of MD degree, RevMD or PIMD). The overall demographics and relative proportions for reviewers and applicants are listed in <xref ref-type="table" rid="pone.0165147.t002">Table 2</xref>.</p>
<fig id="pone.0165147.g001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0165147.g001</object-id>
<label>Fig 1</label>
<caption>
<title>Average SM Score per Application.</title>
<p>Average SM score for each application versus the rank order by average SM score, with error bars representing standard error (2009–2012).</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0165147.g001" xlink:type="simple"/>
</fig>
<table-wrap id="pone.0165147.t002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0165147.t002</object-id>
<label>Table 2</label> <caption><title>Reviewer and applicant demographics (2009–2012).</title></caption>
<alternatives>
<graphic id="pone.0165147.t002g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0165147.t002" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="center"/>
<th align="center" colspan="2">Reviewer Demographics (Total Reviewers = 1044)</th>
<th align="center" colspan="2">Applicant Demographics (Total Proposals = 725)</th>
</tr>
<tr>
<th align="center">Factors</th>
<th align="center">N</th>
<th align="center">%</th>
<th align="center">N</th>
<th align="center">%</th>
</tr>
</thead>
<tbody>
<tr>
<td align="center">Male</td>
<td align="center">799</td>
<td align="center">77</td>
<td align="center">619</td>
<td align="center">85</td>
</tr>
<tr>
<td align="center">Female</td>
<td align="center">245</td>
<td align="center">23</td>
<td align="center">106</td>
<td align="center">15</td>
</tr>
<tr>
<td align="center">Junior Position</td>
<td align="center">276</td>
<td align="center">26</td>
<td align="center">121</td>
<td align="center">17</td>
</tr>
<tr>
<td align="center">Non-Junior Position</td>
<td align="center">768</td>
<td align="center">74</td>
<td align="center">604</td>
<td align="center">83</td>
</tr>
<tr>
<td align="center">Academia</td>
<td align="center">965</td>
<td align="center">92</td>
<td align="center">470</td>
<td align="center">65</td>
</tr>
<tr>
<td align="center">Non-Academia</td>
<td align="center">79</td>
<td align="center">8</td>
<td align="center">255</td>
<td align="center">35</td>
</tr>
<tr>
<td align="center">No MD Degree</td>
<td align="center">716</td>
<td align="center">69</td>
<td align="center">474</td>
<td align="center">65</td>
</tr>
<tr>
<td align="center">MD Degree</td>
<td align="center">328</td>
<td align="center">31</td>
<td align="center">251</td>
<td align="center">35</td>
</tr>
</tbody>
</table>
</alternatives>
</table-wrap>
</sec>
<sec id="sec010">
<title>Multilevel Model and Reliability</title>
<p>Due to the hierarchical structure of the data, a multi-level multiple regression approach was applied, whereby individual reviewer evaluations (level 1) were nested in application groupings (level 2). A random intercept model was used whereby Y<sub>ij</sub> represents the scientific merit of reviewer <italic>i</italic> of the <italic>j</italic>th application, X<sub>hij</sub> represents a vector of <italic>h</italic> independent variables with <italic>h</italic> coefficients (β<sub>h</sub>), β<sub>0</sub> represents the constant intercept component; μ<sub>0j</sub> represents the random intercept component which varies across applications and the residual error component is represented by ε<sub>ij</sub>. Thus the model is written as:
<disp-formula id="pone.0165147.e001">
<alternatives>
<graphic id="pone.0165147.e001g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pone.0165147.e001" xlink:type="simple"/>
<mml:math display="block" id="M1">
<mml:mrow><mml:msub><mml:mi>Y</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo> </mml:mo><mml:msub><mml:mi>X</mml:mi><mml:mrow><mml:mi>h</mml:mi><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:msub><mml:mi>β</mml:mi><mml:mi>h</mml:mi></mml:msub><mml:mo>+</mml:mo><mml:mo> </mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mn>0</mml:mn></mml:msub><mml:mo>+</mml:mo><mml:mo> </mml:mo><mml:msub><mml:mi>μ</mml:mi><mml:mrow><mml:mn>0</mml:mn><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mo> </mml:mo><mml:msub><mml:mi>ϵ</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow>
</mml:math>
</alternatives>
<label>(1)</label>
</disp-formula>
with ε<sub>ij</sub> ~ N(0, σ<sub>ε</sub><sup>2</sup>) and μ<sub>0j</sub> ~ N(0, σ<sub>μ</sub><sup>2</sup>). The variances of the model are var(μ<sub>0j</sub>) = (σ<sub>μ</sub>)<sup>2</sup> for between application variance and var(ε<sub>ij</sub>) = (σ<sub>ε</sub>)<sup>2</sup> for residual variance. Using these, we can calculate ρ, the intraproposal correlation coefficient [<xref ref-type="bibr" rid="pone.0165147.ref029">29</xref>], using the following formula:
<disp-formula id="pone.0165147.e002">
<alternatives>
<graphic id="pone.0165147.e002g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pone.0165147.e002" xlink:type="simple"/>
<mml:math display="block" id="M2">
<mml:mrow><mml:mi>ρ</mml:mi><mml:mo>=</mml:mo><mml:mfrac><mml:mrow><mml:msubsup><mml:mi>σ</mml:mi><mml:mi>μ</mml:mi><mml:mn>2</mml:mn></mml:msubsup></mml:mrow><mml:mrow><mml:msubsup><mml:mi>σ</mml:mi><mml:mi>μ</mml:mi><mml:mn>2</mml:mn></mml:msubsup><mml:mo> </mml:mo><mml:mo>+</mml:mo><mml:mo> </mml:mo><mml:msubsup><mml:mi>σ</mml:mi><mml:mi>ϵ</mml:mi><mml:mn>2</mml:mn></mml:msubsup></mml:mrow></mml:mfrac></mml:mrow>
</mml:math>
</alternatives>
<label>(2)</label>
</disp-formula></p>
<p>This represents the correlation between two ratings of the same application. The inter-rater reliability (IRR), which is the reliability of the average rating of an application, can then be calculated using ρ and the Spearman-Brown formula [<xref ref-type="bibr" rid="pone.0165147.ref030">30</xref>].</p>
<sec id="sec011">
<title>Analytic approach</title>
<p>For comparison, we started with a random intercept-only model as a baseline (model 1):
<disp-formula id="pone.0165147.e003">
<alternatives>
<graphic id="pone.0165147.e003g" mimetype="image" position="anchor" xlink:href="info:doi/10.1371/journal.pone.0165147.e003" xlink:type="simple"/>
<mml:math display="block" id="M3">
<mml:mrow><mml:msub><mml:mi>Y</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>=</mml:mo><mml:mo> </mml:mo><mml:msub><mml:mi>β</mml:mi><mml:mn>0</mml:mn></mml:msub><mml:mo>+</mml:mo><mml:mo> </mml:mo><mml:msub><mml:mi>μ</mml:mi><mml:mrow><mml:mn>0</mml:mn><mml:mi>j</mml:mi></mml:mrow></mml:msub><mml:mo>+</mml:mo><mml:mo> </mml:mo><mml:msub><mml:mi>ε</mml:mi><mml:mrow><mml:mi>i</mml:mi><mml:mi>j</mml:mi></mml:mrow></mml:msub></mml:mrow>
</mml:math>
</alternatives>
<label>(3)</label>
</disp-formula></p>
<p>We successively added the variables to the model (RE [model 2], reviewer demographic variables as a set [model 3], and applicant demographic variables as a set [model 4]) and used the deviance (as measured by -2 log likelihood) as a measure of significant improvement in fit. In this way, internal comparisons can be made without formal controls for expertise. It should be noted that the deviance reported for model 1 was measured against a fixed intercept model with no random component. Similarly, calculation of the R<sup>2</sup> was based on log likelihood comparisons to the fixed intercept, non-random model [<xref ref-type="bibr" rid="pone.0165147.ref031">31</xref>]. For models 3 and 4, both main effects and interactions with RE were included for every variable. All multilevel models in this paper were based on the maximum likelihood estimation of the linear mixed effect function in R [<xref ref-type="bibr" rid="pone.0165147.ref032">32</xref>]. Reviewer expertise scores were centered by creating Z-scores based on the global RE mean and standard deviation. To visualize the effects of individual independent variables on the SM/RE relationship, scatterplots with simple regression were created. Also, to examine SM variance over the RE range, data were binned into five groupings of RE scores (1.0–1.4 [N = 500], 1.5–1.9[N = 233], 2.0–2.4[N = 604], 2.5–2.9[N = 71], 3.0–3.5[N = 39]); SM variances were calculated for each and values were then plotted against RE bin values.</p>
</sec>
</sec>
<sec id="sec012">
<title>Inter-reviewer Agreement</title>
<p>A key component to the bounded rationality hypothesis is the tendency of reviewers to detect weaknesses over strengths. As previous research has suggested there is more agreement on unfundable applications than fundable ones [<xref ref-type="bibr" rid="pone.0165147.ref009">9</xref>,<xref ref-type="bibr" rid="pone.0165147.ref010">10</xref>], we explored the intersection of expertise, proposal quality and inter-reviewer agreement. We examined the difference in score (absolute values) between the two reviewers averaged across applications for reviewer pairs with both high and low average expertise (high is defined as the average RE of the two reviewers being less than or equal to the global median of 1.65; low is defined as average RE that is more than the median). Utilizing an arbitrary funding threshold of an average SM score of better than 2.0 (top 15% of applications) to generate a distinction of application quality, we could then examine these differences for “fundable” (high quality) versus “unfundable” (low quality) applications. In addition, we also examined the proportion of applications where both reviewers agreed on the fundability status for both high and low quality applications and high and low levels of reviewer expertise.</p>
</sec>
</sec>
<sec id="sec013" sec-type="results">
<title>Results</title>
<sec id="sec014">
<title>Partition of Variance and Reliability</title>
<p>The results of the baseline model fit (model 1) suggest a substantial amount of variation in SM scores across applications, (σ<sub>μ</sub>)<sup>2</sup> = 0.219±0.042, as well as a substantial residual component (σ<sub>ε</sub>)<sup>2</sup> = 0.740±0.023 (<xref ref-type="table" rid="pone.0165147.t003">Table 3</xref>). Thus, 22.9% of total variance in the SM score is due to the applications while the majority of variance, 77.1%, is due to a combination of the reviewers, the interactions of reviewers and applications and random noise. This relative proportion of variance is similar to that others have seen and is consistent with the variability we see in <xref ref-type="fig" rid="pone.0165147.g001">Fig 1</xref> [<xref ref-type="bibr" rid="pone.0165147.ref029">29</xref>]. In addition, the intraproposal correlation coefficient (ρ) and the IRR can be calculated from the baseline variances, yielding 0.23 and 0.37, respectively. Both values indicate poor reliability.</p>
<table-wrap id="pone.0165147.t003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0165147.t003</object-id>
<label>Table 3</label> <caption><title>Multi-level regression comparison of random-intercept models.</title></caption>
<alternatives>
<graphic id="pone.0165147.t003g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0165147.t003" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="left"/>
<th align="left">Model 1</th>
<th align="left">Model 2</th>
<th align="left">Model 3</th>
<th align="left">Model 4</th>
<th align="left">Model 5</th>
</tr>
<tr>
<th align="left"/>
<th align="left">Baseline Across Applications</th>
<th align="left">RE</th>
<th align="left">RE + Reviewer Demographics</th>
<th align="left">RE + Reviewer and Applicant Demographics</th>
<th align="left">RE + Seniority + Sector</th>
</tr>
</thead>
<tbody>
<tr>
<td align="left"><bold>Random Effects</bold></td>
<td align="left"/>
<td align="left"/>
<td align="left"/>
<td align="left"/>
<td align="left"/>
</tr>
<tr>
<td align="left">Variance Across Proposals</td>
<td align="left">0.219 (0.042)*</td>
<td align="left">0.213 (0.042)*</td>
<td align="left">0.204 (0.042)*</td>
<td align="left">0.194 (0.042)*</td>
<td align="left">0.192 (0.043)*</td>
</tr>
<tr>
<td align="left">Residual Variance</td>
<td align="left">0.740 (0.023)*</td>
<td align="left">0.722 (0.023)*</td>
<td align="left">0.717 (0.023)*</td>
<td align="left">0.714 (0.023)*</td>
<td align="left">0.716 (0.023)*</td>
</tr>
<tr>
<td align="left"><bold>Fixed Effects</bold></td>
<td align="left"/>
<td align="left"/>
<td align="left"/>
<td align="left"/>
<td align="left"/>
</tr>
<tr>
<td align="left">Intercept</td>
<td align="left">2.77 (0.03)*</td>
<td align="left">2.77 (0.03)*</td>
<td align="left">2.80 (0.04)*</td>
<td align="left">2.74 (0.05)*</td>
<td align="left">2.73 (0.04)*</td>
</tr>
<tr>
<td align="left">Reviewer Expertise</td>
<td align="left"/>
<td align="left">-0.15 (0.02)*</td>
<td align="left">-0.09 (0.04)*</td>
<td align="left">-0.09 (0.05)</td>
<td align="left">-0.09 (0.03)*</td>
</tr>
<tr>
<td align="left"><bold>Deviance (previous model)</bold></td>
<td align="left"/>
<td align="left"/>
<td align="left"/>
<td align="left"/>
<td align="left"/>
</tr>
<tr>
<td align="left">Change in -2LL</td>
<td align="left">38.9*</td>
<td align="left">36.8*</td>
<td align="left">20.9*</td>
<td align="left">16.0*</td>
<td align="left">36.9* (compared to Model 2)</td>
</tr>
<tr>
<td align="left"><bold>R</bold><sup><bold>2</bold></sup></td>
<td align="left">0.026</td>
<td align="left">0.051</td>
<td align="left">0.070</td>
<td align="left">0.075</td>
<td align="left">0.075</td>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t003fn001"><p>Analysis based on z-score of RE. Asterisk indicates statistical significance (p&lt;0.05). Standard error is reported in parentheses. Each model was compared to the previous model (unless noted otherwise) through the calculation of deviance, as measured by the change in -2 log likelihood. All main effects and interactions with RE were included. Model 1 was compared to a fixed intercept model.</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="sec015">
<title>Reviewer Expertise</title>
<p>The addition of the RE variable in model 2 represented a highly significant improvement over the baseline fit (<xref ref-type="table" rid="pone.0165147.t003">Table 3</xref>; χ<sup>2</sup> (1) = 36.8, p&lt;0.001). There is a significant decrease in variance across proposals (2.7%) and residual variance (2.4%) when reviewer expertise is controlled for, suggesting it is an important source of variation. We also see that the estimate for the centered RE coefficient (z-score of RE) is statistically significant and is negative (-0.15±0.02; p&lt;0.001), whereby lower levels of expertise (higher RE values) result in improving SM scores (lower SM values). Thus, across the entire range of differences in RE in this data set, there is a resulting difference in SM score of 0.84, underscoring the importance of this factor. As visualized in the fitted scatterplot in <xref ref-type="fig" rid="pone.0165147.g002">Fig 2</xref>, there is a clear linear, negative relationship between SM and RE, with no obvious step-functions or other non-linearities. The residuals from the regression have no correlation with RE (R<sup>2</sup>&lt;0.001) and are centered around zero (data not shown). Overall, these data are consistent with the data of Boudreau et al. (2016) [<xref ref-type="bibr" rid="pone.0165147.ref017">17</xref>].</p>
<fig id="pone.0165147.g002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0165147.g002</object-id>
<label>Fig 2</label>
<caption>
<title>Scatterplot of SM versus RE.</title>
<p>Scatterplot and linear regression fit of SM versus RE data with gray area representing 95% confidence intervals (2009–2012).</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0165147.g002" xlink:type="simple"/>
</fig>
<p>However, there is a great amount of variability. If the SM data are binned using the procedure described above, a simple regression yields a correlation between SM variance and RE scores (slope = -0.28±0.08, intercept = 1.37±0.17, R<sup>2</sup> = 0.80, p = 0.04; <xref ref-type="supplementary-material" rid="pone.0165147.s001">S1 Fig</xref>), while no such correlation was found by Boudreau et al., who suggest greater scoring variance for close research may be a sign of strategic motivations in reviewer scoring [<xref ref-type="bibr" rid="pone.0165147.ref017">17</xref>]. However, this trend is consistent with our reliability analysis that (despite higher agreement on fundability) there are larger differences in score for poorly rated proposals as compared to well-rated ones (see below). Thus, as higher expertise tends to yield poorer scores, there is also more variability.</p>
</sec>
<sec id="sec016">
<title>Reviewer and Applicant Demographic Factors</title>
<p>Reviewer demographic factors (including RevJ, RevF, RevNonAc and RevMD) and their interactions with RE were added in model 3, which again represented a significant improvement in fit over model 2 (<xref ref-type="table" rid="pone.0165147.t003">Table 3</xref>; χ<sup>2</sup> (8) = 20.9, p = 0.007). The variance across applications and the residual variance decreased, explaining 4.1% and 0.7%, respectively. Research sector (RevNonAc) was found to yield a direct effect (-0.30±0.09, p = 0.001) as did the interaction between reviewer seniority (RevJ) and RE (-0.15±0.06, p = 0.011). When the data are separated by seniority group, plotted and then fit via simple linear regression (<xref ref-type="fig" rid="pone.0165147.g003">Fig 3</xref>), we can begin to visualize the effect of reviewer seniority on the relationship between RE and SM scoring, with senior reviewer scoring less sensitive to expertise. These results suggest that reviewer attributes can be important in modifying the relationship between RE and SM. This is in contrast to the results of Boudreau et al. (2016), who found no influence of reviewer seniority on the relationship between intellectual distance and scoring [<xref ref-type="bibr" rid="pone.0165147.ref017">17</xref>].</p>
<fig id="pone.0165147.g003" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0165147.g003</object-id>
<label>Fig 3</label>
<caption>
<title>Reviewer Seniority Scatterplots of SM versus RE.</title>
<p>Scatterplot and linear fit of raw SM versus RE scoring data of all evaluations by junior reviewers (in red) and by senior reviewers (in blue). The shaded area represents 95% confidence interval.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0165147.g003" xlink:type="simple"/>
</fig>
<p>When applicant demographic factors (including PIJ, PIF, PINonAc, and PIMD) and their interactions with RE were added in model 4 (<xref ref-type="table" rid="pone.0165147.t003">Table 3</xref>), a significant improvement over model 3 was observed (χ<sup>2</sup> (8) = 16.0, p = 0.043). The variance across applications and the residual variance decreased in this case as well, explaining 4.6% and 0.4%, respectively. Both the main effect of PIJ (0.17±0.08, p = 0.02) and its interaction with RE (-0.15±0.07, p = 0.03) were found to be significant. The interaction of RevJ with RE was also still found to be significant in this model (-0.14±0.06, p = 0.02), as was the main effect of RevNonAc (-0.29±0.09, p = 0.001). Triple interactions between RevJ, PIJ and RE as well as interactions directly between PIJ and RevJ were not significant if added to model 4 (χ<sup>2</sup> (2) = 2.50, p = 0.29). Nevertheless, these data also suggest that seniority of applicants as well as reviewers can influence the relationship between SM scoring and RE. This is visualized in <xref ref-type="fig" rid="pone.0165147.g004">Fig 4</xref>, where scoring for senior applicants is less dependent on reviewer expertise. Also, as reviewer research sector (RevNonAc) is still significant in this model, we added an interaction with applicant research sector (PINonAc) to model 4, which yielded an improvement in the model (χ<sup>2</sup> (1) = 5.19, p = 0.023) and a significant interaction (0.40±0.18, p = 0.024).</p>
<fig id="pone.0165147.g004" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0165147.g004</object-id>
<label>Fig 4</label>
<caption>
<title>Applicant Seniority Scatterplots of SM versus RE.</title>
<p>Scatterplot and linear fit of raw SM versus RE scoring data of all evaluations of junior applicants (red) and non-junior applicants (blue). The shaded area represents 95% confidence interval.</p>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0165147.g004" xlink:type="simple"/>
</fig>
<p>In our final model (model 5), we added the significant demographic factors (PIJ and RevJ and their interactions with RE as well as RevNonAc and PINonAc and their interaction) to the basic model 2 (<xref ref-type="table" rid="pone.0165147.t004">Table 4</xref>). There is a clear improvement in fit over model 2 (χ<sup>2</sup> (7) = 36.9, p&lt;0.001). Significant main effects were seen for RE (-0.09±0.03; p = 0.01), PIJ (0.16±0.07; p = 0.03), RevNonAc (-0.48±0.12; p = 0.0001) and significant interaction effects were seen for RE:RevJ and RE:PIJ (-0.15±0.06; p = 0.01 and -0.16±0.07; p = 0.02, respectively) as well as for RevNonAc:PINonAc (0.41±0.18, p = 0.02). These data suggest that reviewer and applicant seniority as well as research sector can affect SM scoring.</p>
<table-wrap id="pone.0165147.t004" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0165147.t004</object-id>
<label>Table 4</label> <caption><title>Summary of Model 5 (RE + Seniority + Research Sector).</title></caption>
<alternatives>
<graphic id="pone.0165147.t004g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0165147.t004" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<tbody>
<tr>
<td align="left"><bold>Random Effects</bold></td>
<td align="left"/>
</tr>
<tr>
<td align="left">Variance Across Proposals</td>
<td align="left">0.192 (0.043)*</td>
</tr>
<tr>
<td align="left">Residual Variance</td>
<td align="left">0.716 (0.023)*</td>
</tr>
<tr>
<td align="left">Intra-proposal Correlation</td>
<td align="left">0.21*</td>
</tr>
<tr>
<td align="left">Inter-Rater Reliability</td>
<td align="left">0.35</td>
</tr>
<tr>
<td align="left">R<sup>2</sup></td>
<td align="left">0.075</td>
</tr>
<tr>
<td align="left"><bold>Fixed Effects</bold></td>
<td align="left"/>
</tr>
<tr>
<td align="center"><italic>Main</italic></td>
<td align="left"/>
</tr>
<tr>
<td align="left">Intercept</td>
<td align="left">2.73 (0.04)*</td>
</tr>
<tr>
<td align="left">Reviewer Expertise (RE)</td>
<td align="left">-0.09 (0.03)*</td>
</tr>
<tr>
<td align="left">Junior Reviewer (RevJ)</td>
<td align="left">0.05 (0.06)</td>
</tr>
<tr>
<td align="left">Junior Applicant (PIJ)</td>
<td align="left">0.16 (0.07)*</td>
</tr>
<tr>
<td align="left">Non-Academic Reviewer (RevNonAc)</td>
<td align="left">-0.48 (0.12)*</td>
</tr>
<tr>
<td align="left">Non-Academic Applicant (PINonAc)</td>
<td align="left">0.07 (0.06)</td>
</tr>
<tr>
<td align="center"><italic>Interactions</italic></td>
<td align="left"/>
</tr>
<tr>
<td align="left">Reviewer Expertise * Junior Reviewer</td>
<td align="left">-0.15 (0.06)*</td>
</tr>
<tr>
<td align="left">Reviewer Expertise * Junior Applicant</td>
<td align="left">-0.16 (0.07)*</td>
</tr>
<tr>
<td align="left">Non-Academic Reviewer (RevNonAc) * Non-Academic Applicant (PINonAc)</td>
<td align="left">0.41 (0.18)*</td>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t004fn001"><p>Model 5 (random intercept model including fixed effects from RE, RevJ, PIJ, PINonAc and RevNonAc) coefficient estimates are listed. Analysis is based on z-score of RE. Standard error is reported in parentheses and asterisks indicate statistical significance (p&lt;0.05). Results are broken out by random and fixed components (including both main and RE interaction effects). In addition, estimates of intra-proposal correlation and inter-rater reliability are provided.</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
<sec id="sec017">
<title>Inter-Reviewer Agreement</title>
<p>We also examined the level of agreement between reviewers on the fundability of individual applications based on a hypothetical scoring threshold of SM &lt; 2.0. As seen in <xref ref-type="table" rid="pone.0165147.t005">Table 5</xref>, reviewer pairs of both higher and lower average expertise agree much more on the fundability status of poorer applications (81–82% agreement) as compared to the top scoring applications (33–35% agreement). This is generally consistent with the previous literature that reviewers focus and agree more readily on weaknesses, although it is somewhat surprising that expertise has no effect [<xref ref-type="bibr" rid="pone.0165147.ref009">9</xref>,<xref ref-type="bibr" rid="pone.0165147.ref010">10</xref>]. To explore this further, we examined the average scoring differences (absolute differences) of high and low expertise reviewer pairs for fundable and unfundable applications. Here we see the exact opposite effect whereby bigger scoring differences (less agreement) are seen amongst poorer applications. However, the scoring range covered by unfundable applications (2.0–5.0) is larger than that for fundable (1.0–1.9), and while reviewers are not aware of a specific scoring threshold for funding, there is less disagreement about fundability between a Fair (3.0) and Deficient (4.0) rating than there is between an Excellent (1.0) and a Good rating (2.0). It also should be noted that there is slightly worse agreement (via average score differences) for reviewer pairs with higher expertise as compared to low expertise, suggesting that scoring translation is exacerbated in expert reviewers. Thus, while both high and low expertise reviewer pairs agree more on what is unfundable, there is still great variability in the scoring, even among reviewers with high expertise.</p>
<table-wrap id="pone.0165147.t005" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0165147.t005</object-id>
<label>Table 5</label> <caption><title>Reviewer scoring and fundability agreement.</title></caption>
<alternatives>
<graphic id="pone.0165147.t005g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0165147.t005" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="center"/>
<th align="center">Agreement on Fundability (Higher Expertise)</th>
<th align="center">Agreement on Fundability (Lower Expertise)</th>
<th align="center">Average Score Difference (Higher Expertise)</th>
<th align="center">Average Score Difference (Lower Expertise)</th>
</tr>
</thead>
<tbody>
<tr>
<td align="center"><bold>Fundable Application (Top 15%)</bold></td>
<td align="center">33%</td>
<td align="center">35%</td>
<td align="center">0.66 (0.05)</td>
<td align="center">0.57 (0.05)</td>
</tr>
<tr>
<td align="center"><bold>Unfundable Application (Bottom 85%)</bold></td>
<td align="center">82%</td>
<td align="center">81%</td>
<td align="center">1.09 (0.05)</td>
<td align="center">0.95 (0.04)</td>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t005fn001"><p>Inter-reviewer agreement between two reviewers assigned the same application on fundability, based on a 2.0 funding threshold (less than 2.0 is arbitrarily deemed fundable). This is shown for fundable and unfundable applications and for higher and lower average reviewer expertise (higher expertise is defined as an average RE of the two reviewers less than or equal to the global median of 1.65; lower expertise is an average RE above the median). Also average scoring difference (absolute differences) between assigned reviewers is shown with a similar breakdown (standard error shown in parentheses).</p></fn>
</table-wrap-foot>
</table-wrap>
</sec>
</sec>
<sec id="sec018" sec-type="conclusions">
<title>Discussion</title>
<p>Our results indicate a low IRR and a low proportion of total variance in SM scores due to the applications themselves. This result and other similar findings in the literature underscore the importance of identifying significant predictor variables to help explain this variance and the underlying differences in individual reviewer decision-making [<xref ref-type="bibr" rid="pone.0165147.ref003">3</xref>,<xref ref-type="bibr" rid="pone.0165147.ref008">8</xref>,<xref ref-type="bibr" rid="pone.0165147.ref009">9</xref>,<xref ref-type="bibr" rid="pone.0165147.ref029">29</xref>,<xref ref-type="bibr" rid="pone.0165147.ref033">33</xref>].</p>
<p>In this multi-level analysis, where controlling for self-assessed RE explained 2.7% of variance across proposals and 2.4% of residual variance, we observed RE to be a significant predictor of SM score (<xref ref-type="table" rid="pone.0165147.t003">Table 3</xref>), with a negative and linear correlation (<xref ref-type="fig" rid="pone.0165147.g002">Fig 2</xref>). It should be noted that this relationship was observed despite our non-randomized sample, which likely had a smaller range of expertise levels than represented in the Boudreau study, as non-experts were not included. Overall, these results are consistent with the results of Boudreau et al. (2016), despite the linear relationship we observed between SM variance and RE, which we feel is explained by the inter-subjectivity of translating similar evaluations into scores across the scoring scale (<xref ref-type="supplementary-material" rid="pone.0165147.s001">S1 Fig</xref>) [<xref ref-type="bibr" rid="pone.0165147.ref017">17</xref>,<xref ref-type="bibr" rid="pone.0165147.ref034">34</xref>]. Both the regression and the inter-reviewer agreement analysis are consistent with the bounded rationality perspective and the notion that reviewers disproportionally focus on weaknesses, which are detected with greater frequency with increasing levels of subject matter knowledge, thereby creating differences in scoring leniency between reviewers with high or low expertise. As with Boudreau, we can likely discount alternate explanations of competition driving the penalization of close research due to the observed linear relationship between SM and RE, as well as the low agreement by reviewer pairs with high RE on what should actually be funded.</p>
<p>Despite the unblinded nature of this review, intellectually close applications are still reviewed less favorably than distant ones, which is the opposite result to Li et al (2015). Thus, it is likely that the processes of bounded rationality dominate over social influences, which is an important finding given the many claims of bias and cronyism in peer review. However, based on the multilevel modeling results we report here, it seems reviewer and applicant characteristics explain significant proportions of variance (<xref ref-type="table" rid="pone.0165147.t003">Table 3</xref>) and can potentially alter the relationship between SM and RE (<xref ref-type="table" rid="pone.0165147.t004">Table 4</xref>), which suggests social effects may play out in more subtle ways.</p>
<p>For instance, the sensitivity of SM to RE was diminished as a function of reviewer seniority (<xref ref-type="fig" rid="pone.0165147.g003">Fig 3</xref>). It may be that senior reviewers are sampling distinct types of information or there are differences in the weighting of information, systematically (across expertise levels) placing more weight on one review criterion over another. This prioritization of criteria has been described as a commensuration bias [<xref ref-type="bibr" rid="pone.0165147.ref035">35</xref>]. In our unblinded study, it may be that senior reviewers prioritize an investigator’s track record. Li (2015) has observed in NIH review committees that higher applicant-reviewer relatedness (via citation connections) does improve an applicant’s score, the opposite effect of intellectual distance [<xref ref-type="bibr" rid="pone.0165147.ref022">22</xref>]. As senior scientists have more established publication and social networks, it is more likely a senior reviewer is related (by citation) to a given applicant. Although reliance on relatedness may introduce the potential for cronyism in the review process, Li shows evidence that reviewers use relatedness to gather additional information and make inferences about an application’s quality, and on the whole, utilizing relatedness results in a 30% increase in the correlation between funding decisions and application quality. Thus, expert reviewers may be using prior knowledge about an applicant to temper their evaluation of an application, but the extent depends on reviewer seniority.</p>
<p>While it should be noted that Boudreau et al. (2016) did not find that reviewer seniority affected the relationship between scoring and expertise, that review was blinded to applicant information; therefore, consistent with the above explanation, one would not expect any influence [<xref ref-type="bibr" rid="pone.0165147.ref017">17</xref>]. However, Jayasinghe et al. (2003) also investigated assessor seniority as a predictor of evaluation ratings of applications submitted to the Australian Research Council [<xref ref-type="bibr" rid="pone.0165147.ref029">29</xref>]. Assessor seniority was not found to be a significant component to their model, although RE was not controlled for in their model, and it is unclear what proportion of the assessor pool was senior versus junior. They did, however, observe applicant seniority as an important predictor of application ratings.</p>
<p>We also observed applicant seniority to be a significant predictor of scoring, finding both systematic effects as well as interactions with RE, which diminished the sensitivity of SM scoring to RE (<xref ref-type="fig" rid="pone.0165147.g004">Fig 4</xref>). In our observation, as well as in Jayasinghe’s study, senior applicants were found to systematically receive more favorable scores as well (<xref ref-type="table" rid="pone.0165147.t004">Table 4</xref>). This is also consistent with the above notion that based on applicant status and placement in social networks, reviewers may be tempering methodological weaknesses with <italic>a priori</italic> knowledge of the applicant. Surprisingly, in our model, reviewer and applicant seniority were not found to jointly interact with RE in any significant way, as one would assume senior reviewers and senior applicants would have the highest likelihood for social overlap. It may be that more complicated social relationships are at play.</p>
<p>It should also be noted that reviewers from non-academic backgrounds tended to be more generous evaluators, and that this effect was negated if the applicants were also not from academia (<xref ref-type="table" rid="pone.0165147.t004">Table 4</xref>). It may be that certain social network effects are different in non-academic circles due to differences in cultural norms [<xref ref-type="bibr" rid="pone.0165147.ref036">36</xref>]. For instance, competition effects may be more pronounced in a non-academic environment where intellectual property concerns dominate. This and other cultural differences may be important factors in reviewer decision making processes, and need to be explored further, as the sample of non-academic reviewers was relatively small in this study (N = 127).</p>
<p>As mentioned above, the non-randomization of the reviewer sample and the lack of direct measures of application quality are limitations of this analysis, although the likelihood of a selection bias for application quality is deemed low. Also, there are likely several omitted variables, particularly properties of applications like innovation and inter-disciplinarity, that are not included in our analysis. However, Boudreau has reported no interaction between innovation and intellectual distance [<xref ref-type="bibr" rid="pone.0165147.ref017">17</xref>]. Also, interdisciplinary proposals would likely yield lowered expertise assessments, which based on the observed trend would result in more favorable scoring patterns, which again is in contrast to recent reports [<xref ref-type="bibr" rid="pone.0165147.ref037">37</xref>]. Thus, we feel the omitted variables do not adequately explain the trend observed here. Also, previous literature has suggested that reviewer expertise seems to be central to a reviewer’s evaluation, as expressed by Lamont (2009) [<xref ref-type="bibr" rid="pone.0165147.ref016">16</xref>]. The limitations underscore the need for further exploration of the role of reviewer expertise in the decision making process of reviewers.</p>
<p>Future studies must also include more extensive analysis (including individually scored criteria) in prospective trials of blinded and un-blinded reviews with applicant and reviewer demographic factors and expertise as variables. Additionally, the role of individual RE in review panels and how this is affected by team dynamics, collective expertise and discussion should be explored. More direct measures of social connectivity should also be employed to explore relationships with self-assessed expertise as well as scoring.</p>
<p>Nevertheless, despite these limitations, we have observed in real world conditions a clear but complex role of reviewer expertise in the research evaluation process. Our results also suggest that, even for reviewer pairs with high expertise, substantial reviewer disagreement exists about the scoring of applications. Given the large amount of variability we observe across reviewers, it may be that there is simply a great diversity of opinion in what good science is and a fundamental limitation in the ability of a reviewer to forecast which projects are the most likely to be successful and impactful. More research exploring the types of weaknesses and strengths reviewers focus on and how they are weighted and prioritized will be crucial in accounting for inter-reviewer disagreement. Many of these results will not only have great impact on understanding the multi-faceted process of decision-making in peer review, but will also have practical implications in guiding how reviewers should be recruited, trained, and moderated by administrative staff to produce the most equitable, reliable, and valid evaluations.</p>
</sec>
<sec id="sec019">
<title>Supporting Information</title>
<supplementary-material id="pone.0165147.s001" mimetype="image/tiff" position="float" xlink:href="info:doi/10.1371/journal.pone.0165147.s001" xlink:type="simple">
<label>S1 Fig</label>
<caption>
<title>SM Variance versus Binned RE.</title>
<p>SM scoring data was binned according to RE into 5 groups and then the variance in SM score of these groups was plotted against RE. A linear regression fit of the data is displayed.</p>
<p>(TIFF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pone.0165147.s002" mimetype="text/csv" position="float" xlink:href="info:doi/10.1371/journal.pone.0165147.s002" xlink:type="simple">
<label>S1 File</label>
<caption>
<title>Anonymized Source Data File.</title>
<p>Anonymized scoring and demographic data for the review of each application have been compiled in a CSV file.</p>
<p>(CSV)</p>
</caption>
</supplementary-material>
</sec>
</body>
<back>
<ack>
<p>We would like to thank Karen Schmaling for her useful comments on the analyses and manuscript drafts, Charles DiMaggio for statistical support, Arati Deshmukh for copy-editing this manuscript, and the SPARS staff for implementing these reviews, which generated these data.</p>
</ack>
<ref-list>
<title>References</title>
<ref id="pone.0165147.ref001"><label>1</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Olbrecht</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Bornmann</surname> <given-names>L</given-names></name>. <article-title>Panel peer review of grant applications: what do we know from research in social psychology on judgment and decision-making in groups?</article-title> <source>Research Evaluation</source> <year>2010</year> <volume>19</volume>(<issue>4</issue>): <fpage>293</fpage>–<lpage>304</lpage>.</mixed-citation></ref>
<ref id="pone.0165147.ref002"><label>2</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Langfeldt</surname> <given-names>L</given-names></name>. <article-title>The decision-making constraints and processes of grant peer review, and their effects on the review outcome</article-title>. <source>Social Studies of Science</source> <year>2001</year> <volume>31</volume>(<issue>6</issue>): <fpage>820</fpage>–<lpage>841</lpage>.</mixed-citation></ref>
<ref id="pone.0165147.ref003"><label>3</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Marsh</surname> <given-names>HW</given-names></name>, <name name-style="western"><surname>Jayasinghe</surname> <given-names>UW</given-names></name>, <name name-style="western"><surname>Bond</surname> <given-names>NW</given-names></name>. <article-title>Improving the peer-review process for grant applications: Reliability, validity, bias, and generalizability</article-title>. <source>Amer. Psychologist</source> <year>2008</year> <volume>63</volume>(<issue>3</issue>):<fpage>160</fpage>–<lpage>168</lpage>.</mixed-citation></ref>
<ref id="pone.0165147.ref004"><label>4</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ginther</surname> <given-names>DK</given-names></name>, <name name-style="western"><surname>Schaffer</surname> <given-names>WT</given-names></name>, <name name-style="western"><surname>Schnell</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Masimore</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Liu</surname> <given-names>F</given-names></name>, <name name-style="western"><surname>Haak</surname> <given-names>LL</given-names></name> <etal>et al</etal>. <article-title>Race, ethnicity, and NIH research awards</article-title>. <source>Science</source> <year>2011</year> <volume>333</volume>(<issue>6045</issue>): <fpage>1015</fpage>–<lpage>1019</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1126/science.1196783" xlink:type="simple">10.1126/science.1196783</ext-link></comment> <object-id pub-id-type="pmid">21852498</object-id></mixed-citation></ref>
<ref id="pone.0165147.ref005"><label>5</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Bornmann</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Daniel</surname> <given-names>HD</given-names></name>. <article-title>Selection of research fellowship recipients by committee peer review. Reliability, fairness and predictive validity of Board of Trustees' decisions</article-title>. <source>Scientometrics</source> <year>2005</year> <volume>63</volume>(<issue>2</issue>): <fpage>297</fpage>–<lpage>320</lpage>.</mixed-citation></ref>
<ref id="pone.0165147.ref006"><label>6</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Danthi</surname> <given-names>N</given-names></name>, <name name-style="western"><surname>Wu</surname> <given-names>CO</given-names></name>, <name name-style="western"><surname>Shi</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Lauer</surname> <given-names>M</given-names></name>. <article-title>Percentile Ranking and Citation Impact of a Large Cohort of National Heart, Lung, and Blood Institute–Funded Cardiovascular R01 Grants</article-title>. <source>Circulation research</source> <year>2014</year> <volume>114</volume>(<issue>4</issue>): <fpage>600</fpage>–<lpage>606</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1161/CIRCRESAHA.114.302656" xlink:type="simple">10.1161/CIRCRESAHA.114.302656</ext-link></comment> <object-id pub-id-type="pmid">24406983</object-id></mixed-citation></ref>
<ref id="pone.0165147.ref007"><label>7</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Gallo</surname> <given-names>SA</given-names></name>, <name name-style="western"><surname>Carpenter</surname> <given-names>AS</given-names></name>, <name name-style="western"><surname>Irwin</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>McPartland</surname> <given-names>CD</given-names></name>, <name name-style="western"><surname>Travis</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Reynders</surname> <given-names>S</given-names></name>, <etal>et al</etal>. <article-title>The validation of peer review through research impact measures and the implications for funding strategies</article-title> <source>PLOS ONE</source> <year>2014</year> <volume>9</volume>(<issue>9</issue>): <fpage>e106474</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1371/journal.pone.0106474" xlink:type="simple">10.1371/journal.pone.0106474</ext-link></comment> <object-id pub-id-type="pmid">25184367</object-id></mixed-citation></ref>
<ref id="pone.0165147.ref008"><label>8</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Cole</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Cole</surname> <given-names>JR</given-names></name>, <name name-style="western"><surname>Simon</surname> <given-names>GA</given-names></name>. <article-title>Chance and consensus in peer review</article-title>. <source>Science</source> <year>1981</year> <volume>214</volume>(<issue>4523</issue>): <fpage>881</fpage>–<lpage>886</lpage>. <object-id pub-id-type="pmid">7302566</object-id></mixed-citation></ref>
<ref id="pone.0165147.ref009"><label>9</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Cicchetti</surname> <given-names>DV</given-names></name>. <article-title>The reliability of peer review for manuscript and grant submissions: A crossdisciplinary investigation</article-title>. <source>Behavioral and Brain Sciences</source> <year>1991</year> <volume>14</volume>(<issue>1</issue>): <fpage>119</fpage>–<lpage>186</lpage>.</mixed-citation></ref>
<ref id="pone.0165147.ref010"><label>10</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Fogelholm</surname> <given-names>M</given-names></name>, <name name-style="western"><surname>Leppinen</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Auvinen</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Raitanen</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Nuutinen</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Väänänen</surname> <given-names>K</given-names></name>. <article-title>Panel discussion does not improve reliability of peer review for medical research grant proposals</article-title>. <source>Journal of clinical epidemiology</source> <year>2012</year> <volume>65</volume>(<issue>1</issue>): <fpage>47</fpage>–<lpage>52</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1016/j.jclinepi.2011.05.001" xlink:type="simple">10.1016/j.jclinepi.2011.05.001</ext-link></comment> <object-id pub-id-type="pmid">21831594</object-id></mixed-citation></ref>
<ref id="pone.0165147.ref011"><label>11</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Lee</surname> <given-names>CJ</given-names></name>, <name name-style="western"><surname>Sugimoto</surname> <given-names>CR</given-names></name>, <name name-style="western"><surname>Zhang</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Cronin</surname> <given-names>B</given-names></name>. <article-title>Bias in peer review</article-title>. <source>Journal of the American Society for Information Science and Technology</source> <year>2013</year> <volume>64</volume>(<issue>1</issue>): <fpage>2</fpage>–<lpage>17</lpage>.</mixed-citation></ref>
<ref id="pone.0165147.ref012"><label>12</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Salas</surname> <given-names>E</given-names></name>, <name name-style="western"><surname>Rosen</surname> <given-names>MA</given-names></name>, <name name-style="western"><surname>Diaz-Granados</surname> <given-names>D</given-names></name>. <article-title>Expertise-based intuition and decision making in organizations</article-title>. <source>Journal of Management</source> <year>2010</year> <volume>36</volume>: <fpage>941</fpage>–<lpage>973</lpage>.</mixed-citation></ref>
<ref id="pone.0165147.ref013"><label>13</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Phillips</surname> <given-names>JK</given-names></name>, <name name-style="western"><surname>Klein</surname> <given-names>G</given-names></name>, <name name-style="western"><surname>Sieck</surname> <given-names>WR</given-names></name>. <chapter-title>Expertise in judgment and decision making: A case for training intuitive decision skills</chapter-title>. In: <name name-style="western"><surname>Koehler</surname> <given-names>DJ</given-names></name> and <name name-style="western"><surname>Harvey</surname> <given-names>N</given-names></name>, <source>Blackwell Handbook of Judgment and Decision Making</source>. <publisher-loc>Oxford</publisher-loc>: <publisher-name>Blackwell</publisher-name> <year>2004</year> <fpage>297</fpage>–<lpage>315</lpage>.</mixed-citation></ref>
<ref id="pone.0165147.ref014"><label>14</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Dijkstra</surname> <given-names>KA</given-names></name>, <name name-style="western"><surname>Pligt</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Kleef</surname> <given-names>GA</given-names></name>. <article-title>Deliberation versus intuition: Decomposing the role of expertise in judgment and decision making</article-title>. <source>Journal of Behavioral Decision Making</source> <year>2013</year> <volume>26</volume>(<issue>3</issue>): <fpage>285</fpage>–<lpage>294</lpage>.</mixed-citation></ref>
<ref id="pone.0165147.ref015"><label>15</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Merton</surname> <given-names>RK</given-names></name>. <chapter-title>Institutionalized patterns of evaluation in science</chapter-title>. In: <name name-style="western"><surname>Storer</surname> <given-names>NW</given-names></name>, <source>The Sociology of Science: Theoretical and Empirical Investigations</source>. <publisher-loc>Chicago</publisher-loc>: <publisher-name>University of Chicago press</publisher-name> <year>1973</year> <fpage>460</fpage>–<lpage>496</lpage>.</mixed-citation></ref>
<ref id="pone.0165147.ref016"><label>16</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Lamont</surname> <given-names>M</given-names></name>. <chapter-title>Pragmatic fairness: Customary rules of deliberation</chapter-title>. In: <source>How professors think: Inside the curious world of academic judgment</source>. <publisher-loc>Cambridge, MA</publisher-loc>: <publisher-name>Harvard University Press</publisher-name> <year>2009</year> <fpage>107</fpage>–<lpage>158</lpage></mixed-citation></ref>
<ref id="pone.0165147.ref017"><label>17</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Boudreau</surname> <given-names>KJ</given-names></name>, <name name-style="western"><surname>Guinan</surname> <given-names>EC</given-names></name>, <name name-style="western"><surname>Lakhani</surname> <given-names>KR</given-names></name>, <name name-style="western"><surname>Riedl</surname> <given-names>C</given-names></name>. <article-title>Looking Across and Looking Beyond the Knowledge Frontier: Intellectual Distance, Novelty, and Resource Allocation in Science</article-title>. <source>Management Science</source> <year>2016</year>: <fpage>1</fpage>–<lpage>19</lpage></mixed-citation></ref>
<ref id="pone.0165147.ref018"><label>18</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Travis</surname> <given-names>GDL</given-names></name>, <name name-style="western"><surname>Collins</surname> <given-names>HM</given-names></name>. <article-title>New light on old boys: Cognitive and institutional particularism in the peer review system</article-title>. <source>Sci.,Tech., Human Values</source> <year>1991</year> <volume>16</volume>(<issue>3</issue>):<fpage>322</fpage>–<lpage>341</lpage>.</mixed-citation></ref>
<ref id="pone.0165147.ref019"><label>19</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Campanario</surname> <given-names>JM</given-names></name>, <name name-style="western"><surname>Acedo</surname> <given-names>E</given-names></name>. <article-title>Rejecting highly cited papers: The views of scientists who encounter resistance to their discoveries from other scientists</article-title>. <source>J. Amer. Soc. Inform. Sci. Tech</source>. <year>2007</year> <volume>58</volume>(<issue>5</issue>):<fpage>734</fpage>–<lpage>743</lpage>.</mixed-citation></ref>
<ref id="pone.0165147.ref020"><label>20</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Anand</surname> <given-names>P</given-names></name>. <source>Foundations of rational choice under risk</source>. <publisher-loc>Oxford, UK</publisher-loc>: <publisher-name>Oxford University Press</publisher-name> <year>1995</year></mixed-citation></ref>
<ref id="pone.0165147.ref021"><label>21</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Simon</surname> <given-names>HA</given-names></name>. <chapter-title>Theories of bounded rationality</chapter-title> In: <name name-style="western"><surname>McGuire</surname> <given-names>CB</given-names></name> and <name name-style="western"><surname>Radner</surname> <given-names>R</given-names></name> (Eds) <source>Decision and organization</source> <year>1972</year> <fpage>161</fpage>–<lpage>176</lpage>.</mixed-citation></ref>
<ref id="pone.0165147.ref022"><label>22</label><mixed-citation publication-type="other" xlink:type="simple">Li D. Expertise vs. bias in evaluation: Evidence from the NIH. HBS Working Paper 16–053, Harvard Business School, Boston. 2015</mixed-citation></ref>
<ref id="pone.0165147.ref023"><label>23</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Azoulay</surname> <given-names>P</given-names></name>, <name name-style="western"><surname>Stuart</surname> <given-names>T</given-names></name>, and <name name-style="western"><surname>Wang</surname> <given-names>Y</given-names></name> <article-title>Matthew: Effect or fable?</article-title> <source>Management Science</source> <year>2013</year> <volume>60</volume>(<issue>1</issue>): <fpage>92</fpage>–<lpage>109</lpage>.</mixed-citation></ref>
<ref id="pone.0165147.ref024"><label>24</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ebadi</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Schiffauerova</surname> <given-names>A</given-names></name>. <article-title>How to receive more funding for your research? Get connected to the right people!</article-title> <source>PLOS ONE</source> <year>2015</year> <volume>10</volume>(<issue>7</issue>), <fpage>e0133061</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1371/journal.pone.0133061" xlink:type="simple">10.1371/journal.pone.0133061</ext-link></comment> <object-id pub-id-type="pmid">26222598</object-id></mixed-citation></ref>
<ref id="pone.0165147.ref025"><label>25</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Gillespie</surname> <given-names>GW</given-names></name>, <name name-style="western"><surname>Chubin</surname> <given-names>DE</given-names></name>, <name name-style="western"><surname>Kurzon</surname> <given-names>GM</given-names></name>. <article-title>Experience with NIH peer review: researchers' cynicism and desire for change</article-title>. <source>Science, Technology, &amp; Human Values</source> <year>1985</year> <volume>10</volume>(<issue>3</issue>): <fpage>44</fpage>–<lpage>54</lpage>.</mixed-citation></ref>
<ref id="pone.0165147.ref026"><label>26</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Wold</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Wennerås</surname> <given-names>C</given-names></name>. <article-title>Nepotism and sexism in peer review</article-title>. <source>Nature</source> <year>1997</year> <volume>387</volume>(<issue>6631</issue>): <fpage>341</fpage>–<lpage>343</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1038/387341a0" xlink:type="simple">10.1038/387341a0</ext-link></comment> <object-id pub-id-type="pmid">9163412</object-id></mixed-citation></ref>
<ref id="pone.0165147.ref027"><label>27</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Walker</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Barros</surname> <given-names>B</given-names></name>, <name name-style="western"><surname>Conejo</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Neumann</surname> <given-names>K</given-names></name>, <name name-style="western"><surname>Telefont</surname> <given-names>M</given-names></name>. (<year>2015</year>). <article-title>Personal attributes of authors and reviewers, social bias and the outcomes of peer review: a case study</article-title>. <source>F1000Research 2015</source> <volume>4</volume>:<fpage>21</fpage>.</mixed-citation></ref>
<ref id="pone.0165147.ref028"><label>28</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Cole</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Rubin</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Cole</surname> <given-names>JR</given-names></name>. <source>Peer review in the National Science Foundation: Phase one of a study</source>. <publisher-loc>Washington, DC</publisher-loc>: <publisher-name>National Academy Press</publisher-name> <year>1978</year> <fpage>33</fpage>–<lpage>46</lpage>.</mixed-citation></ref>
<ref id="pone.0165147.ref029"><label>29</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Jayasinghe</surname> <given-names>UW</given-names></name>, <name name-style="western"><surname>Marsh</surname> <given-names>HW</given-names></name>, <name name-style="western"><surname>Bond</surname> <given-names>N</given-names></name>. <article-title>A multilevel cross-classified modelling approach to peer review of grant proposals: The effects of assessor and researcher attributes on assessor ratings</article-title>. <source>Journal of the Royal Statistical Society. Series A (Statistics in Society)</source> <year>2003</year> <volume>166</volume>(<issue>3</issue>), <fpage>279</fpage>–<lpage>300</lpage>.</mixed-citation></ref>
<ref id="pone.0165147.ref030"><label>30</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Marsh</surname> <given-names>HW</given-names></name>, <name name-style="western"><surname>Ball</surname> <given-names>S</given-names></name>. <article-title>The interjudgmental reliability of reviews for the Journal of Educational Psychology</article-title>. <source>J. Educ. Psychol</source>. <year>1981</year> <volume>73</volume>: <fpage>872</fpage>–<lpage>880</lpage>.</mixed-citation></ref>
<ref id="pone.0165147.ref031"><label>31</label><mixed-citation publication-type="other" xlink:type="simple">Kramer M. R2 Statistics for Mixed Models. Presented at the 17th Annual Kansas State University Conference on Applied Statistics in Agriculture; 2005; April 24–26. Available: <ext-link ext-link-type="uri" xlink:href="https://www.researchgate.net/profile/Matt_Kramer2/publication/266333403_R_2_STATISTICS_FOR_MIXED_MODELS/links/54d4ea160cf24647580694e9.pdf" xlink:type="simple">https://www.researchgate.net/profile/Matt_Kramer2/publication/266333403_R_2_STATISTICS_FOR_MIXED_MODELS/links/54d4ea160cf24647580694e9.pdf</ext-link></mixed-citation></ref>
<ref id="pone.0165147.ref032"><label>32</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Pinheiro</surname> <given-names>J</given-names></name>, <name name-style="western"><surname>Bates</surname> <given-names>D</given-names></name>, <name name-style="western"><surname>DebRoy</surname> <given-names>S</given-names></name>, <name name-style="western"><surname>Sarkar</surname> <given-names>D</given-names></name> <source>Linear and nonlinear mixed effects models</source>. <year>2007</year> <italic>R package version</italic> 3: <fpage>57</fpage>.</mixed-citation></ref>
<ref id="pone.0165147.ref033"><label>33</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Bornmann</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Daniel</surname> <given-names>HD</given-names></name>. <article-title>The effectiveness of the peer review process: Inter-referee agreement and predictive validity of manuscript refereeing at Angewandte Chemie</article-title>. <source>Angewandte Chemie International Edition</source> <year>2008</year> <volume>47</volume>(<issue>38</issue>), <fpage>7173</fpage>–<lpage>7178</lpage>.</mixed-citation></ref>
<ref id="pone.0165147.ref034"><label>34</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Gillespie</surname> <given-names>A</given-names></name>, <name name-style="western"><surname>Cornish</surname> <given-names>F</given-names></name>. <article-title>Intersubjectivity: Towards a dialogical analysis</article-title>. <source>Journal for the theory of social behaviour</source> <year>2010</year> <volume>40</volume>(<issue>1</issue>): <fpage>19</fpage>–<lpage>46</lpage>.</mixed-citation></ref>
<ref id="pone.0165147.ref035"><label>35</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Lee</surname> <given-names>CJ</given-names></name>. <article-title>Commensuration Bias in Peer Review</article-title>. <source>Philosophy of Science</source> <year>2015</year> <volume>82</volume>: <fpage>1272</fpage>–<lpage>1283</lpage>.</mixed-citation></ref>
<ref id="pone.0165147.ref036"><label>36</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Rai</surname> <given-names>AK</given-names></name>. <article-title>Regulating scientific research: Intellectual property rights and the norms of science</article-title>. <source>Nw. UL Rev</source>. <year>1999</year> <volume>94</volume>:<fpage>77</fpage>.</mixed-citation></ref>
<ref id="pone.0165147.ref037"><label>37</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Bromham</surname> <given-names>L</given-names></name>, <name name-style="western"><surname>Dinnage</surname> <given-names>R</given-names></name>, <name name-style="western"><surname>Hua</surname> <given-names>X</given-names></name>. <article-title>Interdisciplinary research has consistently lower funding success</article-title>. <source>Nature</source> <year>2016</year> <volume>534</volume>:<fpage>684</fpage>–<lpage>687</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1038/nature18315" xlink:type="simple">10.1038/nature18315</ext-link></comment> <object-id pub-id-type="pmid">27357795</object-id></mixed-citation></ref>
</ref-list>
</back>
</article>