<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.1d3 20150301//EN" "http://jats.nlm.nih.gov/publishing/1.1d3/JATS-journalpublishing1.dtd">
<article article-type="research-article" dtd-version="1.1d3" xml:lang="en" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-id journal-id-type="nlm-ta">PLoS ONE</journal-id>
<journal-id journal-id-type="publisher-id">plos</journal-id>
<journal-id journal-id-type="pmc">plosone</journal-id>
<journal-title-group>
<journal-title>PLOS ONE</journal-title>
</journal-title-group>
<issn pub-type="epub">1932-6203</issn>
<publisher>
<publisher-name>Public Library of Science</publisher-name>
<publisher-loc>San Francisco, CA USA</publisher-loc>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.1371/journal.pone.0222694</article-id>
<article-id pub-id-type="publisher-id">PONE-D-19-08635</article-id>
<article-categories>
<subj-group subj-group-type="heading">
<subject>Research Article</subject>
</subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Social sciences</subject><subj-group><subject>Economics</subject><subj-group><subject>Labor economics</subject><subj-group><subject>Employment</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Social sciences</subject><subj-group><subject>Economics</subject><subj-group><subject>Labor economics</subject><subj-group><subject>Employment</subject><subj-group><subject>Jobs</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Research and analysis methods</subject><subj-group><subject>Research design</subject><subj-group><subject>Survey research</subject><subj-group><subject>Questionnaires</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Cognitive science</subject><subj-group><subject>Cognition</subject><subj-group><subject>Memory</subject><subj-group><subject>Memory recall</subject></subj-group></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Neuroscience</subject><subj-group><subject>Learning and memory</subject><subj-group><subject>Memory</subject><subj-group><subject>Memory recall</subject></subj-group></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Biology and life sciences</subject><subj-group><subject>Psychology</subject></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Social sciences</subject><subj-group><subject>Psychology</subject></subj-group></subj-group>
<subj-group subj-group-type="Discipline-v3"><subject>Computer and information sciences</subject><subj-group><subject>Computer applications</subject></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Research and analysis methods</subject><subj-group><subject>Research assessment</subject></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Research and analysis methods</subject><subj-group><subject>Mathematical and statistical techniques</subject><subj-group><subject>Statistical methods</subject><subj-group><subject>Metaanalysis</subject></subj-group></subj-group></subj-group></subj-group><subj-group subj-group-type="Discipline-v3"><subject>Physical sciences</subject><subj-group><subject>Mathematics</subject><subj-group><subject>Statistics</subject><subj-group><subject>Statistical methods</subject><subj-group><subject>Metaanalysis</subject></subj-group></subj-group></subj-group></subj-group></subj-group></article-categories>
<title-group>
<article-title>Does effectiveness in performance appraisal improve with rater training?</article-title>
<alt-title alt-title-type="running-head">Rater training on performance appraisal</alt-title>
</title-group>
<contrib-group>
<contrib contrib-type="author" corresp="yes" xlink:type="simple">
<contrib-id authenticated="true" contrib-id-type="orcid">http://orcid.org/0000-0002-6842-8615</contrib-id>
<name name-style="western">
<surname>Rosales Sánchez</surname>
<given-names>Christian</given-names>
</name>
<role content-type="http://credit.casrai.org/">Conceptualization</role>
<role content-type="http://credit.casrai.org/">Formal analysis</role>
<role content-type="http://credit.casrai.org/">Investigation</role>
<role content-type="http://credit.casrai.org/">Methodology</role>
<role content-type="http://credit.casrai.org/">Writing – original draft</role>
<role content-type="http://credit.casrai.org/">Writing – review &amp; editing</role>
<xref ref-type="fn" rid="currentaff001"><sup>¤</sup></xref>
<xref ref-type="corresp" rid="cor001">*</xref>
<xref ref-type="aff" rid="aff001"/>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Díaz-Cabrera</surname>
<given-names>Dolores</given-names>
</name>
<role content-type="http://credit.casrai.org/">Conceptualization</role>
<role content-type="http://credit.casrai.org/">Funding acquisition</role>
<role content-type="http://credit.casrai.org/">Writing – original draft</role>
<role content-type="http://credit.casrai.org/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"/>
</contrib>
<contrib contrib-type="author" xlink:type="simple">
<name name-style="western">
<surname>Hernández-Fernaud</surname>
<given-names>Estefanía</given-names>
</name>
<role content-type="http://credit.casrai.org/">Conceptualization</role>
<role content-type="http://credit.casrai.org/">Formal analysis</role>
<role content-type="http://credit.casrai.org/">Investigation</role>
<role content-type="http://credit.casrai.org/">Methodology</role>
<role content-type="http://credit.casrai.org/">Writing – original draft</role>
<role content-type="http://credit.casrai.org/">Writing – review &amp; editing</role>
<xref ref-type="aff" rid="aff001"/>
</contrib>
</contrib-group>
<aff id="aff001"><addr-line>Universidad de La Laguna, Tenerife, Islas Canarias, España</addr-line></aff>
<contrib-group>
<contrib contrib-type="editor" xlink:type="simple">
<name name-style="western">
<surname>Alessandri</surname>
<given-names>Guido</given-names>
</name>
<role>Editor</role>
<xref ref-type="aff" rid="edit1"/>
</contrib>
</contrib-group>
<aff id="edit1"><addr-line>Sapienza, University of Rome, ITALY</addr-line></aff>
<author-notes>
<fn fn-type="conflict" id="coi001">
<p>The authors have declared that no competing interests exist.</p>
</fn>
<fn fn-type="current-aff" id="currentaff001">
<label>¤</label>
<p>Current address: Departamento de Psicología Cognitiva, Social y Organizacional, Facultad de Psicología, Campus de Guajara, Tenerife, Spain</p>
</fn>
<corresp id="cor001">* E-mail: <email xlink:type="simple">crosales@ull.es</email></corresp>
</author-notes>
<pub-date pub-type="epub">
<day>19</day>
<month>9</month>
<year>2019</year>
</pub-date>
<pub-date pub-type="collection">
<year>2019</year>
</pub-date>
<volume>14</volume>
<issue>9</issue>
<elocation-id>e0222694</elocation-id>
<history>
<date date-type="received">
<day>26</day>
<month>3</month>
<year>2019</year>
</date>
<date date-type="accepted">
<day>5</day>
<month>9</month>
<year>2019</year>
</date>
</history>
<permissions>
<copyright-year>2019</copyright-year>
<copyright-holder>Rosales Sánchez et al</copyright-holder>
<license xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">
<license-p>This is an open access article distributed under the terms of the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/" xlink:type="simple">Creative Commons Attribution License</ext-link>, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited.</license-p>
</license>
</permissions>
<self-uri content-type="pdf" xlink:href="info:doi/10.1371/journal.pone.0222694"/>
<abstract>
<p>Performance appraisal is a complex process by which an organization can determine the extent to which employees are performing their work effectively. However, this appraisal may not be accurate if there is no reduction in the impact of problems caused by possibly subjective rater judgements. The main objective of this work is to check the effectiveness—separately and jointly—of the following four training programmes in the extant literature aimed at improving the accuracy of performance assessment: 1) Performance Dimension Training, 2) Frame-of-Reference, 3) Rater Error Training, and 4) Behavioural Observation Training. Based on these training strategies, three programmes were designed and applied separately. A fourth programme was a combination of the other three. We analyzed two studies using different samples (85 students and 42 employees) for the existence of differences in the levels of knowledge of performance and its dimensions, rater errors, observational accuracy, and accuracy of task and citizenship performance appraisal, according to the type of training raters receive. First, the main results show that training based on performance dimensions and the creation of a common framework, in addition to the training that includes the four programmes (Training_4_programmes), increases the level of knowledge of performance and its dimensions. Second, groups that receive training in rater error score higher in knowledge of biases than the other groups, whether or not they have received training. Third, participants’ observational accuracy improves with each new moment measure (post-training and follow-up), though not because of the type of training received. Fourth, participants who receive training through the programme that combines the other four gave a task performance appraisal that was closer to the one undertaken by the judges-experts than the other groups. And finally, students’ citizenship performance appraisal does not vary according to type of training or to different moment measures, whereas the group of employees who received all four types of training gave a more accurate citizenship performance assessment.</p>
</abstract>
<funding-group>
<award-group id="award001">
<funding-source>
<institution-wrap>
<institution-id institution-id-type="funder-id">http://dx.doi.org/10.13039/501100003033</institution-id>
<institution>Ministerio de Ciencia, Tecnología e Innovación Productiva</institution>
</institution-wrap>
</funding-source>
<award-id>PSI2010-17327</award-id>
<principal-award-recipient>
<name name-style="western">
<surname>Díaz-Cabrera</surname>
<given-names>Dolores</given-names>
</name>
</principal-award-recipient>
</award-group>
<funding-statement>DDC: This study is framed within project PSI2010-17327, financed by the National Programme for Fundamental Research Projects of the Ministry of Science, Innovation, and Universities of the Government of Spain (MICINN). The funders had no role in study design, data collection and analysis, decision to publish, or preparation of the manuscript.</funding-statement>
</funding-group>
<counts>
<fig-count count="1"/>
<table-count count="2"/>
<page-count count="20"/>
</counts>
<custom-meta-group>
<custom-meta id="data-availability">
<meta-name>Data Availability</meta-name>
<meta-value>The data are available at <ext-link ext-link-type="uri" xlink:href="http://doi.org/10.3886/E109701V1" xlink:type="simple">http://doi.org/10.3886/E109701V1</ext-link>.</meta-value>
</custom-meta>
</custom-meta-group>
</article-meta>
</front>
<body>
<sec id="sec001" sec-type="intro">
<title>Introduction</title>
<p>Task and citizenship performance appraisal is essential for organizations because it provides important information for people management [<xref ref-type="bibr" rid="pone.0222694.ref001">1</xref>]. Task performance appraisal is centred on the contribution of specific employees in terms of tasks formally assigned to their job and position [<xref ref-type="bibr" rid="pone.0222694.ref002">2</xref>,<xref ref-type="bibr" rid="pone.0222694.ref003">3</xref>,<xref ref-type="bibr" rid="pone.0222694.ref004">4</xref>]. Citizenship performance appraisal refers to behaviours that provide assistance and support for the basic formal tasks of the job and the creation of an organizational environment of social and psychological support, which fosters the smooth running of the organization [<xref ref-type="bibr" rid="pone.0222694.ref005">5</xref>,<xref ref-type="bibr" rid="pone.0222694.ref006">6</xref>]. This paper focuses on task and citizenship performance because these performances are generally evaluated by all organizations that follow a performance appraisal system, unlike counterproductive behaviours or the most recently proposed adaptive performance. Checking the effectiveness of different training programmes involves considering both task and citizenship performance, because they are the most commonly used in practice and because very few studies have analyzed the effectiveness of training on citizenship performance appraisals.</p>
<p>An efficient performance appraisal process calls for rater accuracy. The main objective of this work is therefore to check whether the theoretical and practical training of performance raters improves the accuracy of their appraisal. Traditionally, two strategies have been used in response to the problems created by subjective rater judgements on performance: more accurate rating scales and rater training [<xref ref-type="bibr" rid="pone.0222694.ref007">7</xref>]. In recent years, interest has centred on rater training, since improved rater response scales are not considered to further increase performance accuracy or reduce bias [<xref ref-type="bibr" rid="pone.0222694.ref008">8</xref>]. Training encourages raters to use the skills and tools required to improve their performance accuracy, while increasing participant satisfaction with the system [<xref ref-type="bibr" rid="pone.0222694.ref009">9</xref>].</p>
<p>Woehr and Huffcutt [<xref ref-type="bibr" rid="pone.0222694.ref007">7</xref>] categorized existing rater training programmes and carried out a subsequent meta-analysis, based on the results of previous studies, to assess their effectiveness, classifying them into four types: 1) Rater Error Training (RET); 2) Performance Dimension Training (PDimT); 3) Behavioural Observation Training (BOT); and 4) Frame-of-Reference (FOR). These four strategies share the same objective: to improve rater precision and accuracy.</p>
<p>Verifying the effectiveness of training programmes to improve rating accuracy entails several procedures to compare rater rating with true scores (or true scores estimate, a term proposed by Sulsky &amp; Blazer [<xref ref-type="bibr" rid="pone.0222694.ref010">10</xref>] as being more precise), generally issued by experts (for a review, see Sulsky &amp; Blazer [<xref ref-type="bibr" rid="pone.0222694.ref010">10</xref>]). The most commonly used measurements by which to operationalize true scores are the four indices proposed by Cronbach [<xref ref-type="bibr" rid="pone.0222694.ref011">11</xref>] and the two by Borman [<xref ref-type="bibr" rid="pone.0222694.ref012">12</xref>]. These indices entail comparing the distance between the scores given by a group of experts (true scores estimates) and those given by training programme participants [<xref ref-type="bibr" rid="pone.0222694.ref013">13</xref>].</p>
<p>Studies on rater training programmes have generally used one of Cronbach’s [<xref ref-type="bibr" rid="pone.0222694.ref011">11</xref>] or Borman’s [<xref ref-type="bibr" rid="pone.0222694.ref012">12</xref>] indices, or a combination of several. Thus, Gorman and Rentsch [<xref ref-type="bibr" rid="pone.0222694.ref014">14</xref>,<xref ref-type="bibr" rid="pone.0222694.ref015">15</xref>], Sulsky and Day [<xref ref-type="bibr" rid="pone.0222694.ref016">16</xref>], Sulsky and Kline [<xref ref-type="bibr" rid="pone.0222694.ref017">17</xref>], Raczynski, Cohen, Engelhard, and Lu [<xref ref-type="bibr" rid="pone.0222694.ref018">18</xref>], and Woehr [<xref ref-type="bibr" rid="pone.0222694.ref019">19</xref>], among others, use Cronbach’s indices [<xref ref-type="bibr" rid="pone.0222694.ref011">11</xref>], along with Borman’s distance accuracy [<xref ref-type="bibr" rid="pone.0222694.ref012">12</xref>], to analyze training effectiveness.</p>
<p>With regard to the four types of training classified by Woehr and Huffcutt [<xref ref-type="bibr" rid="pone.0222694.ref007">7</xref>], the main objective of training in RET is to increase appraisal accuracy by familiarizing raters with common classification errors and biases (e.g. similarity, contrast, primacy, recency, negativity, first impression, leniency, central tendency, severity, halo effect) [<xref ref-type="bibr" rid="pone.0222694.ref009">9</xref>,<xref ref-type="bibr" rid="pone.0222694.ref020">20</xref>,<xref ref-type="bibr" rid="pone.0222694.ref007">7</xref>]. Traditionally, participants are trained in the definitions of the involuntary biases in which they may incur and which affect the accuracy of their appraisals. To that end, they are shown graphic illustrations of numerical examples of how such biases may interfere with their appraisals. Moreover, some programmes include debates with participants about how to avoid bias in performance appraisals of fictitious characters shown in videos [<xref ref-type="bibr" rid="pone.0222694.ref021">21</xref>]. The results of several studies indicate that this programme reduces the influence of these biases on appraisal [<xref ref-type="bibr" rid="pone.0222694.ref020">20</xref>,<xref ref-type="bibr" rid="pone.0222694.ref021">21</xref>,<xref ref-type="bibr" rid="pone.0222694.ref022">22</xref>]. However, results also show that this reduction may have a negative effect on rater accuracy [<xref ref-type="bibr" rid="pone.0222694.ref023">23</xref>], depending on the location of the main focus of the training [<xref ref-type="bibr" rid="pone.0222694.ref007">7</xref>].</p>
<p>Smith’s [<xref ref-type="bibr" rid="pone.0222694.ref024">24</xref>] PDimT emerges as an alternative to accuracy problems in bias training. The main aim is to improve rater accuracy by familiarizing raters with the meaning of performance, along with its components and dimensions, and by involving them in the design and review of the rating scale being used [<xref ref-type="bibr" rid="pone.0222694.ref024">24</xref>,<xref ref-type="bibr" rid="pone.0222694.ref007">7</xref>]. Results show that this approach increases the degree of agreement between the appraisals of several raters and between those made by each of them [<xref ref-type="bibr" rid="pone.0222694.ref010">10</xref>], thereby ensuring assessments are more precise and accurate [<xref ref-type="bibr" rid="pone.0222694.ref019">19</xref>]. Nevertheless, we found no study that assesses the influence on rater accuracy of this programme alone. Thus, the study by Pulakos [<xref ref-type="bibr" rid="pone.0222694.ref025">25</xref>], which compared the RET and PDimT programmes, reveals, among other results, that the group that only received training in performance dimensions gave more precise scores than the untrained group.</p>
<p>Third, in 1980, there emerged a new line of research in training programmes focusing on rater observation skills (BOT) [<xref ref-type="bibr" rid="pone.0222694.ref026">26</xref>]. The objective was for raters to closely observe ratee behaviours and to improve their own recall of them [<xref ref-type="bibr" rid="pone.0222694.ref009">9</xref>,<xref ref-type="bibr" rid="pone.0222694.ref027">27</xref>]. This strategy uses memory and recognition of specific behaviour events as a dependent variable [<xref ref-type="bibr" rid="pone.0222694.ref028">28</xref>,<xref ref-type="bibr" rid="pone.0222694.ref026">26</xref>]. Thus, several techniques and procedures have been evaluated [<xref ref-type="bibr" rid="pone.0222694.ref009">9</xref>,<xref ref-type="bibr" rid="pone.0222694.ref016">16</xref>,<xref ref-type="bibr" rid="pone.0222694.ref026">26</xref>]. Aguinis [<xref ref-type="bibr" rid="pone.0222694.ref009">9</xref>] proposes training raters to use notes or diaries as observation strategies to enable them to record behaviours that must be evaluated in each performance dimension. Sulsky and Day [<xref ref-type="bibr" rid="pone.0222694.ref016">16</xref>] include measures of behavioural recognition, in which participants are asked to indicate from a list which behaviours really occurred in a fictitious situation. And Thornton and Zorich [<xref ref-type="bibr" rid="pone.0222694.ref026">26</xref>] created a multiple-choice questionnaire, with true/false responses, or a combination of alternatives, to evaluate a sample of behaviours with stimulus material. Although the effectiveness of this type of training has been little studied, the meta-analysis made by Woehr and Huffcutt [<xref ref-type="bibr" rid="pone.0222694.ref007">7</xref>] shows a positive effect on both appraisal and observational accuracy (<italic>d</italic> = .77 and .49, respectively). 
However, the study by Hedge and Kavanagh [<xref ref-type="bibr" rid="pone.0222694.ref029">29</xref>] does not provide conclusive results on the effectiveness of this programme.</p>
<p>Fourth, the frame-of-reference strategy (FOR), proposed by Bernardin and Buckley [<xref ref-type="bibr" rid="pone.0222694.ref030">30</xref>], highlights the importance of the fact that raters a) are aware of the multidimensionality of performance, in order to familiarize themselves with identifying each ratee behaviour with the correct performance dimension [<xref ref-type="bibr" rid="pone.0222694.ref028">28</xref>,<xref ref-type="bibr" rid="pone.0222694.ref016">16</xref>], and that they b) share a framework or common conceptualization regarding the nature of performance, so that it can be evaluated in a similar way by different raters [<xref ref-type="bibr" rid="pone.0222694.ref031">31</xref>,<xref ref-type="bibr" rid="pone.0222694.ref013">13</xref>]. It therefore focuses on intervening in the way in which raters codify, organize, and recall information [<xref ref-type="bibr" rid="pone.0222694.ref032">32</xref>]. The final aim is to obtain more accurate appraisals from participants based on the presentation of small samples of performance at work, along with the performance dimension appraisal issued by a group of experts [<xref ref-type="bibr" rid="pone.0222694.ref024">24</xref>]. However, variations have been observed in the studies, in terms of experimental design, programme structure, length of training, and method of accuracy appraisal. Thus, the programme designed by Bernardin and Buckley [<xref ref-type="bibr" rid="pone.0222694.ref030">30</xref>] establishes a series of stages, ranging from familiarizing participants with how to obtain a profile of personal requirements for jobs, based on job descriptions, a performance appraisal of a fictitious employee, and a justification of the appraisal, to a group debate about the discrepancies between the correct appraisals provided by the trainer and those issued by the participants.</p>
<p>In recent years, the FOR programme has been the most used and cited by various authors compared with other strategies [<xref ref-type="bibr" rid="pone.0222694.ref033">33</xref>,<xref ref-type="bibr" rid="pone.0222694.ref034">34</xref>,<xref ref-type="bibr" rid="pone.0222694.ref014">14</xref>,<xref ref-type="bibr" rid="pone.0222694.ref015">15</xref>,<xref ref-type="bibr" rid="pone.0222694.ref035">35</xref>,<xref ref-type="bibr" rid="pone.0222694.ref036">36</xref>,<xref ref-type="bibr" rid="pone.0222694.ref037">37</xref>,<xref ref-type="bibr" rid="pone.0222694.ref018">18</xref>,<xref ref-type="bibr" rid="pone.0222694.ref013">13</xref>,<xref ref-type="bibr" rid="pone.0222694.ref038">38</xref>,<xref ref-type="bibr" rid="pone.0222694.ref028">28</xref>,<xref ref-type="bibr" rid="pone.0222694.ref016">16</xref>], although variations have been observed in the studies regarding experimental design, programme structure, length of training, and method of accuracy appraisal. Studies have shown the positive effect of the FOR programme on appraisal accuracy [<xref ref-type="bibr" rid="pone.0222694.ref039">39</xref>,<xref ref-type="bibr" rid="pone.0222694.ref014">14</xref>,<xref ref-type="bibr" rid="pone.0222694.ref040">40</xref>,<xref ref-type="bibr" rid="pone.0222694.ref027">27</xref>,<xref ref-type="bibr" rid="pone.0222694.ref013">13</xref>,<xref ref-type="bibr" rid="pone.0222694.ref028">28</xref>,<xref ref-type="bibr" rid="pone.0222694.ref016">16</xref>]. Thus, Lievens and Sánchez [<xref ref-type="bibr" rid="pone.0222694.ref036">36</xref>] found that the trained group presented significantly higher values in discriminant validity, interrater reliability, and appraisal accuracy compared with the control group.</p>
<p>However, there were variations in the results regarding appraisal accuracy (<italic>d</italic> = .50 and <italic>d</italic> = .83, respectively) in the studies by Roch et al. [<xref ref-type="bibr" rid="pone.0222694.ref013">13</xref>] and by Woehr and Huffcutt [<xref ref-type="bibr" rid="pone.0222694.ref007">7</xref>]. As Roch et al. [<xref ref-type="bibr" rid="pone.0222694.ref013">13</xref>] suggest, this variation may be due to sample size, number of effect sizes, and the measurements used to determine the degree of accuracy. Finally, some research has examined a little studied variant in the field of rater training: programme combinations. On the one hand, it highlights comparison of the effect of the combined training of two rather than only one of the strategies [<xref ref-type="bibr" rid="pone.0222694.ref027">27</xref>,<xref ref-type="bibr" rid="pone.0222694.ref032">32</xref>], and on the other, the combination of all four main training types, but without comparing the effect of each training type independently [<xref ref-type="bibr" rid="pone.0222694.ref041">41</xref>].</p>
<p>The aim of this research is therefore to analyze how training influences rater performance appraisal according to the type of training received. We propose the following specific objectives:</p>
<list list-type="order">
<list-item><p>To analyze how the level of knowledge of job performance, its dimensions, assessment, and the most common biases vary according to the type of training received.</p></list-item>
<list-item><p>To verify whether rater observational accuracy changes according to training type.</p></list-item>
<list-item><p>To examine whether the task and citizenship performance appraisal of a fictitious employee is modified according to the training programme followed.</p></list-item>
</list>
<p>To test the objectives, we carried out two studies.</p>
</sec>
<sec id="sec002">
<title>First study</title>
<p>In the first study we have proposed four hypotheses:</p>
<p><italic>Hypothesis 1</italic>.<italic>1</italic> (<italic>H1</italic>.<italic>1</italic>): groups trained in performance dimensions and FOR, and in all four programme types will score higher in general knowledge of performance than the other groups. This hypothesis is based on the fact that the programmes PDimT and FOR concentrate on developing knowledge about performance and its dimensions [<xref ref-type="bibr" rid="pone.0222694.ref031">31</xref>,<xref ref-type="bibr" rid="pone.0222694.ref013">13</xref>,<xref ref-type="bibr" rid="pone.0222694.ref024">24</xref>,<xref ref-type="bibr" rid="pone.0222694.ref028">28</xref>,<xref ref-type="bibr" rid="pone.0222694.ref016">16</xref>,<xref ref-type="bibr" rid="pone.0222694.ref007">7</xref>]. Moreover, the programme that includes the contents of all four programmes focuses on participant familiarization with the concept of performance and its dimensions, because it incorporates the contents of PDimT and FOR.</p>
<p><italic>Hypothesis 1</italic>.<italic>2</italic> (<italic>H1</italic>.<italic>2</italic>): groups trained in Rater Error Training (RET) and in all four programme types will score higher in knowledge of appraisal biases than the other groups. The RET training programme aims to develop participant awareness of the biases that can affect appraisal accuracy [<xref ref-type="bibr" rid="pone.0222694.ref007">7</xref>]. It is therefore logical to expect participants in this programme to have better mastery of biases and how to avoid them than the other training programmes. This is also true for the complete training programme, because it includes training in bias identification.</p>
<p><italic>Hypothesis 1</italic>.<italic>3</italic> (<italic>H1</italic>.<italic>3</italic>): groups trained in observational accuracy and in all four programme types will identify more accurately the occurrence or non-occurrence of various events. The aim of the BOT training programme is to improve the observational capacity of participants regarding the behaviours of employees being appraised [<xref ref-type="bibr" rid="pone.0222694.ref009">9</xref>,<xref ref-type="bibr" rid="pone.0222694.ref027">27</xref>] and it is therefore expected to foster greater observational accuracy. The same is expected of the programme that combines all four types of training, since it includes training in rater observation skills from the BOT programme.</p>
<p><italic>Hypothesis 1</italic>.<italic>4</italic> (<italic>H1</italic>.<italic>4</italic>): the group trained in all four training programmes will produce a task and citizenship performance appraisal closer to the expert judgement than the other groups. Given that the different training programmes have obtained some results that endorse their effectiveness in enhancing appraisal accuracy in the specific aspects on which they focus [<xref ref-type="bibr" rid="pone.0222694.ref020">20</xref>,<xref ref-type="bibr" rid="pone.0222694.ref021">21</xref>,<xref ref-type="bibr" rid="pone.0222694.ref022">22</xref>,<xref ref-type="bibr" rid="pone.0222694.ref036">36</xref>,<xref ref-type="bibr" rid="pone.0222694.ref025">25</xref>,<xref ref-type="bibr" rid="pone.0222694.ref019">19</xref>,<xref ref-type="bibr" rid="pone.0222694.ref007">7</xref>], the inclusion of such content (observation, rater error, dimensions and frame-of-reference) in a single training programme should make this programme more effective.</p>
</sec>
<sec id="sec003" sec-type="materials|methods">
<title>Method</title>
<sec id="sec004">
<title>Participants</title>
<p>G*Power: Statistical Power Analyses revealed that sample size should be 75 people, with a 95% confidence level and 5% margin of error for five groups and five dependent variables. The sample was composed of 85 second-year psychology undergraduates, of whom 80.5% were women and 19.5% men. The average age was 20.5 years (<italic>SD</italic> = 3.21, range, 19–38 years). No student had prior work experience.</p>
</sec>
<sec id="sec005">
<title>Design</title>
<p>The design was quasi-experimental, factorial-multivariable and longitudinal (repeated measures), with three moment measures of the training received: before, on completion, and after a month (follow-up); the within-group variable was Moment measure.</p>
<p>As an independent between-group variable, Type of training had five groups: 1) training in Knowledge of dimensions and Frame-of-reference (KdFOR) (<italic>n</italic> = 18) combines the strategies of two programmes, PDimT and FOR, since both aim to improve knowledge of performance and its dimensions, by fostering correct assessment and agreement between raters, 2) Observational accuracy training (<italic>n</italic> = 16), 3) Rater Error Training (RET) (<italic>n</italic> = 19), 4) Training_4_Programmes, training that includes the content addressed in the other three groups (<italic>n</italic> = 15), and 5) Control group (no training, <italic>n</italic> = 17). The <xref ref-type="supplementary-material" rid="pone.0222694.s001">S1 Appendix</xref> contains the objectives, contents, and length of each training programme. The methodological strategies used in all four training programmes included group discussion, written practice, and the joint compiling of conclusions. Video instruction was also given in training programmes on observational accuracy, identification, risk prevention, and full training. Five measures were used as dependent variables: 1) Knowledge of performance and its dimensions, 2) Knowledge of biases in appraisals, 3) Observational accuracy, 4) Task performance appraisal, and 5) Citizenship performance appraisal.</p>
</sec>
<sec id="sec006">
<title>Materials and tools</title>
<p>The materials used included two video stimuli, five training videos, a short film, and training manuals. Five measuring tools were used: three ad-hoc questionnaires (two to evaluate knowledge of performance and its dimensions, and of bias in appraisal, and one to measure participants’ observational accuracy) and two performance assessment scales (task and citizenship). Below is a description of the materials and tools:</p>
<list list-type="bullet">
<list-item><p><bold>Videos</bold>: two versions of one video were created as stimulus material for performance assessments. The video shows five samples of an employee’s performance over five working days. The work samples are the same in both videos, the script for which was created in a previous study [<xref ref-type="bibr" rid="pone.0222694.ref042">42</xref>]. These work samples included a series of performance task activities habitually carried out by administrative and office staff, as well as the employee’s citizenship behaviours. On three of the five working days, the employee gave an adequate task and citizenship performance, while on the two remaining days, performance in both areas was inadequate. To reduce the recall effect when the material was assessed three times, the working days and main actors were presented in a different order. Both versions of the videos lasted around 30 minutes.</p></list-item>
<list-item><p><bold>Training videos</bold>: in several training activities, five videos illustrated various performance samples of one or several employees in an administrative position.</p></list-item>
<list-item><p><bold>Short film</bold>: an 18-minute-long audio-visual film entitled <italic>Life Vest Under Your Seat</italic> was used to assess observational accuracy [<xref ref-type="bibr" rid="pone.0222694.ref043">43</xref>]. The short film is set on a flight from Madrid to Miami, whose flight path is altered because of the conduct of a passenger who decides to break all the usual in-flight rules of behaviour and safety. This short film was chosen because it had not been widely circulated and was largely unknown to participants, thereby counteracting the influence of recall.</p></list-item>
<list-item><p><bold>Training manuals</bold>: trainer and participant manuals were devised for each training programme. They included various individual, group, theoretical, and practical activities and exercises (<xref ref-type="supplementary-material" rid="pone.0222694.s001">S1 Appendix</xref>).</p></list-item>
<list-item><p><bold>Questionnaire on knowledge of performance and its dimensions</bold>: this ad-hoc paper tool was composed of 11 items that evaluated knowledge of job performance, types, and dimensions. Participants had to decide whether each item was true or false, obtaining a score from 0 to 10. This questionnaire is included as supplementary material (<xref ref-type="supplementary-material" rid="pone.0222694.s002">S1 Questionnaire</xref>).</p></list-item>
<list-item><p><bold>Questionnaire on knowledge of biases in performance assessment</bold>: this ad-hoc tool was composed of 21 items on the most frequently occurring biases in performance assessment. The response scale was dichotomous (True or False), with a score from 0 to 10. This questionnaire is included as supplementary material (<xref ref-type="supplementary-material" rid="pone.0222694.s003">S2 Questionnaire</xref>).</p></list-item>
<list-item><p><bold>Checklist of observational accuracy</bold>: a 155-item ad-hoc paper tool that describes events that may or may not have happened in the short film <italic>Life Vest Under Your Seat</italic>. Participants were required to indicate whether or not the event described had taken place, and any items they had doubts about could be left blank. The events were presented in a different order than in the video. Of the total items, 115 took place in the short film, while 40 did not. The response scale was dichotomous (Yes/No). The number of correct responses was converted into a scale of zero to ten: correct responses to all 155 items gave a score of ten, while incorrect responses resulted in a proportional drop in score. This questionnaire is included as supplementary material (<xref ref-type="supplementary-material" rid="pone.0222694.s004">S3 Questionnaire</xref>).</p></list-item>
<list-item><p><bold>Spanish adaptation of Coleman and Borman’s (2000) scale of citizenship performance behaviours</bold> [<xref ref-type="bibr" rid="pone.0222694.ref006">6</xref>]: originally composed of 27 items, 20 were selected for this study. Seven items were excluded from the scale because they represented citizenship behaviours that were not performed by the actors in the videos. The response scale ranged from 1 to 7 with three anchors: Not at all characteristic, Characteristic and More characteristic than of anyone else, and was completed using a computer application. The reliability of the original scale is high (α = 0.96). Moreover, it produced a single (unidimensional) or several (multidimensional) measures of citizenship performance. In this work, we obtained a unidimensional measure of citizenship performance, and the reliability of the scale was 0.87.</p></list-item>
<list-item><p><bold>Task performance assessment scale</bold>: this tool was composed of 14 items based on the task inventory of a previous job analysis [<xref ref-type="bibr" rid="pone.0222694.ref044">44</xref>]. The participants used a computer application to evaluate the quality and frequency with which the main characters in each video performed the tasks associated with their jobs [<xref ref-type="bibr" rid="pone.0222694.ref045">45</xref>]. First, for each task, three behavioural descriptions appeared on screen. These descriptions represented different performance levels, though no explanation was given as to which performance level (Excellent, Good, Improvable) each one corresponded to. These response alternatives appeared randomly in a different position for each item. That is, the responses were not always displayed from deficient to excellent performance, or vice versa. Subsequently, when the level of performance was selected, three new alternatives were displayed, allowing the participant to report on the frequency with which the employee performed each task according to the level of quality previously indicated. Therefore, the response scale of this tool ranged from 1 to 9.</p></list-item>
</list>
</sec>
<sec id="sec007">
<title>Procedure</title>
<p>Participants were recruited on different days and from different class groups. Participants registered on various lists according to their availability, as the training programmes were held at different times. In each group the order of presentation of the two versions of the stimulus video was counterbalanced, so that half of each group assessed version 1 in the pre-test, version 2 in the post-test, and version 1 in the follow-up, while the other half began the procedure with version 2.</p>
<p>For the pre-test measure, participants were required to first complete both questionnaires on knowledge of performance and biases in the appraisal. Second, the short film <italic>Life Vest Under Your Seat</italic> was screened and participants completed the observational accuracy checklist. Third, the corresponding version of the video of the employee being assessed was shown. Participants could take notes during the presentation. Fourth, using a computer application, participants assessed citizenship and task performance of the main character.</p>
<p>All the groups, except the control group, received training from a psychologist qualified in Psychology of Work and Organizations and with teaching experience. Training followed a participative methodology and was carried out in groups of seven to nine. Depending on the group, training lasted from two to thirteen hours. All groups completed the post-training measure (knowledge of performance and biases, observational accuracy, citizenship and task performance) four days after the pre-test measure. Finally, follow-up was done a month later by applying the same protocol as in the previous moment measures.</p>
<p>The control group used the same instruments as the groups in training, in the same order and in the same period. The only difference with the other groups was that the control group received no training. <xref ref-type="fig" rid="pone.0222694.g001">Fig 1</xref> gives the different phases of the procedure.</p>
<fig id="pone.0222694.g001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0222694.g001</object-id>
<label>Fig 1</label>
<caption>
<title>Phases of the procedure in studies 1 and 2.</title>
</caption>
<graphic mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0222694.g001" xlink:type="simple"/>
</fig>
<p>Participation in this study was voluntary and consented. Participation at all stages of the research was rewarded by a small increment in the final marks of a subject, once passed, of a second-year undergraduate degree, depending on the number of hours’ participation.</p>
<p>At the same time as student training was taking place, two versions of the video were shown to a group of three experts in Psychology of Work and Organizations with research and professional experience in performance assessment. Assessment of the citizenship and task performance of the employees featuring in both videos was obtained following the Delphi method. In the first stage, the three experts received an email containing the description of the fictitious employee’s job performance, along with several questionnaires for them to appraise the task and citizenship performance of this employee. The experts were given a week to return their scores. In the second stage, the researchers reduced the response scale anchors used in the performance assessment scales, only keeping the alternatives chosen most frequently by the group of experts. The evaluation questionnaires with modified response scales were resent to the experts, who were again given a week to assess the performance of the fictitious employee. This time they were also asked to explain and justify their responses. In the third stage, the questionnaires were rearranged to include only the most voted options on the response scale, as well as a summary of the experts’ most important comments on the character's performance. The experts then used these questionnaires as a basis to discuss the appropriate scoring of each item, subsequently reaching an agreement on the fictitious employee’s task and citizenship performance. This assessment was reached by consensus among the experts and was used as a criterion to evaluate the goodness-of-fit of participants’ assessments, in the understanding that assessments will be better the closer they are to those done by experts.</p>
</sec>
<sec id="sec008">
<title>Ethics statement</title>
<p>At the time of participant recruitment, because the study involved no risk to participants, informed consent was given verbally. Participants were clearly informed that participation was voluntary. When they came to take part in the pre-test checks, they gave their written consent. The study did not include minors. The University of La Laguna Ethics Committee in Tenerife, Spain (ULLECT) approved this study.</p>
</sec>
<sec id="sec009">
<title>Data and analysis</title>
<p>The database of this first study can be consulted at <ext-link ext-link-type="uri" xlink:href="http://doi.org/10.3886/E109701V1" xlink:type="simple">http://doi.org/10.3886/E109701V1</ext-link>. Data analysis was performed using IBM SPSS Statistics software, version 21.</p>
</sec>
</sec>
<sec id="sec010" sec-type="conclusions">
<title>Results and discussion</title>
<p>First, typical scores and multivariate outliers with the Mahalanobis distance were used to analyze normality; three cases with atypical values were eliminated. The remaining analyses were made with 82 valid cases.</p>
<p>Second, we checked for differences in performance appraisals depending on the order of presentation of both versions of the video. Only one significant interaction of the Order of presentation with the Moment measure was obtained in citizenship performance appraisal (<italic>F</italic>(2, 79) = 15.24; <italic>p</italic> &lt; .001; <italic>η</italic><sup>2</sup> = .28), so that the citizenship performance of the main character was valued more positively in the pre-training measure (Version 1 <italic>M</italic> = 3.69; Version 2 <italic>M</italic> = 2.94) (<italic>t</italic> = 4.68, <italic>p</italic> &lt; .001) and follow-up (Version 1 <italic>M</italic> = 3.55; Version 2 <italic>M</italic> = 3.19) (<italic>t</italic> = 2.67, <italic>p</italic> &lt; .01) in version 1 than in version 2 of the video. Third, groups were analyzed for differences in pre-test scores in each dependent variable, with only one significant difference being found in observational accuracy (<italic>F</italic>(4, 77) = 2.95; <italic>p</italic> &lt; .05; <italic>η</italic><sup>2</sup> = .13), although a posteriori analysis with the Scheffé test did not reveal differences between groups.</p>
<p>Fourth, a one-way repeated measures multivariate analysis of variance (MANOVA) was undertaken using a between-group independent variable, Type of training, and the within-group variable, which corresponds to three moment measures for each dependent variable. <xref ref-type="table" rid="pone.0222694.t001">Table 1</xref> shows the descriptive statistics of the dependent variables in each group and moment measure. The experts’ scores for the fictitious employee’s performance were 4.9 (scale, 1–9) for task performance appraisal and 3.5 (scale, 1–7) for citizenship performance appraisal.</p>
<table-wrap id="pone.0222694.t001" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0222694.t001</object-id>
<label>Table 1</label> <caption><title>Descriptive statistics of the dependent variables in each training group and moment measure (students).</title></caption>
<alternatives>
<graphic id="pone.0222694.t001g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0222694.t001" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="center"/>
<th align="center"/>
<th align="center" colspan="7">Moment measure</th>
</tr>
<tr>
<th align="left"/>
<th align="left"/>
<th align="center" colspan="2">Pre-training</th>
<th align="center" colspan="2">Post-training</th>
<th align="center" colspan="3">Follow-Up</th>
</tr>
<tr>
<th align="left"/>
<th align="left"/>
<th align="center"><italic>M</italic></th>
<th align="center"><italic>SD</italic></th>
<th align="center"><italic>M</italic></th>
<th align="center"><italic>SD</italic></th>
<th align="center" colspan="2"><italic>M</italic></th>
<th align="center"><italic>SD</italic></th>
</tr>
</thead>
<tbody>
<tr>
<td align="center" rowspan="5">Knowledge of performance and its dimensions</td>
<td align="center">G1</td>
<td align="center">5.6</td>
<td align="center">2.8</td>
<td align="center">9.7</td>
<td align="center">0.8</td>
<td align="center" colspan="2">8.6</td>
<td align="center">1.9</td>
</tr>
<tr>
<td align="center">G2</td>
<td align="center">5</td>
<td align="center">2.1</td>
<td align="center">7.5</td>
<td align="center">2</td>
<td align="center" colspan="2">7.3</td>
<td align="center">2.1</td>
</tr>
<tr>
<td align="center">G3</td>
<td align="center">4.9</td>
<td align="center">2.8</td>
<td align="center">6.9</td>
<td align="center">2.6</td>
<td align="center" colspan="2">7.0</td>
<td align="center">1.8</td>
</tr>
<tr>
<td align="center">G4</td>
<td align="center">6.4</td>
<td align="center">2.0</td>
<td align="center">9.6</td>
<td align="center">1.0</td>
<td align="center" colspan="2">9.5</td>
<td align="center">1.1</td>
</tr>
<tr>
<td align="center">G5</td>
<td align="center">5.8</td>
<td align="center">1.5</td>
<td align="center">6.7</td>
<td align="center">2.1</td>
<td align="center" colspan="2">6.9</td>
<td align="center">1.9</td>
</tr>
<tr>
<td align="center" rowspan="5">Knowledge of biases in performance assessment</td>
<td align="center">G1</td>
<td align="center">3.1</td>
<td align="center">1.8</td>
<td align="center">3.2</td>
<td align="center">1.6</td>
<td align="center">4</td>
<td align="center" colspan="2">2.6</td>
</tr>
<tr>
<td align="center">G2</td>
<td align="center">3.0</td>
<td align="center">2.4</td>
<td align="center">4.0</td>
<td align="center">2.1</td>
<td align="center">4.4</td>
<td align="center" colspan="2">2.9</td>
</tr>
<tr>
<td align="center">G3</td>
<td align="center">2.7</td>
<td align="center">1.6</td>
<td align="center">7.7</td>
<td align="center">1.7</td>
<td align="center">7.4</td>
<td align="center" colspan="2">1.4</td>
</tr>
<tr>
<td align="center">G4</td>
<td align="center">4.5</td>
<td align="center">2.4</td>
<td align="center">7.5</td>
<td align="center">2.1</td>
<td align="center">8.2</td>
<td align="center" colspan="2">1.4</td>
</tr>
<tr>
<td align="center">G5</td>
<td align="center">3.5</td>
<td align="center">2.1</td>
<td align="center">4</td>
<td align="center">2.2</td>
<td align="center">4.3</td>
<td align="center" colspan="2">2</td>
</tr>
<tr>
<td align="center" rowspan="5">Observational accuracy</td>
<td align="center">G1</td>
<td align="center">4.5</td>
<td align="center">1.0</td>
<td align="center">5.4</td>
<td align="center">1.6</td>
<td align="center">6.2</td>
<td align="center" colspan="2">0.8</td>
</tr>
<tr>
<td align="center">G2</td>
<td align="center">5.0</td>
<td align="center">0.6</td>
<td align="center">6.0</td>
<td align="center">0.8</td>
<td align="center">6.4</td>
<td align="center" colspan="2">0.6</td>
</tr>
<tr>
<td align="center">G3</td>
<td align="center">4.1</td>
<td align="center">0.9</td>
<td align="center">5.6</td>
<td align="center">0.8</td>
<td align="center">6.3</td>
<td align="center" colspan="2">0.6</td>
</tr>
<tr>
<td align="center">G4</td>
<td align="center">4.8</td>
<td align="center">0.7</td>
<td align="center">6.1</td>
<td align="center">0.6</td>
<td align="center">6.4</td>
<td align="center" colspan="2">0.5</td>
</tr>
<tr>
<td align="center">G5</td>
<td align="center">4.8</td>
<td align="center">0.7</td>
<td align="center">5.9</td>
<td align="center">1.1</td>
<td align="center">6.6</td>
<td align="center" colspan="2">0.7</td>
</tr>
<tr>
<td align="center" rowspan="6">Task performance appraisal</td>
<td align="center">G1</td>
<td align="center">5.5</td>
<td align="center">0.7</td>
<td align="center">5.1</td>
<td align="center">0.4</td>
<td align="center">5.1</td>
<td align="center" colspan="2">0.2</td>
</tr>
<tr>
<td align="center">G2</td>
<td align="center">5.8</td>
<td align="center">0.4</td>
<td align="center">5.7</td>
<td align="center">0.5</td>
<td align="center">5.8</td>
<td align="center" colspan="2">0.6</td>
</tr>
<tr>
<td align="center">G3</td>
<td align="center">5.7</td>
<td align="center">0.4</td>
<td align="center">5.7</td>
<td align="center">0.5</td>
<td align="center">5.7</td>
<td align="center" colspan="2">0.5</td>
</tr>
<tr>
<td align="center">G4</td>
<td align="center">5.4</td>
<td align="center">0.5</td>
<td align="center">5.0</td>
<td align="center">0.5</td>
<td align="center">5.1</td>
<td align="center" colspan="2">0.4</td>
</tr>
<tr>
<td align="center">G5</td>
<td align="center">5.8</td>
<td align="center">0.7</td>
<td align="center">5.7</td>
<td align="center">0.5</td>
<td align="center">5.7</td>
<td align="center" colspan="2">0.7</td>
</tr>
<tr>
<td align="center"/>
<td align="center" colspan="7"/>
</tr>
<tr>
<td align="center" rowspan="6">Citizenship performance appraisal</td>
<td align="center">G1</td>
<td align="center">3.1</td>
<td align="center">0.6</td>
<td align="center">3.1</td>
<td align="center">0.4</td>
<td align="center">3.2</td>
<td align="center" colspan="2">0.5</td>
</tr>
<tr>
<td align="center">G2</td>
<td align="center">3.1</td>
<td align="center">0.6</td>
<td align="center">3.2</td>
<td align="center">0.7</td>
<td align="center">3.4</td>
<td align="center" colspan="2">0.5</td>
</tr>
<tr>
<td align="center">G3</td>
<td align="center">3.3</td>
<td align="center">0.4</td>
<td align="center">3.3</td>
<td align="center">0.4</td>
<td align="center">3.3</td>
<td align="center" colspan="2">0.6</td>
</tr>
<tr>
<td align="center">G4</td>
<td align="center">3.1</td>
<td align="center">0.9</td>
<td align="center">3.1</td>
<td align="center">0.3</td>
<td align="center">3.1</td>
<td align="center" colspan="2">0.4</td>
</tr>
<tr>
<td align="center">G5</td>
<td align="center">4.1</td>
<td align="center">0.9</td>
<td align="center">3.7</td>
<td align="center">0.7</td>
<td align="center">4.0</td>
<td align="center" colspan="2">0.7</td>
</tr>
<tr>
<td align="center"/>
<td align="center" colspan="7"/>
</tr>
<tr>
<td align="center" rowspan="5">Distance task performance appraisal</td>
<td align="center">G1</td>
<td align="center">0.6</td>
<td align="center">0.7</td>
<td align="center">0.4</td>
<td align="center">0.3</td>
<td align="center">0.2</td>
<td align="center" colspan="2">0.2</td>
</tr>
<tr>
<td align="center">G2</td>
<td align="center">0.9</td>
<td align="center">0.4</td>
<td align="center">0.8</td>
<td align="center">0.5</td>
<td align="center">0.9</td>
<td align="center" colspan="2">0.6</td>
</tr>
<tr>
<td align="center">G3</td>
<td align="center">0.7</td>
<td align="center">0.4</td>
<td align="center">0.8</td>
<td align="center">0.5</td>
<td align="center">0.8</td>
<td align="center" colspan="2">0.5</td>
</tr>
<tr>
<td align="center">G4</td>
<td align="center">0.6</td>
<td align="center">0.4</td>
<td align="center">0.4</td>
<td align="center">0.3</td>
<td align="center">0.3</td>
<td align="center" colspan="2">0.3</td>
</tr>
<tr>
<td align="center">G5</td>
<td align="center">0.9</td>
<td align="center">0.8</td>
<td align="center">0.8</td>
<td align="center">0.5</td>
<td align="center">0.8</td>
<td align="center" colspan="2">0.7</td>
</tr>
<tr>
<td align="center" rowspan="5">Distance citizenship performance appraisal</td>
<td align="center">G1</td>
<td align="center">0.6</td>
<td align="center">0.4</td>
<td align="center">0.5</td>
<td align="center">0.4</td>
<td align="center">0.5</td>
<td align="center" colspan="2">0.4</td>
</tr>
<tr>
<td align="center">G2</td>
<td align="center">0.6</td>
<td align="center">0.3</td>
<td align="center">0.7</td>
<td align="center">0.4</td>
<td align="center">0.4</td>
<td align="center" colspan="2">0.3</td>
</tr>
<tr>
<td align="center">G3</td>
<td align="center">0.4</td>
<td align="center">0.2</td>
<td align="center">0.3</td>
<td align="center">0.3</td>
<td align="center">0.5</td>
<td align="center" colspan="2">0.4</td>
</tr>
<tr>
<td align="center">G4</td>
<td align="center">0.6</td>
<td align="center">0.8</td>
<td align="center">0.4</td>
<td align="center">0.3</td>
<td align="center">0.4</td>
<td align="center" colspan="2">0.3</td>
</tr>
<tr>
<td align="center">G5</td>
<td align="center">0.7</td>
<td align="center">0.8</td>
<td align="center">0.6</td>
<td align="center">0.4</td>
<td align="center">0.7</td>
<td align="center" colspan="2">0.6</td>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t001fn001"><p>G1: Group training in Knowledge of dimensions and Frame-of-reference; G2: Group training in Observational accuracy; G3: Group training in Rater Error; G4: Group training in the previous three areas, Training_4_Programmes; G5: Control group.</p></fn>
</table-wrap-foot>
</table-wrap>
<sec id="sec011">
<title>Effect on level of knowledge of performance and its dimensions</title>
<p>Significant main effects were obtained for Moment measure (<italic>F</italic>(2, 154) = 58.62; <italic>p</italic> &lt; .001; <italic>η</italic><sup>2</sup> = .43; statistical power = 1.0) and Type of training (<italic>F</italic>(4, 77) = 7.51; <italic>p</italic> &lt; .001; <italic>η</italic><sup>2</sup> = .28; statistical power = .99), as well as for interaction (<italic>F</italic>(8,154) = 2.35, <italic>p</italic> &lt; .05; <italic>η</italic><sup>2</sup> = .11; statistical power = .88). A posteriori contrasts indicate that the groups trained in PDimT and FOR, and in Training_4_Programmes gain higher scores in knowledge of performance than the others, both during post-training and follow-up (<italic>p</italic> &lt; .01).</p>
</sec>
<sec id="sec012">
<title>Effect on level of knowledge of biases in assessment</title>
<p>Significant main effects were obtained for Moment measure (<italic>F</italic>(2, 154) = 60.83; <italic>p</italic> &lt; .001; <italic>η</italic><sup>2</sup> = .44; statistical power = 1.0) and Type of training (<italic>F</italic>(4, 77) = 12.31; <italic>p</italic> &lt; .001; <italic>η</italic><sup>2</sup> = .39; statistical power = 1.0), as well as for interaction (<italic>F</italic>(8, 154) = 10.61; <italic>p</italic> &lt; .001; <italic>η</italic><sup>2</sup> = .35; statistical power = 1.0). A posteriori analyses showed that groups trained in Rater Error and in Training_4_Programmes scored higher in knowledge of biases in assessment than the other groups in the measure obtained after training and during follow-up (<italic>p</italic> &lt; .001).</p>
</sec>
<sec id="sec013">
<title>Effect on observational accuracy</title>
<p>In this analysis, we used the Greenhouse-Geisser procedure to correct the degrees of freedom because Mauchly’s sphericity test was significant (<italic>χ</italic><sup>2</sup>(2) = 13.29, <italic>p</italic> &lt; .01). A significant main effect of Moment measure (<italic>F</italic>(2, 132) = 135.351; <italic>p</italic> &lt; .001; <italic>η</italic><sup>2</sup> = .64; statistical power = 1.0) was obtained. A posteriori analyses showed that all the groups increased their observational accuracy in each new moment measure (<italic>p</italic> &lt; .05).</p>
</sec>
<sec id="sec014">
<title>Effect on task performance assessment</title>
<p>The contrast was created using the Distance variable in relation to the expert group assessment, which was the absolute value resulting from subtracting the expert assessment from that made by each participant. Thus, assessments that were more similar to the expert assessment were considered more accurate.</p>
<p>In this case, as Mauchly’s sphericity test was significant (<italic>χ</italic><sup>2</sup>(2) = 7.49, <italic>p</italic> &lt; .05), we also used the Huynh-Feldt correction. Significant main effects were obtained for Moment measure (<italic>F</italic>(2, 151) = 3.92; <italic>p</italic> &lt; .05; <italic>η</italic><sup>2</sup> = .05; statistical power = .70) and Type of training (<italic>F</italic>(4, 77) = 4.59; <italic>p</italic> &lt; .01; <italic>η</italic><sup>2</sup> = .19; statistical power = .93), as well as for interaction (<italic>F</italic>(8, 154) = 2.13; <italic>p</italic> &lt; .05; <italic>η</italic><sup>2</sup> = .10; statistical power = .83). A posteriori contrasts showed that the task performance appraisals carried out by groups trained in Dimensions and Frame-of-reference, and in the Training_4_Programmes were more similar to the expert version than those of the other groups.</p>
</sec>
<sec id="sec015">
<title>Effect on citizenship performance assessment</title>
<p>No statistically significant main effects or interaction were found; consequently, participants’ citizenship performance appraisals did not vary with the type of training given or across the different moment measures.</p>
</sec>
</sec>
<sec id="sec016" sec-type="conclusions">
<title>Conclusions</title>
<p>The results confirm the first two hypotheses. Knowledge of performance and its dimensions, and of biases in assessment has increased in the groups that received specific training, either through an independent programme or through the Training_4_Programmes. This study contributes interesting data on the usefulness of training programmes for increasing knowledge of the performance dimensions and biases that may arise during appraisal. The results also show the stability of the knowledge acquired over time, an aspect of training in Rater Error that has been specifically criticized [<xref ref-type="bibr" rid="pone.0222694.ref021">21</xref>]. The results did not corroborate hypothesis 1.3, in relation to observational accuracy, since participants in all the experimental groups, regardless of type of training and whether or not they had received any, were more accurate in each new moment measure.</p>
<p>The fourth hypothesis, which considered that the group trained in all four programmes would produce a task and citizenship performance appraisal closer to the expert judgement than the other groups, was partially confirmed. Moreover, task performance assessment revealed that the scores of both the group that received Training_4_Programmes and the group trained in FOR and PDimT were closer to those issued by the group of experts than the control group and other experimental groups. This result concurs with that of previous studies, where participants trained in the creation of a common frame-of-reference, either independently or combined with other types of training, gave more accurate appraisals than those who had received no training or minimal training [<xref ref-type="bibr" rid="pone.0222694.ref046">46</xref>,<xref ref-type="bibr" rid="pone.0222694.ref033">33</xref>,<xref ref-type="bibr" rid="pone.0222694.ref034">34</xref>,<xref ref-type="bibr" rid="pone.0222694.ref039">39</xref>,<xref ref-type="bibr" rid="pone.0222694.ref041">41</xref>,<xref ref-type="bibr" rid="pone.0222694.ref014">14</xref>,<xref ref-type="bibr" rid="pone.0222694.ref015">15</xref>,<xref ref-type="bibr" rid="pone.0222694.ref029">29</xref>,<xref ref-type="bibr" rid="pone.0222694.ref047">47</xref>,<xref ref-type="bibr" rid="pone.0222694.ref035">35</xref>,<xref ref-type="bibr" rid="pone.0222694.ref048">48</xref>,<xref ref-type="bibr" rid="pone.0222694.ref036">36</xref>,<xref ref-type="bibr" rid="pone.0222694.ref037">37</xref>,<xref ref-type="bibr" rid="pone.0222694.ref049">49</xref>,<xref ref-type="bibr" rid="pone.0222694.ref027">27</xref>,<xref ref-type="bibr" rid="pone.0222694.ref025">25</xref>,<xref ref-type="bibr" rid="pone.0222694.ref050">50</xref>,<xref ref-type="bibr" rid="pone.0222694.ref018">18</xref>,<xref ref-type="bibr" rid="pone.0222694.ref032">32</xref>,<xref ref-type="bibr" rid="pone.0222694.ref051">51</xref>,<xref ref-type="bibr" rid="pone.0222694.ref052">52</xref>,<xref 
ref-type="bibr" rid="pone.0222694.ref028">28</xref>,<xref ref-type="bibr" rid="pone.0222694.ref016">16</xref>,<xref ref-type="bibr" rid="pone.0222694.ref017">17</xref>,<xref ref-type="bibr" rid="pone.0222694.ref053">53</xref>,<xref ref-type="bibr" rid="pone.0222694.ref054">54</xref>]. The results of this study, unlike those obtained in other studies [<xref ref-type="bibr" rid="pone.0222694.ref023">23</xref>,<xref ref-type="bibr" rid="pone.0222694.ref007">7</xref>], show that training in Rater Error included in a combined programme does not affect appraisal accuracy. In citizenship performance assessment, none of the groups showed any improvement in assessment accuracy.</p>
<p>The results of this study must be interpreted bearing in mind that the sample is made up of students. The following research replicates this study with a sample of employees, considering only the training programmes that engender more effective assessment in relation to a group with no training.</p>
</sec>
<sec id="sec017">
<title>Second study</title>
<p>The objective of this study, using a sample of employees, is to test the effectiveness of the two training programmes that obtained the best results in the previous study: Knowledge of dimensions and Frame-of-reference (KdFOR), and Training_4_Programmes. The specific hypotheses proposed for this second study are:</p>
<p><italic>Hypothesis 2</italic>.<italic>1</italic> (<italic>H2</italic>.<italic>1</italic>): groups receiving training will score higher in general knowledge of performance than the control group.</p>
<p><italic>Hypothesis 2</italic>.<italic>2</italic> (<italic>H2</italic>.<italic>2</italic>): groups trained in all four programme types will score higher in knowledge of appraisal biases than the other groups.</p>
<p><italic>Hypothesis 2</italic>.<italic>3</italic> (<italic>H2</italic>.<italic>3</italic>): groups trained in all four programme types will identify more accurately the occurrence or non-occurrence of various events.</p>
<p><italic>Hypothesis 2</italic>.<italic>4</italic> (<italic>H2</italic>.<italic>4</italic>): the group trained in all four training programmes will produce a task and citizenship performance appraisal closer to the expert judgement than the other groups.</p>
</sec>
<sec id="sec018" sec-type="materials|methods">
<title>Method</title>
<sec id="sec019">
<title>Participants</title>
<p>G*Power: Statistical Power Analyses revealed that sample size should be 54 people, with a 95% confidence level and 5% margin of error with three groups and five dependent variables. Given the mortality rate of the sample, the sample consisted of 42 employees from different organizations, of whom 59.5% were women and 40.5% men. The average age was 43.52 years (range, 28–60 years). Of the participants, 92.85% had received a university education and the rest secondary schooling. Participants held positions of responsibility over other persons or positions where they were required to assess other employees. Following the International Standard Classification of Occupations (ISCO-88), 42.85% of the employees held management or middle management positions in administration and human resources, 54.76% were professionals or mid-level technicians (e.g., lawyers, advisors, nurses, psychologists, teachers), and 2.38% were office workers. Of the sample, 54.8% had some prior experience in performance appraisals.</p>
</sec>
<sec id="sec020">
<title>Design</title>
<p>We used the same design as in the first study: quasi-experimental, factorial-multivariable and longitudinal (repeated measures) analysis, obtaining for each employee three measures gathered at three different moments: before and after training, and a month after training (follow-up), which constituted the within-group variable: Moment measure. Three groups were created for the between-group variable, Type of training: 1) training in Knowledge of dimensions and Frame-of-reference (KdFOR) (<italic>n</italic> = 15), 2) Training_4_Programmes (<italic>n</italic> = 13), and 3) Control group (without training, <italic>n</italic> = 14). The dependent variables were the same.</p>
</sec>
<sec id="sec021">
<title>Tools and procedure</title>
<p>We used the same materials and tools outlined in the previous study. Participants were recruited from various public and private companies, and were organized into training and control groups, according to their availability. The procedure was the same as in the first study (<xref ref-type="fig" rid="pone.0222694.g001">Fig 1</xref>).</p>
<p>Employee participation in this study was voluntary and consented. By way of incentive, participants received some economic compensation at the end of each research stage, according to the time spent. In this study, the order of presentation of both versions of the video used as a stimulus for performance appraisal was not counterbalanced because of the difficulty in adjusting timetables to employee availability.</p>
</sec>
<sec id="sec022">
<title>Ethics statement</title>
<p>Because the study involved no risk to participants, informed consent was given verbally. A meeting was held to provide information about the research project: participation was voluntary and participants could leave at any time; the data collected would be used exclusively for research purposes; personal data protection was ensured; and participation signified that participants gave their consent to the use of the research data. The University of La Laguna Ethics Committee in Tenerife, Spain (ULLECT) approved this study.</p>
</sec>
<sec id="sec023">
<title>Data and Analysis</title>
<p>The database of this second study can be consulted at <ext-link ext-link-type="uri" xlink:href="http://doi.org/10.3886/E109701V1" xlink:type="simple">http://doi.org/10.3886/E109701V1</ext-link>. Data analysis was also performed using IBM SPSS Statistics software, version 21.</p>
</sec>
</sec>
<sec id="sec024" sec-type="conclusions">
<title>Results and discussion</title>
<p>Firstly, we checked for the absence of univariate and multivariate outliers. Typical scores were used to test univariate outliers. Multivariate outliers were checked with Mahalanobis distance. No outliers were found. Secondly, to test the effect of previous experience in performance appraisal, MANOVA was undertaken using two between-group independent variables, Type of training and Previous experience in performance appraisal, and the within-group variable, Moment of measure, for each dependent variable. No main effects or statistically significant interaction were found. Thirdly, groups were analyzed for differences in pre-test scores in each dependent variable, with only one significant difference being found in knowledge of performance and its dimensions (<italic>F</italic>(2, 39) = 8.46; <italic>p</italic> &lt; .01; <italic>η</italic><sup>2</sup> = .30). A posteriori analysis with the Scheffé test revealed that the control group shows a lower score for Knowledge of performance and its dimensions (<italic>p</italic> &lt; .05) when compared with the group trained in Dimensions and Frame-of-reference. <xref ref-type="table" rid="pone.0222694.t002">Table 2</xref> shows the descriptive statistics of the dependent variables in each group and moment measure.</p>
<table-wrap id="pone.0222694.t002" position="float">
<object-id pub-id-type="doi">10.1371/journal.pone.0222694.t002</object-id>
<label>Table 2</label> <caption><title>Descriptive statistics of the dependent variables in each training group and moment measure (employees).</title></caption>
<alternatives>
<graphic id="pone.0222694.t002g" mimetype="image" position="float" xlink:href="info:doi/10.1371/journal.pone.0222694.t002" xlink:type="simple"/>
<table>
<colgroup>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
<col align="left" valign="middle"/>
</colgroup>
<thead>
<tr>
<th align="center"/>
<th align="center"/>
<th align="center" colspan="6">Moment measure</th>
</tr>
<tr>
<th align="left"/>
<th align="left"/>
<th align="center" colspan="2">Pre-training</th>
<th align="center" colspan="2">Post-training</th>
<th align="center" colspan="2">Follow-Up</th>
</tr>
<tr>
<th align="left"/>
<th align="left"/>
<th align="center"><italic>M</italic></th>
<th align="center"><italic>SD</italic></th>
<th align="center"><italic>M</italic></th>
<th align="center"><italic>SD</italic></th>
<th align="center"><italic>M</italic></th>
<th align="center"><italic>SD</italic></th>
</tr>
</thead>
<tbody>
<tr>
<td align="center" rowspan="3">Knowledge of performance and its dimensions</td>
<td align="center">G1</td>
<td align="center">7.02</td>
<td align="center">2.70</td>
<td align="center">8.40</td>
<td align="center">2.70</td>
<td align="center">9.21</td>
<td align="center">1.49</td>
</tr>
<tr>
<td align="center">G2</td>
<td align="center">5.80</td>
<td align="center">1.20</td>
<td align="center">8.18</td>
<td align="center">1.48</td>
<td align="center">8.81</td>
<td align="center">1.01</td>
</tr>
<tr>
<td align="center">G3</td>
<td align="center">4.10</td>
<td align="center">1.41</td>
<td align="center">4.46</td>
<td align="center">1.28</td>
<td align="center">4.57</td>
<td align="center">1.19</td>
</tr>
<tr>
<td align="center" rowspan="3">Knowledge of biases in performance assessment</td>
<td align="center">G1</td>
<td align="center">3.97</td>
<td align="center">3.10</td>
<td align="center">4.22</td>
<td align="center">3.30</td>
<td align="center">4.67</td>
<td align="center">3.24</td>
</tr>
<tr>
<td align="center">G2</td>
<td align="center">2.71</td>
<td align="center">2.63</td>
<td align="center">6.92</td>
<td align="center">1.96</td>
<td align="center">7.69</td>
<td align="center">1.13</td>
</tr>
<tr>
<td align="center">G3</td>
<td align="center">2.47</td>
<td align="center">0.99</td>
<td align="center">3.25</td>
<td align="center">0.94</td>
<td align="center">3.78</td>
<td align="center">1.26</td>
</tr>
<tr>
<td align="center" rowspan="3">Observational accuracy</td>
<td align="center">G1</td>
<td align="center">4.80</td>
<td align="center">0.79</td>
<td align="center">5.64</td>
<td align="center">0.63</td>
<td align="center">6.05</td>
<td align="center">0.87</td>
</tr>
<tr>
<td align="center">G2</td>
<td align="center">4.52</td>
<td align="center">0.87</td>
<td align="center">5.78</td>
<td align="center">0.52</td>
<td align="center">6.23</td>
<td align="center">0.47</td>
</tr>
<tr>
<td align="center">G3</td>
<td align="center">4.18</td>
<td align="center">0.90</td>
<td align="center">5.72</td>
<td align="center">0.85</td>
<td align="center">6.14</td>
<td align="center">0.72</td>
</tr>
<tr>
<td align="center" rowspan="4">Task performance appraisal</td>
<td align="center">G1</td>
<td align="center">5.6</td>
<td align="center">1.0</td>
<td align="center">5.3</td>
<td align="center">0.7</td>
<td align="center">5.0</td>
<td align="center">0.9</td>
</tr>
<tr>
<td align="center">G2</td>
<td align="center">5.6</td>
<td align="center">0.4</td>
<td align="center">5.3</td>
<td align="center">0.3</td>
<td align="center">4.9</td>
<td align="center">0.3</td>
</tr>
<tr>
<td align="center">G3</td>
<td align="center">6.0</td>
<td align="center">0.7</td>
<td align="center">5.8</td>
<td align="center">0.5</td>
<td align="center">5.7</td>
<td align="center">0.7</td>
</tr>
<tr>
<td align="center"/>
<td align="center" colspan="6"/>
</tr>
<tr>
<td align="center" rowspan="4">Citizenship performance appraisal</td>
<td align="center">G1</td>
<td align="center">3.3</td>
<td align="center">0.8</td>
<td align="center">2.9</td>
<td align="center">0.6</td>
<td align="center">3.2</td>
<td align="center">0.7</td>
</tr>
<tr>
<td align="center">G2</td>
<td align="center">3.3</td>
<td align="center">0.5</td>
<td align="center">3.3</td>
<td align="center">0.3</td>
<td align="center">3.4</td>
<td align="center">0.2</td>
</tr>
<tr>
<td align="center">G3</td>
<td align="center">4.1</td>
<td align="center">0.3</td>
<td align="center">4.4</td>
<td align="center">0.3</td>
<td align="center">4.4</td>
<td align="center">0.5</td>
</tr>
<tr>
<td align="center"/>
<td align="center" colspan="6"/>
</tr>
<tr>
<td align="center" rowspan="3">Distance task performance appraisal</td>
<td align="center">G1</td>
<td align="center">1.0</td>
<td align="center">0.6</td>
<td align="center">0.6</td>
<td align="center">0.4</td>
<td align="center">0.6</td>
<td align="center">0.6</td>
</tr>
<tr>
<td align="center">G2</td>
<td align="center">0.7</td>
<td align="center">0.4</td>
<td align="center">0.4</td>
<td align="center">0.3</td>
<td align="center">0.2</td>
<td align="center">0.2</td>
</tr>
<tr>
<td align="center">G3</td>
<td align="center">1.1</td>
<td align="center">0.7</td>
<td align="center">0.9</td>
<td align="center">0.5</td>
<td align="center">0.9</td>
<td align="center">0.7</td>
</tr>
<tr>
<td align="center" rowspan="3">Distance citizenship performance appraisal</td>
<td align="center">G1</td>
<td align="center">0.7</td>
<td align="center">0.4</td>
<td align="center">0.7</td>
<td align="center">0.5</td>
<td align="center">0.6</td>
<td align="center">0.5</td>
</tr>
<tr>
<td align="center">G2</td>
<td align="center">0.4</td>
<td align="center">0.3</td>
<td align="center">0.3</td>
<td align="center">0.3</td>
<td align="center">0.2</td>
<td align="center">0.1</td>
</tr>
<tr>
<td align="center">G3</td>
<td align="center">0.6</td>
<td align="center">0.3</td>
<td align="center">0.9</td>
<td align="center">0.3</td>
<td align="center">0.9</td>
<td align="center">0.5</td>
</tr>
</tbody>
</table>
</alternatives>
<table-wrap-foot>
<fn id="t002fn001"><p>G1: Group training in Knowledge of dimensions and Frame-of-reference; G2: Group training in the previous three areas, Training_4_Programmes; G3: Control group.</p></fn>
</table-wrap-foot>
</table-wrap>
<p>Fourthly, a one-way repeated measures multivariate analysis of variance (MANOVA) was undertaken using a between-group independent variable, Type of training, and the within-group variable, Moment of measure, for each dependent variable.</p>
<sec id="sec025">
<title>Effect on level of knowledge of performance and its dimensions</title>
<p>Significant main effects were obtained for the Moment measure (<italic>F</italic>(2, 78) = 20.887; <italic>p</italic> &lt; .001; <italic>η</italic><sup>2</sup> = .35; statistical power = 1.0) and Type of training (<italic>F</italic>(2, 39) = 33.92; <italic>p</italic> &lt; .001; <italic>η</italic><sup>2</sup> = .64; statistical power = 1.0), as well as for interaction (<italic>F</italic>(4, 78) = 3.377; <italic>p</italic> &lt; .05; <italic>η</italic><sup>2</sup> = .15; statistical power = .83). A posteriori contrasts indicate that the two trained groups gained higher scores in Knowledge of performance than the control group (<italic>p</italic> &lt; .05), although these differences already existed in the first moment measure (pre-training) of the group trained in Dimensions of performance and Frame-of-reference.</p>
</sec>
<sec id="sec026">
<title>Effect on level of knowledge of biases in assessment</title>
<p>As Mauchly’s sphericity test was significant (<italic>χ</italic><sup>2</sup>(2) = 7.950; <italic>p</italic> &lt; .05), we also used the Greenhouse-Geisser procedure to correct the degrees of freedom. Significant main effects were obtained for Moment measure (<italic>F</italic>(2, 66) = 26.61; <italic>p</italic> &lt; .001; <italic>η</italic><sup>2</sup> = .41; statistical power = 1.0) and Type of training (<italic>F</italic>(2, 39) = 6.071; <italic>p</italic> &lt; .001; <italic>η</italic><sup>2</sup> = .24; statistical power = .86), as well as for interaction (<italic>F</italic>(3, 66) = 9.714; <italic>p</italic> &lt; .001; <italic>η</italic><sup>2</sup> = .33; statistical power = .99). A posteriori analysis showed that the group that received Training_4_Programmes scored higher in Knowledge of biases in assessment than the other groups (<italic>p</italic> &lt; .01).</p>
</sec>
<sec id="sec027">
<title>Effect on observational accuracy</title>
<p>As Mauchly’s sphericity test was significant (<italic>χ</italic><sup>2</sup>(2) = 12.06; <italic>p</italic> &lt; .01), we used the Huynh-Feldt correction. A significant main effect was obtained for Moment measure (<italic>F</italic>(2, 67) = 151.709; <italic>p</italic> &lt; .001; <italic>η</italic><sup>2</sup> = .80; statistical power = 1.0) and interaction (<italic>F</italic>(3, 67) = 3.110; <italic>p</italic> &lt; .05; <italic>η</italic><sup>2</sup> = .14; statistical power = .74). A posteriori analysis showed no significant differences between the groups for the three moment measures.</p>
</sec>
<sec id="sec028">
<title>Effect on task performance assessment</title>
<p>As before, we calculated the effectiveness of the performance appraisals using distance in relation to the expert group assessment. Main effects were obtained for Moment measure (<italic>F</italic>(2, 78) = 12.221; <italic>p</italic> &lt; .001; <italic>η</italic><sup>2</sup> = .24; statistical power = .99), and Type of training (<italic>F</italic>(2, 39) = 4.860; <italic>p</italic> &lt; .05; <italic>η</italic><sup>2</sup> = .20; statistical power = .77). A posteriori contrast between the various moment measures gave significant differences between the pre- and post-training measures (<italic>p</italic> &lt; .05), and follow-up (<italic>p</italic> &lt; .05). A posteriori contrast with the Scheffé test in the Type of training variable showed that the group that received Training_4_Programmes produced more accurate assessments than the control group (<italic>p</italic> &lt; .05).</p>
</sec>
<sec id="sec029">
<title>Effect on citizenship performance assessment</title>
<p>A significant main effect was obtained for the variable Type of training (<italic>F</italic>(2, 39) = 8.374; <italic>p</italic> &lt; .01; <italic>η</italic><sup>2</sup> = .30; statistical power = .95) and interaction (<italic>F</italic>(4, 78) = 3.717; <italic>p</italic> &lt; .01; <italic>η</italic><sup>2</sup> = .16; statistical power = .87). A posteriori contrasts indicate that the group that received Training_4_Programmes issued a more accurate citizenship performance appraisal than the other two groups (<italic>p</italic> &lt; .05).</p>
</sec>
</sec>
<sec id="sec030" sec-type="conclusions">
<title>Conclusions</title>
<p>The results of this second study concur with those previously obtained in this work and with those of other authors: trained employees show a higher level of knowledge in the post-training and follow-up measure [<xref ref-type="bibr" rid="pone.0222694.ref039">39</xref>,<xref ref-type="bibr" rid="pone.0222694.ref027">27</xref>,<xref ref-type="bibr" rid="pone.0222694.ref016">16</xref>]. As considered in hypothesis 2.1, trained groups score higher in knowledge of performance than the control group. Although this difference was already significant in the moment measure before training, especially for the group trained in Dimensions and Frame-of-reference, descriptive statistics show that the gain in knowledge score is greater for the two trained groups. Likewise, the group that received Training_4_Programmes improved knowledge of biases in assessment, in line with hypothesis 2.2. For observational accuracy, contrary to hypothesis 2.3, once again all the groups showed greater accuracy in each new moment measure.</p>
<p>The results confirm hypothesis 2.4 because the group trained in all four programmes produced a task and citizenship performance appraisal closer to the one issued by the expert rater group than the control group.</p>
</sec>
<sec id="sec031">
<title>General discussion</title>
<p>The aim of this study was to analyze how the type of theoretical and practical training influences performance appraisal. The first study compared performance appraisal by students who were trained in four types of programmes (Knowledge of dimensions and Frame-of-reference, KdFOR; Observational accuracy; Rater Error Training, RET; and Training_4_Programmes) and that conducted by those who received no training. The second study tested the effectiveness among employees of the two training programmes: Knowledge of dimensions and Frame-of-reference, and the Training_4_Programmes, which gave better results in the first study.</p>
<p>Both studies found that training increases knowledge of performance and its dimensions, and of biases in assessment, as was posited in hypotheses 1.1, 2.1 and 1.2, 2.2. Thus, for students and employees alike, the training programmes facilitate the acquisition of knowledge about bias identification and performance dimensions, as well as the development of a framework-of-reference shared by the raters. This knowledge can improve performance appraisal accuracy. To this end, data on the effectiveness of training programmes are provided, an issue highlighted by some authors as one of the aspects that requires further study in this field [<xref ref-type="bibr" rid="pone.0222694.ref055">55</xref>].</p>
<p>In relation to the third hypothesis (1.3 and 2.3), we analyzed the rating accuracy of participants in the various training groups when deciding on the occurrence or non-occurrence of several events from a list about a short film. This kind of analysis is similar to one previously conducted by other researchers who attempted to assess whether suitable or specific training can increase rater observational accuracy [<xref ref-type="bibr" rid="pone.0222694.ref033">33</xref>,<xref ref-type="bibr" rid="pone.0222694.ref027">27</xref>,<xref ref-type="bibr" rid="pone.0222694.ref032">32</xref>,<xref ref-type="bibr" rid="pone.0222694.ref028">28</xref>,<xref ref-type="bibr" rid="pone.0222694.ref016">16</xref>,<xref ref-type="bibr" rid="pone.0222694.ref026">26</xref>,<xref ref-type="bibr" rid="pone.0222694.ref019">19</xref>]. Likewise, most of these authors indicated that, although training does not improve performance assessment accuracy, it is beneficial in terms of recognition and recall. In both studies, the hypothesis raised was rejected. All participants, trained or otherwise, were more accurate at each new assessment moment. That is, the scores of all participants improved from the pre-training to the post-training moment measure, and improved again at follow-up. A plausible explanation is the learning associated with the task, along with repeated exposure—three times—to the list of events and behaviours, and to the short film.</p>
<p>By contrasting with the fourth hypothesis (1.4 and 2.4), we have attempted to decide whether task and citizenship performance appraisal varies according to the type of training received. Several studies have analyzed the effectiveness of the various types of rater training along these lines [<xref ref-type="bibr" rid="pone.0222694.ref056">56</xref>,<xref ref-type="bibr" rid="pone.0222694.ref014">14</xref>,<xref ref-type="bibr" rid="pone.0222694.ref057">57</xref>,<xref ref-type="bibr" rid="pone.0222694.ref035">35</xref>,<xref ref-type="bibr" rid="pone.0222694.ref027">27</xref>,<xref ref-type="bibr" rid="pone.0222694.ref050">50</xref>,<xref ref-type="bibr" rid="pone.0222694.ref032">32</xref>,<xref ref-type="bibr" rid="pone.0222694.ref013">13</xref>,<xref ref-type="bibr" rid="pone.0222694.ref052">52</xref>,<xref ref-type="bibr" rid="pone.0222694.ref016">16</xref>,<xref ref-type="bibr" rid="pone.0222694.ref053">53</xref>,<xref ref-type="bibr" rid="pone.0222694.ref054">54</xref>,<xref ref-type="bibr" rid="pone.0222694.ref007">7</xref>]. Likewise, in order to evaluate the improvement of assessment accuracy, most studies [<xref ref-type="bibr" rid="pone.0222694.ref058">58</xref>,<xref ref-type="bibr" rid="pone.0222694.ref014">14</xref>,<xref ref-type="bibr" rid="pone.0222694.ref015">15</xref>,<xref ref-type="bibr" rid="pone.0222694.ref018">18</xref>,<xref ref-type="bibr" rid="pone.0222694.ref016">16</xref>,<xref ref-type="bibr" rid="pone.0222694.ref017">17</xref>,<xref ref-type="bibr" rid="pone.0222694.ref054">54</xref>,<xref ref-type="bibr" rid="pone.0222694.ref019">19</xref>] have used Cronbach’s indices [<xref ref-type="bibr" rid="pone.0222694.ref011">11</xref>] or Borman’s distance accuracy index [<xref ref-type="bibr" rid="pone.0222694.ref012">12</xref>]. In this study, we chose Borman’s index [<xref ref-type="bibr" rid="pone.0222694.ref012">12</xref>], using the scores given by a group of experts as a reference measure to evaluate appraisal effectiveness.</p>
<p>The results of the first study showed that raters who received training in frame-of-reference or the Training_4_Programmes produced a more accurate performance appraisal, in line with other works [<xref ref-type="bibr" rid="pone.0222694.ref034">34</xref>,<xref ref-type="bibr" rid="pone.0222694.ref041">41</xref>,<xref ref-type="bibr" rid="pone.0222694.ref014">14</xref>,<xref ref-type="bibr" rid="pone.0222694.ref015">15</xref>,<xref ref-type="bibr" rid="pone.0222694.ref040">40</xref>,<xref ref-type="bibr" rid="pone.0222694.ref035">35</xref>,<xref ref-type="bibr" rid="pone.0222694.ref036">36</xref>,<xref ref-type="bibr" rid="pone.0222694.ref037">37</xref>,<xref ref-type="bibr" rid="pone.0222694.ref049">49</xref>,<xref ref-type="bibr" rid="pone.0222694.ref027">27</xref>,<xref ref-type="bibr" rid="pone.0222694.ref018">18</xref>,<xref ref-type="bibr" rid="pone.0222694.ref032">32</xref>,<xref ref-type="bibr" rid="pone.0222694.ref051">51</xref>,<xref ref-type="bibr" rid="pone.0222694.ref017">17</xref>,<xref ref-type="bibr" rid="pone.0222694.ref053">53</xref>,<xref ref-type="bibr" rid="pone.0222694.ref054">54</xref>]. For citizenship performance appraisal, however, training did not improve student accuracy, as in the study by Sulsky et al. [<xref ref-type="bibr" rid="pone.0222694.ref053">53</xref>]. This result may be due to the difficulty of capturing citizenship performance in a video, thereby making assessment difficult, especially for students. However, the results of the second study show greater accuracy in both task and citizenship performance assessment when employees receive training in the Training_4_programmes.</p>
<p>The results allow us to draw a series of conclusions. First, that the group trained in Dimensions and Frame-of-reference excelled compared with others is in line with the results of other authors, showing once again the effectiveness of this type of programme [<xref ref-type="bibr" rid="pone.0222694.ref014">14</xref>,<xref ref-type="bibr" rid="pone.0222694.ref056">56</xref>,<xref ref-type="bibr" rid="pone.0222694.ref035">35</xref>,<xref ref-type="bibr" rid="pone.0222694.ref013">13</xref>,<xref ref-type="bibr" rid="pone.0222694.ref052">52</xref>,<xref ref-type="bibr" rid="pone.0222694.ref016">16</xref>,<xref ref-type="bibr" rid="pone.0222694.ref053">53</xref>,<xref ref-type="bibr" rid="pone.0222694.ref054">54</xref>]. Second, that the increased accuracy of appraisal by the Training_4_Programmes group goes against the results obtained by Noonan and Sulsky [<xref ref-type="bibr" rid="pone.0222694.ref027">27</xref>], who point out that the combined use of several types of training does not lead to a significant increase in effectiveness, beyond the improvement obtained from implementing the programmes separately. However, it supports the work of Eppich et al. [<xref ref-type="bibr" rid="pone.0222694.ref041">41</xref>], who achieved considerable improvement in rater accuracy by combining all types of training strategies, despite a small sample size. Moreover, a positive aspect of the studies presented in this paper is that, unlike that of Eppich et al. [<xref ref-type="bibr" rid="pone.0222694.ref041">41</xref>], appraisal accuracy is measured by using true scores from a group of experts. Third, Sulsky et al. [<xref ref-type="bibr" rid="pone.0222694.ref053">53</xref>] highlight the importance of citizenship behaviours as an essential part of employees’ daily work. 
An important contribution of this study is the inclusion of citizenship performance assessment as a trainable aspect, since only very few studies have used training in citizenship performance appraisal and have shown the effectiveness of that training. Future research should continue to explore how the accuracy of citizenship performance appraisal can be improved and why training is sometimes effective and sometimes not, as shown by these results.</p>
<p>Another contribution of this study is the comparison of all the training programmes categorized by Woehr and Huffcutt [<xref ref-type="bibr" rid="pone.0222694.ref007">7</xref>], as well as their combination and comparison with an untrained group, thereby facilitating the evaluation of the effectiveness of various components covered in each training programme.</p>
<p>These findings are not exempt from certain limitations when the results are generalized. On the one hand, the sample used in the first study was composed of students whose perspective may be distanced from the reality of the world of work. Nevertheless, although performance assessment is associated with employees, it is routine practice to use students in research focusing on performance rater training. Laboratory situations are presented with a fictitious evaluation task in which students are required to play the role of the rater [<xref ref-type="bibr" rid="pone.0222694.ref027">27</xref>]. In the second study, the sample was made up of employees, whose limited availability curbed the counterbalancing of the order of presentation of the videos used as a stimulus for performance assessment and random assignment to experimental groups. Moreover, this sample contained a high percentage of participants with university studies. Using raters with a different level of study may give different results.</p>
<p>Moreover, it would have been advisable to have a larger group size. However, these types of longitudinal studies that require considerable time involvement from participants are associated with difficulties in recruiting and maintaining the sample. Despite efforts to increase sample size, it was not possible, and the power of the results was analyzed a posteriori, revealing an adequate value in most cases.</p>
<p>On the other hand, another improvable aspect is the use of videos as a base material for carrying out performance appraisal. In this regard, Noonan and Sulsky [<xref ref-type="bibr" rid="pone.0222694.ref027">27</xref>] were the first authors to study the effectiveness of rater training (FOR and BOT) in the applied field, since, until then, all studies had used laboratory situations with a fictitious assessment task. Their results show that in applied fields and with assessments of real employees training also improves assessment effectiveness. However, in order to claim that the effect of training is greater or more easily assimilated when real employees are rated, this procedure should be compared with a trained group rating fictitious employees. That said, in future, it would be interesting to implement programmes that have excelled in rater training in one or several organizations, so that pre- and post-training, and follow-up measures of performance appraisal are of real employees, with whom raters interact on a daily basis.</p>
<p>Finally, the contribution of this study to the field of rater training programmes is worth noting. Students in the experimental group in Training_4_Programmes and participants in the Frame-of-reference group excelled, gaining the best scores in both knowledge tests and a more accurate task performance appraisal. This finding is a further step ahead in rater training, since a training programme that includes features of all kinds of programmes can be as effective as Frame-of-reference training, pinpointed in numerous studies as offering greater accuracy [<xref ref-type="bibr" rid="pone.0222694.ref041">41</xref>,<xref ref-type="bibr" rid="pone.0222694.ref014">14</xref>,<xref ref-type="bibr" rid="pone.0222694.ref056">56</xref>,<xref ref-type="bibr" rid="pone.0222694.ref054">54</xref>]. Likewise, when applied to students, the longer combined programme does not necessarily give better results than a shorter training programme; its higher cost of implementation therefore argues against its use in that setting. However, when employees receive training, the combined training programme has been shown to be more effective in both types of performance appraisal. Therefore, when the aim of the organization is accurate appraisal in both task and citizenship performance, Training_4_Programmes is more appropriate.</p>
</sec>
<sec id="sec032">
<title>Supporting information</title>
<supplementary-material id="pone.0222694.s001" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pone.0222694.s001" xlink:type="simple">
<label>S1 Appendix</label>
<caption>
<title>Summary of objectives, contents, and length of training programmes.</title>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pone.0222694.s002" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pone.0222694.s002" xlink:type="simple">
<label>S1 Questionnaire</label>
<caption>
<title>Questionnaire on knowledge of performance and its dimensions.</title>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pone.0222694.s003" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pone.0222694.s003" xlink:type="simple">
<label>S2 Questionnaire</label>
<caption>
<title>Questionnaire on knowledge of biases in performance assessment.</title>
<p>(PDF)</p>
</caption>
</supplementary-material>
<supplementary-material id="pone.0222694.s004" mimetype="application/pdf" position="float" xlink:href="info:doi/10.1371/journal.pone.0222694.s004" xlink:type="simple">
<label>S3 Questionnaire</label>
<caption>
<title>Checklist of observational accuracy.</title>
<p>(PDF)</p>
</caption>
</supplementary-material>
</sec>
</body>
<back>
<ack>
<p>This study is framed within project PSI2010-17327, financed by the National Programme for Fundamental Research Projects of the Ministry of Science, Innovation, and Universities of the Government of Spain (MICINN).</p>
</ack>
<ref-list>
<title>References</title>
<ref id="pone.0222694.ref001"><label>1</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Schraeder</surname> <given-names>M.</given-names></name>, <name name-style="western"><surname>Becton</surname> <given-names>J.B.</given-names></name>, &amp; <name name-style="western"><surname>Portis</surname> <given-names>R.</given-names></name> (<year>2007</year>). <article-title>A Critical Examination of Performance Appraisal: An Organization’s Friend or Foe?</article-title> <source><italic>The Journal for Quality and Participation</italic></source>, <volume>30</volume>, <fpage>20</fpage>–<lpage>25</lpage>.</mixed-citation></ref>
<ref id="pone.0222694.ref002"><label>2</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Borman</surname> <given-names>W. C.</given-names></name>, &amp; <name name-style="western"><surname>Motowidlo</surname> <given-names>S. J.</given-names></name> (<year>1997</year>). <article-title>Task performance and contextual performance: The meaning for personnel selection research</article-title>. <source>Human Performance</source>, <volume>10</volume>(<issue>2</issue>), <fpage>99</fpage>–<lpage>109</lpage>. <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1207/s15327043hup1002_3" xlink:type="simple">http://dx.doi.org/10.1207/s15327043hup1002_3</ext-link></mixed-citation></ref>
<ref id="pone.0222694.ref003"><label>3</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Motowidlo</surname> <given-names>S. J.</given-names></name>, &amp; <name name-style="western"><surname>Schmit</surname> <given-names>M. J.</given-names></name> (<year>1999</year>). <chapter-title>Performance assessment in unique jobs</chapter-title>. In <name name-style="western"><surname>Ilgen</surname> <given-names>D. R.</given-names></name>, &amp; <name name-style="western"><surname>Pulakos</surname> <given-names>E. D.</given-names></name> (Eds.), <source>The changing nature of performance: Implications for staffing, motivation, and development</source> (pp. <fpage>56</fpage>–<lpage>87</lpage>). <publisher-loc>San Francisco</publisher-loc>: <publisher-name>Jossey-Bass</publisher-name>.</mixed-citation></ref>
<ref id="pone.0222694.ref004"><label>4</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Viswesvaran</surname> <given-names>C.</given-names></name>, &amp; <name name-style="western"><surname>Ones</surname> <given-names>D. S.</given-names></name> (<year>2000</year>). <article-title>Perspectives on models of job performance</article-title>. <source><italic>International Journal of Selection and Assessment</italic></source>, <volume>8</volume>(<issue>4</issue>), <fpage>216</fpage>–<lpage>226</lpage>. <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1111/1468-2389.00151" xlink:type="simple">http://dx.doi.org/10.1111/1468-2389.00151</ext-link></mixed-citation></ref>
<ref id="pone.0222694.ref005"><label>5</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Whitman</surname> <given-names>D. S.</given-names></name>, <name name-style="western"><surname>Van Rooy</surname> <given-names>D. L.</given-names></name> &amp; <name name-style="western"><surname>Viswesvaran</surname> <given-names>C.</given-names></name> (<year>2010</year>). <article-title>Satisfaction, citizenship behaviors, and performance in work units: A meta-analysis of collective construct relations</article-title>. <source><italic>Personnel Psychology</italic></source>, <volume>63</volume>, <fpage>41</fpage>–<lpage>81</lpage>.</mixed-citation></ref>
<ref id="pone.0222694.ref006"><label>6</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Díaz-Vilela</surname> <given-names>L.</given-names></name>, <name name-style="western"><surname>Díaz-Cabrera</surname> <given-names>D.</given-names></name>, <name name-style="western"><surname>Isla-Díaz</surname> <given-names>R.</given-names></name>, <name name-style="western"><surname>Hernández-Fernaud</surname> <given-names>E.</given-names></name>, &amp; <name name-style="western"><surname>Rosales-Sánchez</surname> <given-names>C.</given-names></name> (<year>2012</year>). <article-title>Spanish adaptation of the citizenship performance questionnaire by Coleman and Borman (2000) and an analysis of the empiric structure of the construct</article-title>. <source><italic>Revista de Psicología del Trabajo y las Organizaciones</italic></source>, <volume>28</volume>(<issue>3</issue>), <fpage>135</fpage>–<lpage>149</lpage>. <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.5093/tr2012a11" xlink:type="simple">http://dx.doi.org/10.5093/tr2012a11</ext-link></mixed-citation></ref>
<ref id="pone.0222694.ref007"><label>7</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Woehr</surname> <given-names>D.J.</given-names></name>, &amp; <name name-style="western"><surname>Huffcutt</surname> <given-names>A.I.</given-names></name> (<year>1994</year>). <article-title>Rater training for performance appraisal: A quantitative review</article-title>. <source><italic>Journal of Occupational and Organizational Psychology</italic></source>, <volume>67</volume>, <fpage>189</fpage>–<lpage>205</lpage>. <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1111/j.2044-8325.1994.tb00562.x" xlink:type="simple">http://dx.doi.org/10.1111/j.2044-8325.1994.tb00562.x</ext-link></mixed-citation></ref>
<ref id="pone.0222694.ref008"><label>8</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Landy</surname> <given-names>F. J.</given-names></name>, &amp; <name name-style="western"><surname>Farr</surname> <given-names>J. L.</given-names></name> (<year>1983</year>). <source><italic>The measurement of work performance</italic>: <italic>Methods</italic>, <italic>theory</italic>, <italic>and applications</italic></source>. <publisher-loc>New York</publisher-loc>: <publisher-name>Academic Press</publisher-name>.</mixed-citation></ref>
<ref id="pone.0222694.ref009"><label>9</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Aguinis</surname> <given-names>H.</given-names></name> (<year>2013</year>). <source>Performance management</source>. <publisher-loc>Upper Saddle River, New Jersey</publisher-loc>: <publisher-name>Pearson</publisher-name>.</mixed-citation></ref>
<ref id="pone.0222694.ref010"><label>10</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Sulsky</surname> <given-names>L.M.</given-names></name>, &amp; <name name-style="western"><surname>Balzer</surname> <given-names>W.K.</given-names></name> (<year>1988</year>). <article-title>Meaning and measurement of performance rating accuracy. Some methodological and theoretical concerns</article-title>. <source><italic>Journal of Applied Psychology</italic></source>, <volume>73</volume>, <fpage>497</fpage>–<lpage>506</lpage>. <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1037/0021-9010.73.3.497" xlink:type="simple">http://dx.doi.org/10.1037/0021-9010.73.3.497</ext-link></mixed-citation></ref>
<ref id="pone.0222694.ref011"><label>11</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Cronbach</surname> <given-names>L. J.</given-names></name> (<year>1955</year>). <article-title>Processes affecting scores on "understanding of others" and "assumed similarity."</article-title> <source><italic>Psychological Bulletin</italic></source>, <volume>52</volume>, <fpage>177</fpage>–<lpage>193</lpage>. <object-id pub-id-type="pmid">14371889</object-id></mixed-citation></ref>
<ref id="pone.0222694.ref012"><label>12</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Borman</surname> <given-names>W.C.</given-names></name> (<year>1977</year>). <article-title>Consistency of rating accuracy and rating errors in the judgment of human performance</article-title>. <source><italic>Organizational Behavior and Human Performance</italic></source>, <volume>20</volume>, <fpage>238</fpage>–<lpage>252</lpage>. <object-id pub-id-type="pmid">10305661</object-id></mixed-citation></ref>
<ref id="pone.0222694.ref013"><label>13</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Roch</surname> <given-names>S. G.</given-names></name>, <name name-style="western"><surname>Woehr</surname> <given-names>D. J.</given-names></name>, <name name-style="western"><surname>Mishra</surname> <given-names>V.</given-names></name>, &amp; <name name-style="western"><surname>Kieszczynska</surname> <given-names>U.</given-names></name> (<year>2012</year>). <article-title>Rater training revisited: An updated meta-analytic review of frame-of-reference training</article-title>. <source><italic>Journal of Occupational and Organizational Psychology</italic></source>, <volume>85</volume>, <fpage>370</fpage>–<lpage>394</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1111/j.2044-8325.2011.02045.x" xlink:type="simple">10.1111/j.2044-8325.2011.02045.x</ext-link></comment></mixed-citation></ref>
<ref id="pone.0222694.ref014"><label>14</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Gorman</surname> <given-names>C. A.</given-names></name>, &amp; <name name-style="western"><surname>Rentsch</surname> <given-names>J. R.</given-names></name> (<year>2009</year>). <article-title>Evaluating frame-of-reference rater training effectiveness using performance schema accuracy</article-title>. <source><italic>Journal of Applied Psychology</italic></source>, <volume>94</volume>, <fpage>1336</fpage>–<lpage>1344</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1037/a0016476" xlink:type="simple">10.1037/a0016476</ext-link></comment> <object-id pub-id-type="pmid">19702375</object-id></mixed-citation></ref>
<ref id="pone.0222694.ref015"><label>15</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Gorman</surname> <given-names>C. A.</given-names></name>, &amp; <name name-style="western"><surname>Rentsch</surname> <given-names>J. R.</given-names></name> (<year>2016</year>). <article-title>Retention of Assessment Center Rater Training</article-title>. <source><italic>Journal of Personnel Psychology</italic></source>, <volume>16</volume>, <fpage>1</fpage>–<lpage>11</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1027/1866-5888/a000167" xlink:type="simple">10.1027/1866-5888/a000167</ext-link></comment> Hogrefe Publishing.</mixed-citation></ref>
<ref id="pone.0222694.ref016"><label>16</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Sulsky</surname> <given-names>L. M.</given-names></name>, &amp; <name name-style="western"><surname>Day</surname> <given-names>D. V.</given-names></name> (<year>1994</year>). <article-title>Effects of frame-of-reference training on rater accuracy under alternative time delays</article-title>. <source><italic>Journal of Applied Psychology</italic></source>, <volume>79</volume>, <fpage>535</fpage>–<lpage>543</lpage>. <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1037/0021-9010.79.4.535" xlink:type="simple">http://dx.doi.org/10.1037/0021-9010.79.4.535</ext-link></mixed-citation></ref>
<ref id="pone.0222694.ref017"><label>17</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Sulsky</surname> <given-names>L. M.</given-names></name>, &amp; <name name-style="western"><surname>Kline</surname> <given-names>T. J. B.</given-names></name> (<year>2007</year>). <article-title>Understanding frame-of-reference training success: A social learning theory perspective</article-title>. <source><italic>International Journal of Training and Development</italic></source>, <volume>11</volume>, <fpage>121</fpage>–<lpage>131</lpage>.</mixed-citation></ref>
<ref id="pone.0222694.ref018"><label>18</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Raczynski</surname> <given-names>K.R.</given-names></name>, <name name-style="western"><surname>Cohen</surname> <given-names>A.S.</given-names></name>, <name name-style="western"><surname>Engelhard</surname> <given-names>G.</given-names></name> &amp; <name name-style="western"><surname>Lu</surname> <given-names>Z.</given-names></name> (<year>2015</year>). <article-title>Comparing the Effectiveness of Self-Paced and Collaborative Frame-of-Reference Training on Rater Accuracy in a Large-Scale Writing Assessment</article-title>. <source><italic>Journal of Educational Measurement</italic></source>, <volume>52</volume>(<issue>3</issue>), <fpage>301</fpage>–<lpage>318</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1111/jedm.12079" xlink:type="simple">10.1111/jedm.12079</ext-link></comment></mixed-citation></ref>
<ref id="pone.0222694.ref019"><label>19</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Woehr</surname> <given-names>D. J.</given-names></name> (<year>1994</year>). <article-title>Understanding frame-of-reference training: The impact of training on the recall of performance information</article-title>. <source><italic>Journal of Applied Psychology</italic></source>, <volume>79</volume>, <fpage>525</fpage>–<lpage>534</lpage>.</mixed-citation></ref>
<ref id="pone.0222694.ref020"><label>20</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Bernardin</surname> <given-names>H.J.</given-names></name> (<year>1978</year>). <article-title>Effects of rater training on leniency and halo errors in student ratings of instructors</article-title>. <source><italic>Journal of Applied Psychology</italic></source>, <volume>63</volume>, <fpage>301</fpage>–<lpage>308</lpage>. <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1037/0021-9010.63.3.301" xlink:type="simple">http://dx.doi.org/10.1037/0021-9010.63.3.301</ext-link></mixed-citation></ref>
<ref id="pone.0222694.ref021"><label>21</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Latham</surname> <given-names>G. P.</given-names></name>, <name name-style="western"><surname>Wexley</surname> <given-names>K. N.</given-names></name>, &amp; <name name-style="western"><surname>Pursell</surname> <given-names>E. D.</given-names></name> (<year>1975</year>). <article-title>Training managers to minimize rating errors in the observation of behavior</article-title>. <source><italic>Journal of Applied Psychology</italic></source>, <volume>60</volume>, <fpage>550</fpage>–<lpage>555</lpage>. <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1037/0021-9010.60.5.550" xlink:type="simple">http://dx.doi.org/10.1037/0021-9010.60.5.550</ext-link></mixed-citation></ref>
<ref id="pone.0222694.ref022"><label>22</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Bernardin</surname> <given-names>H.J.</given-names></name>, &amp; <name name-style="western"><surname>Walter</surname> <given-names>C.S.</given-names></name> (<year>1977</year>). <article-title>Effects of rater training and diary-keeping on psychometric error in ratings</article-title>. <source><italic>Journal of Applied Psychology</italic></source>, <volume>62</volume>, <fpage>64</fpage>–<lpage>69</lpage>. <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1037/0021-9010.62.1.64" xlink:type="simple">http://dx.doi.org/10.1037/0021-9010.62.1.64</ext-link></mixed-citation></ref>
<ref id="pone.0222694.ref023"><label>23</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Bernardin</surname> <given-names>H.J.</given-names></name>, &amp; <name name-style="western"><surname>Pence</surname> <given-names>E.C.</given-names></name> (<year>1980</year>). <article-title>The effects of rater training: Creating new response sets and decreasing accuracy</article-title>. <source><italic>Journal of Applied Psychology</italic></source>, <volume>65</volume>, <fpage>60</fpage>–<lpage>66</lpage>. <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1037/0021-9010.65.1.60" xlink:type="simple">http://dx.doi.org/10.1037/0021-9010.65.1.60</ext-link></mixed-citation></ref>
<ref id="pone.0222694.ref024"><label>24</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Smith</surname> <given-names>D.E.</given-names></name> (<year>1986</year>). <article-title>Programs for performance appraisal: A Review</article-title>. <source><italic>The Academy of Management Review</italic></source>, Vol <volume>11</volume>, No. <issue>1</issue>, <fpage>22</fpage>–<lpage>40</lpage>.</mixed-citation></ref>
<ref id="pone.0222694.ref025"><label>25</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Pulakos</surname> <given-names>E. D.</given-names></name> (<year>1984</year>). <article-title>A comparison of training programs: Error training and accuracy training</article-title>. <source><italic>Journal of Applied Psychology</italic></source>, <volume>69</volume>, <fpage>581</fpage>–<lpage>588</lpage>.</mixed-citation></ref>
<ref id="pone.0222694.ref026"><label>26</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Thornton</surname> <given-names>G.C.</given-names></name> &amp; <name name-style="western"><surname>Zorich</surname> <given-names>S.</given-names></name> (<year>1980</year>). <article-title>Training to improve observer accuracy</article-title>. <source><italic>Journal of Applied Psychology</italic></source>, Vol. <volume>65</volume>, No. <issue>3</issue>, <fpage>351</fpage>–<lpage>354</lpage>. <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1037/0021-9010.65.3.351" xlink:type="simple">http://dx.doi.org/10.1037/0021-9010.65.3.351</ext-link></mixed-citation></ref>
<ref id="pone.0222694.ref027"><label>27</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Noonan</surname> <given-names>L.E.</given-names></name>, &amp; <name name-style="western"><surname>Sulsky</surname> <given-names>L.M.</given-names></name> (<year>2001</year>). <article-title>Impact of Frame-of-Reference and Behavioral Observation Training on Alternative Training Effectiveness Criteria in a Canadian Military Sample</article-title>. <source>Human Performance</source>, <volume>14</volume>(<issue>1</issue>), <fpage>3</fpage>–<lpage>26</lpage>. <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1207/S15327043HUP1401_02" xlink:type="simple">http://dx.doi.org/10.1207/S15327043HUP1401_02</ext-link></mixed-citation></ref>
<ref id="pone.0222694.ref028"><label>28</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Sulsky</surname> <given-names>L. M.</given-names></name>, &amp; <name name-style="western"><surname>Day</surname> <given-names>D. V.</given-names></name> (<year>1992</year>). <article-title>Frame-of-reference training and cognitive categorization: An empirical investigation of rater memory issues</article-title>. <source><italic>Journal of Applied Psychology</italic></source>, <volume>77</volume>, <fpage>501</fpage>–<lpage>510</lpage>. <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1037/0021-9010.77.4.501" xlink:type="simple">http://dx.doi.org/10.1037/0021-9010.77.4.501</ext-link> <object-id pub-id-type="pmid">1512184</object-id></mixed-citation></ref>
<ref id="pone.0222694.ref029"><label>29</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hedge</surname> <given-names>J. W.</given-names></name>, &amp; <name name-style="western"><surname>Kavanagh</surname> <given-names>M. J.</given-names></name> (<year>1988</year>). <article-title>Improving the accuracy of performance evaluations: Comparison of three methods of performance appraiser training</article-title>. <source><italic>Journal of Applied Psychology</italic></source>, <volume>73</volume>, <fpage>68</fpage>–<lpage>73</lpage>.</mixed-citation></ref>
<ref id="pone.0222694.ref030"><label>30</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Bernardin</surname> <given-names>H. J.</given-names></name>, &amp; <name name-style="western"><surname>Buckley</surname> <given-names>M. R.</given-names></name> (<year>1981</year>). <article-title>Strategies in rater training</article-title>. <source><italic>Academy of Management Review</italic></source>, <volume>6</volume>, <fpage>205</fpage>–<lpage>212</lpage>. <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.5465/AMR.1981.4287782" xlink:type="simple">http://dx.doi.org/10.5465/AMR.1981.4287782</ext-link></mixed-citation></ref>
<ref id="pone.0222694.ref031"><label>31</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>McIntyre</surname> <given-names>R.</given-names></name>, <name name-style="western"><surname>Smith</surname> <given-names>D.</given-names></name>, &amp; <name name-style="western"><surname>Hassett</surname> <given-names>C.</given-names></name> (<year>1984</year>). <article-title>Accuracy of performance ratings as affected by rater training and perceived purpose of rating</article-title>. <source><italic>Journal of Applied Psychology</italic></source>, <volume>69</volume>, <fpage>147</fpage>–<lpage>156</lpage>.</mixed-citation></ref>
<ref id="pone.0222694.ref032"><label>32</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Roch</surname> <given-names>S.G.</given-names></name>, &amp; <name name-style="western"><surname>O’Sullivan</surname> <given-names>B.J.</given-names></name> (<year>2003</year>). <article-title>Frame of reference rater training issues: recall, time and behavior observation training</article-title>. <source><italic>International Journal of Training and Development</italic></source>, <volume>7</volume>(<issue>2</issue>). <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1111/1468-2419.00174" xlink:type="simple">http://dx.doi.org/10.1111/1468-2419.00174</ext-link></mixed-citation></ref>
<ref id="pone.0222694.ref033"><label>33</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Cardy</surname> <given-names>R.</given-names></name>, &amp; <name name-style="western"><surname>Keefe</surname> <given-names>T. J.</given-names></name> (<year>1994</year>). <article-title>Observational purpose and evaluative articulation in frame-of-reference training: The effects of alternative processing modes on rater accuracy</article-title>. <source><italic>Organizational Behavior and Human Decision Processes</italic></source>, <volume>57</volume>, <fpage>338</fpage>–<lpage>357</lpage>.</mixed-citation></ref>
<ref id="pone.0222694.ref034"><label>34</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Chirico</surname> <given-names>K. E.</given-names></name>, <name name-style="western"><surname>Buckley</surname> <given-names>M. R.</given-names></name>, <name name-style="western"><surname>Wheeler</surname> <given-names>A. R.</given-names></name>, <name name-style="western"><surname>Facteau</surname> <given-names>J. D.</given-names></name>, <name name-style="western"><surname>Bernardin</surname> <given-names>H. J.</given-names></name>, &amp; <name name-style="western"><surname>Beu</surname> <given-names>D. S.</given-names></name> (<year>2004</year>). <article-title>A note on the need for true scores in frame-of-reference (FOR) training research</article-title>. <source><italic>Journal of Managerial Issues</italic></source>, <volume>16</volume>, <fpage>382</fpage>–<lpage>395</lpage>.</mixed-citation></ref>
<ref id="pone.0222694.ref035"><label>35</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Keown-Gerrard</surname> <given-names>J.L.</given-names></name>, &amp; <name name-style="western"><surname>Sulsky</surname> <given-names>L.M.</given-names></name> (<year>2001</year>). <article-title>The Effects of Task Information Training and Frame-of-Reference Training With Situational Constraints on Rating Accuracy</article-title>. <source>Human Performance</source>, <volume>14</volume>(<issue>4</issue>), <fpage>305</fpage>–<lpage>320</lpage>. <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1207/S15327043HUP1404_2" xlink:type="simple">http://dx.doi.org/10.1207/S15327043HUP1404_2</ext-link></mixed-citation></ref>
<ref id="pone.0222694.ref036"><label>36</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Lievens</surname> <given-names>F.</given-names></name>, &amp; <name name-style="western"><surname>Sánchez</surname> <given-names>J. I.</given-names></name> (<year>2007</year>). <article-title>Can training improve the quality of inferences made by raters in competency modeling? A quasi-experiment</article-title>. <source><italic>Journal of Applied Psychology</italic></source>, <volume>92</volume>, <fpage>812</fpage>–<lpage>819</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1037/0021-9010.92.3.812" xlink:type="simple">10.1037/0021-9010.92.3.812</ext-link></comment> <object-id pub-id-type="pmid">17484560</object-id></mixed-citation></ref>
<ref id="pone.0222694.ref037"><label>37</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Loignon</surname> <given-names>A.C.</given-names></name>, <name name-style="western"><surname>Woehr</surname> <given-names>D. J.</given-names></name>, <name name-style="western"><surname>Thomas</surname> <given-names>J.S.</given-names></name>, <name name-style="western"><surname>Loughry</surname> <given-names>M.L.</given-names></name>, <name name-style="western"><surname>Ohland</surname> <given-names>M. W.</given-names></name>, &amp; <name name-style="western"><surname>Ferguson</surname> <given-names>D.</given-names></name> (<year>2016</year>). <article-title>Facilitating Peer Evaluation in Team Contexts: The Impact of Frame-Of-Reference Rater Training</article-title>. <source>Academy of Management Learning &amp; Education</source>.</mixed-citation></ref>
<ref id="pone.0222694.ref038"><label>38</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Schleicher</surname> <given-names>D. J.</given-names></name>, &amp; <name name-style="western"><surname>Day</surname> <given-names>D. V.</given-names></name> (<year>1998</year>). <article-title>A cognitive evaluation of frame-of-reference rater training: Content and process issues</article-title>. <source><italic>Organizational Behavior and Human Decision Processes</italic></source>, <volume>73</volume>, <fpage>76</fpage>–<lpage>101</lpage>. <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1006/obhd.1998.2751" xlink:type="simple">http://dx.doi.org/10.1006/obhd.1998.2751</ext-link> <object-id pub-id-type="pmid">9705795</object-id></mixed-citation></ref>
<ref id="pone.0222694.ref039"><label>39</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Day</surname> <given-names>D. V.</given-names></name> &amp; <name name-style="western"><surname>Sulsky</surname> <given-names>L. M.</given-names></name> (<year>1995</year>). <article-title>Effects of frame-of-reference training and ratee information configuration on memory organization and rater accuracy</article-title>. <source>Journal of Applied Psychology</source>, <volume>80</volume>, <fpage>158</fpage>–<lpage>167</lpage>. <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1037/0021-9010.80.1.158" xlink:type="simple">http://dx.doi.org/10.1037/0021-9010.80.1.158</ext-link></mixed-citation></ref>
<ref id="pone.0222694.ref040"><label>40</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Hoffman</surname> <given-names>B. J.</given-names></name>, <name name-style="western"><surname>Gorman</surname> <given-names>C. A.</given-names></name>, <name name-style="western"><surname>Blair</surname> <given-names>C. A.</given-names></name>, <name name-style="western"><surname>Meriac</surname> <given-names>J. P.</given-names></name>, <name name-style="western"><surname>Overstreet</surname> <given-names>B. L.</given-names></name>, &amp; <name name-style="western"><surname>Atchley</surname> <given-names>E. K.</given-names></name> (<year>2012</year>). <article-title>Evidence for the effectiveness of an alternative multisource performance rating methodology</article-title>. <source>Personnel Psychology</source>, <volume>65</volume>, <fpage>531</fpage>–<lpage>563</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1111/j.1744-6570.2012.01252.x" xlink:type="simple">10.1111/j.1744-6570.2012.01252.x</ext-link></comment></mixed-citation></ref>
<ref id="pone.0222694.ref041"><label>41</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Eppich</surname> <given-names>W.</given-names></name>, <name name-style="western"><surname>Nannicelli</surname> <given-names>A.</given-names></name>, <name name-style="western"><surname>Seivert</surname> <given-names>N.</given-names></name>, <name name-style="western"><surname>Sohn</surname> <given-names>M-W.</given-names></name>, <name name-style="western"><surname>Rozenfeld</surname> <given-names>R.</given-names></name>, <name name-style="western"><surname>Woods</surname> <given-names>D.</given-names></name>, <etal>et al</etal>. (<year>2015</year>). <article-title>A Rater Training Protocol to Assess Team Performance</article-title>. <source><italic>Journal of Continuing Education in the Health Professions</italic></source>, <volume>35</volume>(<issue>2</issue>), <fpage>83</fpage>–<lpage>90</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1002/chp.21270" xlink:type="simple">10.1002/chp.21270</ext-link></comment> <object-id pub-id-type="pmid">26115107</object-id></mixed-citation></ref>
<ref id="pone.0222694.ref042"><label>42</label><mixed-citation publication-type="other" xlink:type="simple">Rosales, C., Díaz-Cabrera, M.D., &amp; Hernández-Fernaud, E. (under review). Influence of the type of measurement and the effect of primacy and recency on task and citizenship performance appraisal.</mixed-citation></ref>
<ref id="pone.0222694.ref043"><label>43</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Giráldez</surname> <given-names>M.</given-names></name>, &amp; <name name-style="western"><surname>Provencio</surname> <given-names>M.</given-names></name> (<year>2012</year>). <article-title>Life Vest Under Your Seat (Volamos hacia Miami) (Short film)</article-title>. <source>Spain</source>. Available at: <ext-link ext-link-type="uri" xlink:href="https://vimeo.com/52342817" xlink:type="simple">https://vimeo.com/52342817</ext-link>.</mixed-citation></ref>
<ref id="pone.0222694.ref044"><label>44</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Díaz-Vilela</surname> <given-names>L.</given-names></name>, <name name-style="western"><surname>Delgado</surname> <given-names>N.</given-names></name>, <name name-style="western"><surname>Isla-Díaz</surname> <given-names>R.</given-names></name>, <name name-style="western"><surname>Díaz-Cabrera</surname> <given-names>D.</given-names></name>, <name name-style="western"><surname>Hernández-Fernaud</surname> <given-names>E.</given-names></name> &amp; <name name-style="western"><surname>Rosales-Sánchez</surname> <given-names>C.</given-names></name> (<year>2015</year>). <article-title>Relationships between contextual and task performance and interrater agreement: Are there any?</article-title> <source><italic>PLOS ONE</italic></source>, <volume>10</volume>(<issue>10</issue>):<fpage>e0139898</fpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1371/journal.pone.0139898" xlink:type="simple">10.1371/journal.pone.0139898</ext-link></comment> <object-id pub-id-type="pmid">26473956</object-id></mixed-citation></ref>
<ref id="pone.0222694.ref045"><label>45</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Díaz-Cabrera</surname> <given-names>D.</given-names></name>, <name name-style="western"><surname>Hernández-Fernaud</surname> <given-names>E.</given-names></name>, <name name-style="western"><surname>Isla-Díaz</surname> <given-names>R.</given-names></name>, <name name-style="western"><surname>Delgado</surname> <given-names>N.</given-names></name>, <name name-style="western"><surname>Díaz-Vilela</surname> <given-names>L.</given-names></name> &amp; <name name-style="western"><surname>Rosales-Sánchez</surname> <given-names>C.</given-names></name> (<year>2014</year>). <article-title>Factores relevantes para aumentar la precisión, la viabilidad y el éxito de los sistemas de evaluación del desempeño laboral</article-title>. <source><italic>Papeles del Psicólogo</italic></source>, <volume>35</volume>(<issue>2</issue>), <fpage>3</fpage>–<lpage>13</lpage>.</mixed-citation></ref>
<ref id="pone.0222694.ref046"><label>46</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Aguinis</surname> <given-names>H.</given-names></name>, <name name-style="western"><surname>Mazurkiewicz</surname> <given-names>M. D.</given-names></name>, &amp; <name name-style="western"><surname>Heggestad</surname> <given-names>E. D.</given-names></name> (<year>2009</year>). <article-title>Using web-based frame-of-reference training to decrease biases in personality-based job analysis: An experimental field study</article-title>. <source><italic>Personnel Psychology</italic></source>, <volume>62</volume>, <fpage>405</fpage>–<lpage>438</lpage>.</mixed-citation></ref>
<ref id="pone.0222694.ref047"><label>47</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Ivancevich</surname> <given-names>J. M.</given-names></name> (<year>1979</year>). <article-title>Longitudinal study of the effects of rater training on psychometric error in ratings</article-title>. <source><italic>Journal of Applied Psychology</italic></source>, <volume>64</volume>, <fpage>502</fpage>–<lpage>508</lpage>.</mixed-citation></ref>
<ref id="pone.0222694.ref048"><label>48</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Lee</surname> <given-names>J. A.</given-names></name> (<year>1994</year>). <article-title>The effects of cognitive style and training on performance ratings’ validity</article-title>. <source><italic>Journal of Business and Psychology</italic></source>, <volume>8</volume>, <fpage>297</fpage>–<lpage>308</lpage>.</mixed-citation></ref>
<ref id="pone.0222694.ref049"><label>49</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Melchers</surname> <given-names>K. G.</given-names></name>, <name name-style="western"><surname>Lienhardt</surname> <given-names>N.</given-names></name>, <name name-style="western"><surname>von Aarburg</surname> <given-names>M.</given-names></name>, &amp; <name name-style="western"><surname>Kleinmann</surname> <given-names>M.</given-names></name> (<year>2011</year>). <article-title>Is more structure always better? An evaluation of the effects of rater training and descriptively anchored rating scales on rating accuracy in a structured interview</article-title>. <source>Personnel Psychology</source>, <volume>64</volume>, <fpage>53</fpage>–<lpage>87</lpage>.</mixed-citation></ref>
<ref id="pone.0222694.ref050"><label>50</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Pulakos</surname> <given-names>E. D.</given-names></name> (<year>1986</year>). <article-title>The development of training programs to increase accuracy with different training tools</article-title>. <source><italic>Organizational Behavior and Human Decision Processes</italic></source>, <volume>38</volume>(<issue>1</issue>), <fpage>76</fpage>–<lpage>91</lpage>.</mixed-citation></ref>
<ref id="pone.0222694.ref051"><label>51</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Schleicher</surname> <given-names>D. J.</given-names></name>, <name name-style="western"><surname>Day</surname> <given-names>D. V.</given-names></name>, <name name-style="western"><surname>Mayes</surname> <given-names>B. T.</given-names></name>, &amp; <name name-style="western"><surname>Riggio</surname> <given-names>R. E.</given-names></name> (<year>2002</year>). <article-title>A new frame of reference training: Enhancing the construct validity of assessment centers</article-title>. <source><italic>Journal of Applied Psychology</italic></source>, <volume>87</volume>, <fpage>735</fpage>–<lpage>746</lpage>. <object-id pub-id-type="pmid">12184577</object-id></mixed-citation></ref>
<ref id="pone.0222694.ref052"><label>52</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Stamoulis</surname> <given-names>D. T.</given-names></name>, &amp; <name name-style="western"><surname>Hauenstein</surname> <given-names>N. M. A.</given-names></name> (<year>1993</year>). <article-title>Rater training and rating accuracy: Training for dimensional accuracy versus training for ratee differentiation</article-title>. <source><italic>Journal of Applied Psychology</italic></source>, <volume>78</volume>, <fpage>994</fpage>–<lpage>1003</lpage>.</mixed-citation></ref>
<ref id="pone.0222694.ref053"><label>53</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Sulsky</surname> <given-names>L.</given-names></name>, <name name-style="western"><surname>Skarlicki</surname> <given-names>D.P.</given-names></name>, &amp; <name name-style="western"><surname>Keown</surname> <given-names>J.</given-names></name> (<year>2002</year>). <article-title>Frame-of-reference training: Overcoming the effects of organizational citizenship behavior on performance appraisal accuracy</article-title>. <source><italic>Journal of Applied Social Psychology</italic></source>, <volume>32</volume>(<issue>6</issue>), <fpage>1224</fpage>–<lpage>1241</lpage>.</mixed-citation></ref>
<ref id="pone.0222694.ref054"><label>54</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Uggerslev</surname> <given-names>K.L.</given-names></name>, &amp; <name name-style="western"><surname>Sulsky</surname> <given-names>L.M.</given-names></name> (<year>2008</year>). <article-title>Using frame-of-reference training to understand the implications of rater idiosyncrasy for rating accuracy</article-title>. <source><italic>Journal of Applied Psychology</italic></source>, <volume>93</volume>, <fpage>711</fpage>–<lpage>719</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1037/0021-9010.93.3.711" xlink:type="simple">10.1037/0021-9010.93.3.711</ext-link></comment> <object-id pub-id-type="pmid">18457499</object-id></mixed-citation></ref>
<ref id="pone.0222694.ref055"><label>55</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>DeNisi</surname> <given-names>A.S.</given-names></name>, &amp; <name name-style="western"><surname>Murphy</surname> <given-names>K.</given-names></name> (<year>2017</year>). <article-title>Performance Appraisal and Performance Management: 100 Years of Progress?</article-title> <source>Journal of Applied Psychology</source>, <volume>102</volume>(<issue>3</issue>), <fpage>421</fpage>–<lpage>433</lpage>. <comment>doi: <ext-link ext-link-type="uri" xlink:href="https://doi.org/10.1037/apl0000085" xlink:type="simple">10.1037/apl0000085</ext-link></comment> <object-id pub-id-type="pmid">28125265</object-id></mixed-citation></ref>
<ref id="pone.0222694.ref056"><label>56</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Athey</surname> <given-names>T. R.</given-names></name>, &amp; <name name-style="western"><surname>McIntyre</surname> <given-names>R. M.</given-names></name> (<year>1987</year>). <article-title>Effect of rater training on rater accuracy: Level-of-processing theory and social facilitation theory perspectives</article-title>. <source><italic>Journal of Applied Psychology</italic></source>, <volume>72</volume>(<issue>4</issue>), <fpage>567</fpage>–<lpage>572</lpage>. <ext-link ext-link-type="uri" xlink:href="http://dx.doi.org/10.1037/0021-9010.72.4.567" xlink:type="simple">http://dx.doi.org/10.1037/0021-9010.72.4.567</ext-link></mixed-citation></ref>
<ref id="pone.0222694.ref057"><label>57</label><mixed-citation publication-type="book" xlink:type="simple"><name name-style="western"><surname>Hauenstein</surname> <given-names>N. M. A.</given-names></name> (<year>1998</year>). <chapter-title>Training raters to increase the accuracy of appraisals and the usefulness of feedback</chapter-title>. In <name name-style="western"><surname>Smither</surname> <given-names>J.</given-names></name> (Ed.), <source><italic>Performance appraisal</italic></source> (pp. <fpage>404</fpage>–<lpage>444</lpage>). <publisher-loc>San Francisco</publisher-loc>: <publisher-name>Jossey-Bass</publisher-name>.</mixed-citation></ref>
<ref id="pone.0222694.ref058"><label>58</label><mixed-citation publication-type="journal" xlink:type="simple"><name name-style="western"><surname>Bernardin</surname> <given-names>H. J.</given-names></name>, <name name-style="western"><surname>Tyler</surname> <given-names>C. L.</given-names></name>, &amp; <name name-style="western"><surname>Villanova</surname> <given-names>P.</given-names></name> (<year>2009</year>). <article-title>Rating level and accuracy as a function of rater personality</article-title>. <source><italic>International Journal of Selection and Assessment</italic></source>, <volume>17</volume>, <fpage>300</fpage>–<lpage>310</lpage>.</mixed-citation></ref>
</ref-list>
</back>
</article>